<!doctype html><html lang="en">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
  <meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport">
  <title>P1729R1: Text Parsing</title>
<style data-fill-with="stylesheet">/******************************************************************************
 *                   Style sheet for the W3C specifications                   *
 *
 * Special classes handled by this style sheet include:
 *
 * Indices
 *   - .toc for the Table of Contents (<ol class="toc">)
 *     + <span class="secno"> for the section numbers
 *   - #toc for the Table of Contents (<nav id="toc">)
 *   - ul.index for Indices (<a href="#ref">term</a><span>, in §N.M</span>)
 *   - table.index for Index Tables (e.g. for properties or elements)
 *
 * Structural Markup
 *   - table.data for general data tables
 *     -> use 'scope' attribute, <colgroup>, <thead>, and <tbody> for best results !
 *     -> use <table class='complex data'> for extra-complex tables
 *     -> use <td class='long'> for paragraph-length cell content
 *     -> use <td class='pre'> when manual line breaks/indentation would help readability
 *   - dl.switch for switch statements
 *   - ol.algorithm for algorithms (helps to visualize nesting)
 *   - .figure and .caption (HTML4) and figure and figcaption (HTML5)
 *     -> .sidefigure for right-floated figures
 *   - ins/del
 *
 * Code
 *   - pre and code
 *
 * Special Sections
 *   - .note       for informative notes             (div, p, span, aside, details)
 *   - .example    for informative examples          (div, p, pre, span)
 *   - .issue      for issues                        (div, p, span)
 *   - .assertion  for assertions                    (div, p, span)
 *   - .advisement for loud normative statements     (div, p, strong)
 *   - .annoying-warning for spec obsoletion notices (div, aside, details)
 *
 * Definition Boxes
 *   - pre.def   for WebIDL definitions
 *   - table.def for tables that define other entities (e.g. CSS properties)
 *   - dl.def    for definition lists that define other entities (e.g. HTML elements)
 *
 * Numbering
 *   - .secno for section numbers in .toc and headings (<span class='secno'>3.2</span>)
 *   - .marker for source-inserted example/figure/issue numbers (<span class='marker'>Issue 4</span>)
 *   - ::before styled for CSS-generated issue/example/figure numbers:
 *     -> Documents wishing to use this only need to add
 *        figcaption::before,
 *        .caption::before { content: "Figure "  counter(figure) " ";  }
 *        .example::before { content: "Example " counter(example) " "; }
 *        .issue::before   { content: "Issue "   counter(issue) " ";   }
 *
 * Header Stuff (ignore, just don't conflict with these classes)
 *   - .head for the header
 *   - .copyright for the copyright
 *
 * Miscellaneous
 *   - .overlarge for things that should be as wide as possible, even if
 *     that overflows the body text area. This can be used on an item or
 *     on its container, depending on the effect desired.
 *     Note that this styling basically doesn't help at all when printing,
 *     since A4 paper isn't much wider than the max-width here.
 *     It's better to design things to fit into a narrower measure if possible.
 *   - js-added ToC jump links (see fixup.js)
 *
 ******************************************************************************/

/******************************************************************************/
/*                                   Body                                     */
/******************************************************************************/

	body {
		counter-reset: example figure issue;

		/* Layout */
		max-width: 50em;               /* limit line length to 50em for readability   */
		margin: 0 auto;                /* center text within page                     */
		padding: 1.6em 1.5em 2em 50px; /* assume 16px font size for downlevel clients */
		padding: 1.6em 1.5em 2em calc(26px + 1.5em); /* leave space for status flag     */

		/* Typography */
		line-height: 1.5;
		font-family: sans-serif;
		widows: 2;
		orphans: 2;
		word-wrap: break-word;
		overflow-wrap: break-word;
		hyphens: auto;

		/* Colors */
		color: black;
		background: white top left fixed no-repeat;
		background-size: 25px auto;
	}


/******************************************************************************/
/*                         Front Matter & Navigation                          */
/******************************************************************************/

/** Header ********************************************************************/

	div.head { margin-bottom: 1em }
	div.head hr { border-style: solid; }

	div.head h1 {
		font-weight: bold;
		margin: 0 0 .1em;
		font-size: 220%;
	}

	div.head h2 { margin-bottom: 1.5em;}

/** W3C Logo ******************************************************************/

	.head .logo {
		float: right;
		margin: 0.4rem 0 0.2rem .4rem;
	}

	.head img[src*="logos/W3C"] {
		display: block;
		border: solid #1a5e9a;
		border-width: .65rem .7rem .6rem;
		border-radius: .4rem;
		background: #1a5e9a;
		color: white;
		font-weight: bold;
	}

	.head a:hover > img[src*="logos/W3C"],
	.head a:focus > img[src*="logos/W3C"] {
		opacity: .8;
	}

	.head a:active > img[src*="logos/W3C"] {
		background: #c00;
		border-color: #c00;
	}

	/* see also additional rules in Link Styling section */

/** Copyright *****************************************************************/

	p.copyright,
	p.copyright small { font-size: small }

/** Back to Top / ToC Toggle **************************************************/

	@media print {
		#toc-nav {
			display: none;
		}
	}
	@media not print {
		#toc-nav {
			position: fixed;
			z-index: 2;
			bottom: 0; left: 0;
			margin: 0;
			min-width: 1.33em;
			border-top-right-radius: 2rem;
			box-shadow: 0 0 2px;
			font-size: 1.5em;
			color: black;
		}
		/* Links that are direct children of #toc-nav, the fixed-position
		   back-to-top / ToC-toggle control in the bottom-left corner. */
		#toc-nav > a {
			display: block;
			white-space: nowrap;

			height: 1.33em;
			padding: .1em 0.3em;
			margin: 0;

			/* Opaque background so page content does not show through the control */
			background: white;
			box-shadow: 0 0 2px;
			border: none;
			border-top-right-radius: 1.33em;
		}
		#toc-nav > #toc-jump {
			padding-bottom: 2em;
			margin-bottom: -1.9em;
		}

		#toc-nav > a:hover,
		#toc-nav > a:focus {
			background: #f8f8f8;
		}
		#toc-nav > a:not(:hover):not(:focus) {
			color: #707070;
		}

		/* statusbar gets in the way on keyboard focus; remove once browsers fix */
		#toc-nav > a[href="#toc"]:not(:hover):focus:last-child {
			padding-bottom: 1.5rem;
		}

		#toc-nav:not(:hover) > a:not(:focus) > span + span {
			/* Ideally this uses :focus-within on #toc-nav */
			display: none;
		}
		#toc-nav > a > span + span {
			padding-right: 0.2em;
		}

		#toc-toggle-inline {
			vertical-align: 0.05em;
			font-size: 80%;
			color: gray;
			color: hsla(203,20%,40%,.7);
			border-style: none;
			background: transparent;
			position: relative;
		}
		#toc-toggle-inline:hover:not(:active),
		#toc-toggle-inline:focus:not(:active) {
			text-shadow: 1px 1px silver;
			top: -1px;
			left: -1px;
		}

		#toc-nav :active {
			color: #C00;
		}
	}

/** ToC Sidebar ***************************************************************/

	/* Floating sidebar */
	@media screen {
		body.toc-sidebar #toc {
			position: fixed;
			top: 0; bottom: 0;
			left: 0;
			width: 23.5em;
			max-width: 80%;
			max-width: calc(100% - 2em - 26px);
			overflow: auto;
			padding: 0 1em;
			padding-left: 42px;
			padding-left: calc(1em + 26px);
			background: inherit;
			background-color: #f7f8f9;
			z-index: 1;
			box-shadow: -.1em 0 .25em rgba(0,0,0,.1) inset;
		}
		body.toc-sidebar #toc h2 {
			margin-top: .8rem;
			font-variant: small-caps;
			font-variant: all-small-caps;
			text-transform: lowercase;
			font-weight: bold;
			color: gray;
			color: hsla(203,20%,40%,.7);
		}
		body.toc-sidebar #toc-jump:not(:focus) {
			width: 0;
			height: 0;
			padding: 0;
			position: absolute;
			overflow: hidden;
		}
	}
	/* Hide main scroller when only the ToC is visible anyway */
	@media screen and (max-width: 28em) {
		body.toc-sidebar {
			overflow: hidden;
		}
	}

	/* Sidebar with its own space */
	@media screen and (min-width: 78em) {
		body:not(.toc-inline) #toc {
			position: fixed;
			top: 0; bottom: 0;
			left: 0;
			width: 23.5em;
			overflow: auto;
			padding: 0 1em;
			padding-left: 42px;
			padding-left: calc(1em + 26px);
			background: inherit;
			background-color: #f7f8f9;
			z-index: 1;
			box-shadow: -.1em 0 .25em rgba(0,0,0,.1) inset;
		}
		body:not(.toc-inline) #toc h2 {
			margin-top: .8rem;
			font-variant: small-caps;
			font-variant: all-small-caps;
			text-transform: lowercase;
			font-weight: bold;
			color: gray;
			color: hsla(203,20%,40%,.7);
		}

		body:not(.toc-inline) {
			padding-left: 29em;
		}
		/* See also Overflow section at the bottom */

		body:not(.toc-inline) #toc-jump:not(:focus) {
			width: 0;
			height: 0;
			padding: 0;
			position: absolute;
			overflow: hidden;
		}
	}
	@media screen and (min-width: 90em) {
		body:not(.toc-inline) {
			margin: 0 4em;
		}
	}

/******************************************************************************/
/*                                Sectioning                                  */
/******************************************************************************/

/** Headings ******************************************************************/

	h1, h2, h3, h4, h5, h6, dt {
		page-break-after: avoid;
		page-break-inside: avoid;
		font: 100% sans-serif;   /* Reset all font styling to clear out UA styles */
		font-family: inherit;    /* Inherit the font family. */
		line-height: 1.2;        /* Keep wrapped headings compact */
		hyphens: manual;         /* Hyphenated headings look weird */
	}

	h2, h3, h4, h5, h6 {
		margin-top: 3rem;
	}

	h1, h2, h3 {
		color: #005A9C;
		background: transparent;
	}

	h1 { font-size: 170%; }
	h2 { font-size: 140%; }
	h3 { font-size: 120%; }
	h4 { font-weight: bold; }
	h5 { font-style: italic; }
	h6 { font-variant: small-caps; }
	dt { font-weight: bold; }

/** Subheadings ***************************************************************/

	h1 + h2,
	#subtitle {
		/* #subtitle is a subtitle in an H2 under the H1 */
		margin-top: 0;
	}
	h2 + h3,
	h3 + h4,
	h4 + h5,
	h5 + h6 {
		margin-top: 1.2em; /* = 1 x line-height */
	}

/** Section divider ***********************************************************/

	:not(.head) > hr {
		font-size: 1.5em;
		text-align: center;
		margin: 1em auto;
		height: auto;
		border: transparent solid 0;
		background: transparent;
	}
	:not(.head) > hr::before {
		content: "\2727\2003\2003\2727\2003\2003\2727";
	}

/******************************************************************************/
/*                            Paragraphs and Lists                            */
/******************************************************************************/

	p {
		margin: 1em 0;
	}

	dd > p:first-child,
	li > p:first-child {
		margin-top: 0;
	}

	ul, ol {
		margin-left: 0;
		padding-left: 2em;
	}

	li {
		margin: 0.25em 0 0.5em;
		padding: 0;
	}

	dl dd {
		margin: 0 0 .5em 2em;
	}

	.head dd + dd { /* compact for header */
		margin-top: -.5em;
	}

	/* Style for algorithms */
	ol.algorithm ol:not(.algorithm),
	.algorithm > ol ol:not(.algorithm) {
	 border-left: 0.5em solid #DEF;
	}

	/* Put nice boxes around each algorithm. */
	[data-algorithm]:not(.heading) {
	  padding: .5em;
	  border: thin solid #ddd; border-radius: .5em;
	  margin: .5em calc(-0.5em - 1px);
	}
	[data-algorithm]:not(.heading) > :first-child {
	  margin-top: 0;
	}
	[data-algorithm]:not(.heading) > :last-child {
	  margin-bottom: 0;
	}

	/* Style for switch/case <dl>s */
	dl.switch > dd > ol.only,
	dl.switch > dd > .only > ol {
	 margin-left: 0;
	}
	dl.switch > dd > ol.algorithm,
	dl.switch > dd > .algorithm > ol {
	 margin-left: -2em;
	}
	dl.switch {
	 padding-left: 2em;
	}
	dl.switch > dt {
	 text-indent: -1.5em;
	 margin-top: 1em;
	}
	dl.switch > dt + dt {
	 margin-top: 0;
	}
	dl.switch > dt::before {
	 content: '\21AA';
	 padding: 0 0.5em 0 0;
	 display: inline-block;
	 width: 1em;
	 text-align: right;
	 line-height: 0.5em;
	}

/** Terminology Markup ********************************************************/


/******************************************************************************/
/*                                 Inline Markup                              */
/******************************************************************************/

/** Terminology Markup ********************************************************/
	dfn   { /* Defining instance */
		font-weight: bolder;
	}
	a > i { /* Instance of term */
		font-style: normal;
	}
	dt dfn code, code.idl {
		font-size: medium;
	}
	dfn var {
		font-style: normal;
	}

/** Change Marking ************************************************************/

	del { color: red;  text-decoration: line-through; }
	ins { color: #080; text-decoration: underline;    }

/** Miscellaneous improvements to inline formatting ***************************/

	sup {
		vertical-align: super;
		font-size: 80%
	}

/******************************************************************************/
/*                                    Code                                    */
/******************************************************************************/

/** General monospace/pre rules ***********************************************/

	pre, code, samp {
		font-family: Menlo, Consolas, "DejaVu Sans Mono", Monaco, monospace;
		font-size: .9em;
		page-break-inside: avoid;
		hyphens: none;
		text-transform: none;
	}
	pre code,
	code code {
		font-size: 100%;
	}

	pre {
		margin-top: 1em;
		margin-bottom: 1em;
		overflow: auto;
	}

/** Inline Code fragments *****************************************************/

  /* Do something nice. */

/******************************************************************************/
/*                                    Links                                   */
/******************************************************************************/

/** General Hyperlinks ********************************************************/

	/* We hyperlink a lot, so make it less intrusive */
	a[href] {
		color: #034575;
		text-decoration: none;
		border-bottom: 1px solid #707070;
		/* Need a bit of extending for it to look okay */
		padding: 0 1px 0;
		margin: 0 -1px 0;
	}
	a:visited {
		border-bottom-color: #BBB;
	}

	/* Use distinguishing colors when user is interacting with the link */
	a[href]:focus,
	a[href]:hover {
		background: #f8f8f8;
		background: rgba(75%, 75%, 75%, .25);
		border-bottom-width: 3px;
		margin-bottom: -2px;
	}
	a[href]:active {
		color: #C00;
		border-color: #C00;
	}

	/* Backout above styling for W3C logo */
	.head .logo,
	.head .logo a {
		border: none;
		text-decoration: none;
		background: transparent;
	}

/******************************************************************************/
/*                                    Images                                  */
/******************************************************************************/

	img {
		border-style: none;
	}

	/* For autogen numbers, add
	   .caption::before, figcaption::before { content: "Figure " counter(figure) ". "; }
	*/

	figure, .figure, .sidefigure {
		page-break-inside: avoid;
		text-align: center;
		margin: 2.5em 0;
	}
	.figure img,    .sidefigure img,    figure img,
	.figure object, .sidefigure object, figure object {
		max-width: 100%;
		margin: auto;
	}
	.figure pre, .sidefigure pre, figure pre {
		text-align: left;
		display: table;
		margin: 1em auto;
	}
	.figure table, figure table {
		margin: auto;
	}
	@media screen and (min-width: 20em) {
		.sidefigure {
			float: right;
			width: 50%;
			margin: 0 0 0.5em 0.5em
		}
	}
	.caption, figcaption, caption {
		font-style: italic;
		font-size: 90%;
	}
	.caption::before, figcaption::before, figcaption > .marker {
		font-weight: bold;
	}
	.caption, figcaption {
		counter-increment: figure;
	}

	/* DL list is indented 2em, but figure inside it is not */
	dd > .figure, dd > figure { margin-left: -2em }

/******************************************************************************/
/*                             Colored Boxes                                  */
/******************************************************************************/

	/* Shared box shape for all callout boxes.
	   The 'border' shorthand below only supplies a width (.5em); its style
	   resets to 'none', so only the side(s) explicitly given a style are
	   painted -- here, a bar down the left edge. */
	.issue, .note, .example, .assertion, .advisement, blockquote {
		padding: .5em;
		border: .5em;
		border-left-style: solid;
		page-break-inside: avoid;
	}
	/* Inline (span) issues and notes use tighter padding and also paint the right edge. */
	span.issue, span.note {
		padding: .1em .5em .15em;
		border-right-style: solid;
	}

	.issue,
	.note,
	.example,
	.advisement,
	.assertion,
	blockquote {
		margin: 1em auto;
	}
	.note  > p:first-child,
	.issue > p:first-child,
	blockquote > :first-child {
		margin-top: 0;
	}
	blockquote > :last-child {
		margin-bottom: 0;
	}

/** Blockquotes ***************************************************************/

	blockquote {
		border-color: silver;
	}

/** Open issue ****************************************************************/

	.issue {
		border-color: #E05252;
		background: #FBE9E9;
		counter-increment: issue;
		overflow: auto;
	}
	/* Label of an issue box: either the generated ::before text or a
	   source-supplied .marker element. */
	.issue::before, .issue > .marker {
		text-transform: uppercase;
		color: #AE1E1E;
		padding-right: 1em;
	}
	/* Add .issue::before { content: "Issue " counter(issue) " "; } for autogen numbers,
	   or use class="marker" to mark up the issue number in source. */

/** Example *******************************************************************/

	.example {
		border-color: #E0CB52;
		background: #FCFAEE;
		counter-increment: example;
		overflow: auto;
		clear: both;
	}
	.example::before, .example > .marker {
		text-transform: uppercase;
		color: #827017;
		min-width: 7.5em;
		display: block;
	}
	/* Add .example::before { content: "Example " counter(example) " "; } for autogen numbers,
	   or use class="marker" to mark up the example number in source. */

/** Non-normative Note ********************************************************/

	.note {
		border-color: #52E052;
		background: #E9FBE9;
		overflow: auto;
	}

	.note::before, .note > .marker,
	details.note > summary::before,
	details.note > summary > .marker {
		text-transform: uppercase;
		display: block;
		color: hsl(120, 70%, 30%);
	}
	/* Add .note::before { content: "Note"; } for autogen label,
	   or use class="marker" to mark up the label in source. */

	details.note > summary {
		display: block;
		color: hsl(120, 70%, 30%);
	}
	details.note[open] > summary {
		border-bottom: 1px silver solid;
	}

/** Assertion Box *************************************************************/
	/*  for assertions in algorithms */

	.assertion {
		border-color: #AAA;
		background: #EEE;
	}

/** Advisement Box ************************************************************/
	/*  for attention-grabbing normative statements */

	.advisement {
		border-color: orange;
		border-style: none solid;
		background: #FFEECC;
	}
	strong.advisement {
		display: block;
		text-align: center;
	}
	.advisement > .marker {
		color: #B35F00;
	}

/** Spec Obsoletion Notice ****************************************************/
	/* obnoxious obsoletion notice for older/abandoned specs. */

	details {
		display: block;
	}
	summary {
		font-weight: bolder;
	}

	.annoying-warning:not(details),
	details.annoying-warning:not([open]) > summary,
	details.annoying-warning[open] {
		background: #fdd;
		color: red;
		font-weight: bold;
		padding: .75em 1em;
		border: thick red;
		border-style: solid;
		border-radius: 1em;
	}
	.annoying-warning :last-child {
		margin-bottom: 0;
	}

@media not print {
	details.annoying-warning[open] {
		position: fixed;
		left: 1em;
		right: 1em;
		bottom: 1em;
		z-index: 1000;
	}
}

	details.annoying-warning:not([open]) > summary {
		text-align: center;
	}

/** Entity Definition Boxes ***************************************************/

	.def {
		padding: .5em 1em;
		background: #DEF;
		margin: 1.2em 0;
		border-left: 0.5em solid #8CCBF2;
	}

/******************************************************************************/
/*                                    Tables                                  */
/******************************************************************************/

	th, td {
		text-align: left;
		text-align: start;
	}

/** Property/Descriptor Definition Tables *************************************/

	table.def {
		/* inherits .def box styling, see above */
		width: 100%;
		border-spacing: 0;
	}

	table.def td,
	table.def th {
		padding: 0.5em;
		vertical-align: baseline;
		border-bottom: 1px solid #bbd7e9;
	}

	table.def > tbody > tr:last-child th,
	table.def > tbody > tr:last-child td {
		border-bottom: 0;
	}

	table.def th {
		font-style: italic;
		font-weight: normal;
		padding-left: 1em;
		width: 3em;
	}

	/* For when values are extra-complex and need formatting for readability */
	table td.pre {
		white-space: pre-wrap;
	}

	/* A footnote at the bottom of a def table */
	table.def           td.footnote {
		padding-top: 0.6em;
	}
	table.def           td.footnote::before {
		content: " ";
		display: block;
		height: 0.6em;
		width: 4em;
		border-top: thin solid;
	}

/** Data tables (and properly marked-up index tables) *************************/
	/*
		 <table class="data"> highlights structural relationships in a table
		 when correct markup is used (e.g. thead/tbody, th vs. td, scope attribute)

		 Use class="complex data" for particularly complicated tables --
		 (This will draw more lines: busier, but clearer.)

		 Use class="long" on table cells with paragraph-like contents
		 (This will adjust text alignment accordingly.)
		 Alternately use class="longlastcol" on tables, to have the last column assume "long".
	*/

	table {
		word-wrap: normal;
		overflow-wrap: normal;
		hyphens: manual;
	}

	table.data,
	table.index {
		margin: 1em auto;
		border-collapse: collapse;
		border: hidden;
		width: 100%;
	}
	table.data caption,
	table.index caption {
		max-width: 50em;
		margin: 0 auto 1em;
	}

	table.data td,  table.data th,
	table.index td, table.index th {
		padding: 0.5em 1em;
		border-width: 1px;
		border-color: silver;
		border-top-style: solid;
	}

	table.data thead td:empty {
		padding: 0;
		border: 0;
	}

	table.data  thead,
	table.index thead,
	table.data  tbody,
	table.index tbody {
		border-bottom: 2px solid;
	}

	table.data colgroup,
	table.index colgroup {
		border-left: 2px solid;
	}

	table.data  tbody th:first-child,
	table.index tbody th:first-child  {
		border-right: 2px solid;
		border-top: 1px solid silver;
		padding-right: 1em;
	}

	table.data th[colspan],
	table.data td[colspan] {
		text-align: center;
	}

	table.complex.data th,
	table.complex.data td {
		border: 1px solid silver;
		text-align: center;
	}

	table.data.longlastcol td:last-child,
	table.data td.long {
	 vertical-align: baseline;
	 text-align: left;
	}

	table.data img {
		vertical-align: middle;
	}


/*
Alternate table alignment rules

	table.data,
	table.index {
		text-align: center;
	}

	table.data  thead th[scope="row"],
	table.index thead th[scope="row"] {
		text-align: right;
	}

	table.data  tbody th:first-child,
	table.index tbody th:first-child  {
		text-align: right;
	}

Possible extra rowspan handling

	table.data  tbody th[rowspan]:not([rowspan='1']),
	table.index tbody th[rowspan]:not([rowspan='1']),
	table.data  tbody td[rowspan]:not([rowspan='1']),
	table.index tbody td[rowspan]:not([rowspan='1']) {
		border-left: 1px solid silver;
	}

	table.data  tbody th[rowspan]:first-child,
	table.index tbody th[rowspan]:first-child,
	table.data  tbody td[rowspan]:first-child,
	table.index tbody td[rowspan]:first-child{
		border-left: 0;
		border-right: 1px solid silver;
	}
*/

/******************************************************************************/
/*                                  Indices                                   */
/******************************************************************************/


/** Table of Contents *********************************************************/

	.toc a {
		/* More spacing; use padding to make it part of the click target. */
		padding-top: 0.1rem;
		/* Larger, more consistently-sized click target */
		display: block;
		/* Reverse color scheme */
		color: black;
		border-color: #3980B5;
		border-bottom-width: 3px !important;
		margin-bottom: 0px !important;
	}
	.toc a:visited {
		border-color: #054572;
	}
	.toc a:not(:focus):not(:hover) {
		/* Allow colors to cascade through from link styling */
		border-bottom-color: transparent;
	}

	.toc, .toc ol, .toc ul, .toc li {
		list-style: none; /* Numbers must be inlined into source */
		/* because generated content isn't search/selectable and markers can't do multilevel yet */
		margin:  0;
		padding: 0;
		line-height: 1.1rem; /* consistent spacing */
	}

	/* ToC not indented until third level, but font style & margins show hierarchy */
	.toc > li             { font-weight: bold;   }
	.toc > li li          { font-weight: normal; }
	.toc > li li li       { font-size:   95%;    }
	.toc > li li li li    { font-size:   90%;    }
	.toc > li li li li .secno { font-size: 85%; }
	.toc > li li li li li { font-size:   85%;    }
	.toc > li li li li li .secno { font-size: 100%; }

	/* @supports not (display:grid) { */
		.toc > li             { margin: 1.5rem 0;    }
		.toc > li li          { margin: 0.3rem 0;    }
		.toc > li li li       { margin-left: 2rem;   }

		/* Section numbers in a column of their own */
		.toc .secno {
			float: left;
			width: 4rem;
			white-space: nowrap;
		}

		.toc li {
			clear: both;
		}

		:not(li) > .toc              { margin-left:  5rem; }
		.toc .secno                  { margin-left: -5rem; }
		.toc > li li li .secno       { margin-left: -7rem; }
		.toc > li li li li .secno    { margin-left: -9rem; }
		.toc > li li li li li .secno { margin-left: -11rem; }

		/* Tighten up indentation in narrow ToCs */
		@media (max-width: 30em) {
			:not(li) > .toc              { margin-left:  4rem; }
			.toc .secno                  { margin-left: -4rem; }
			.toc > li li li              { margin-left:  1rem; }
			.toc > li li li .secno       { margin-left: -5rem; }
			.toc > li li li li .secno    { margin-left: -6rem; }
			.toc > li li li li li .secno { margin-left: -7rem; }
		}
	/* } */

	@supports (display:grid) {
		/* Use #toc over .toc to override non-@supports rules. */
		#toc {
			display: grid;
			align-content: start;
			grid-template-columns: auto 1fr;
			grid-column-gap: 1rem;
			column-gap: 1rem;
			grid-row-gap: .6rem;
			row-gap: .6rem;
		}
		#toc h2 {
			grid-column: 1 / -1;
			margin-bottom: 0;
		}
		#toc ol,
		#toc li,
		#toc a {
			display: contents;
			/* Switch <a> to subgrid when supported */
		}
		#toc span {
			margin: 0;
		}
		#toc > .toc > li > a > span {
			/* The spans of the top-level list,
			   comprising the first items of each top-level section. */
			margin-top: 1.1rem;
		}
		#toc#toc .secno { /* Ugh, need more specificity to override base.css */
			grid-column: 1;
			width: auto;
			margin-left: 0;
		}
		#toc .content {
			grid-column: 2;
			width: auto;
			margin-right: 1rem;
		}
		#toc .content:hover {
			background: rgba(75%, 75%, 75%, .25);
			border-bottom: 3px solid #054572;
			margin-bottom: -3px;
		}
		#toc li li li .content {
			margin-left: 1rem;
		}
		#toc li li li li .content {
			margin-left: 2rem;
		}
	}


/** Index *********************************************************************/

	/* Index Lists: Layout */
	ul.index       { margin-left: 0; columns: 15em; text-indent: 1em hanging; }
	ul.index li    { margin-left: 0; list-style: none; break-inside: avoid; }
	ul.index li li { margin-left: 1em }
	ul.index dl    { margin-top: 0; }
	ul.index dt    { margin: .2em 0 .2em 20px;}
	ul.index dd    { margin: .2em 0 .2em 40px;}
	/* Index Lists: Typography */
	ul.index ul,
	ul.index dl { font-size: smaller; }
	/* Outside of print, a <span> inside an index entry is painted transparent;
	   the span immediately following a link becomes visible (gray) while that
	   link is hovered or focused. */
	@media not print {
		ul.index li span {
			white-space: nowrap;
			color: transparent;
		}
		ul.index li a:hover + span,
		ul.index li a:focus + span {
			color: #707070;
		}
	}

/** Index Tables *****************************************************/
	/* See also the data table styling section, which this effectively subclasses */

	table.index {
		font-size: small;
		border-collapse: collapse;
		border-spacing: 0;
		text-align: left;
		margin: 1em 0;
	}

	table.index td,
	table.index th {
		padding: 0.4em;
	}

	table.index tr:hover td:not([rowspan]),
	table.index tr:hover th:not([rowspan]) {
		background: #f7f8f9;
	}

	/* The link in the first column in the property table (formerly a TD) */
	table.index th:first-child a {
		font-weight: bold;
	}

/******************************************************************************/
/*                                    Print                                   */
/******************************************************************************/

	@media print {
		/* Pages have their own margins. */
		html {
			margin: 0;
		}
		/* Serif for print. */
		body {
			font-family: serif;
		}
	}
	@page {
		margin: 1.5cm 1.1cm;
	}

/******************************************************************************/
/*                                    Legacy                                  */
/******************************************************************************/

	/* This rule is inherited from past style sheets. No idea what it's for. */
	.hide { display: none }



/******************************************************************************/
/*                             Overflow Control                               */
/******************************************************************************/

	.figure .caption, .sidefigure .caption, figcaption {
		/* in case figure is overlarge, limit caption to 50rem and center it */
		max-width: 50rem;
		margin-left: auto;
		margin-right: auto;
	}
	.overlarge > table {
		/* limit preferred width of table */
		max-width: 50em;
		margin-left: auto;
		margin-right: auto;
	}

	@media (min-width: 55em) {
		.overlarge {
			margin-left: calc(13px + 26.5rem - 50vw);
			margin-right: calc(13px + 26.5rem - 50vw);
			max-width: none;
		}
	}
	@media screen and (min-width: 78em) {
		body:not(.toc-inline) .overlarge {
			/* 30.5em body padding 50em content area */
			margin-left: calc(40em - 50vw) !important;
			margin-right: calc(40em - 50vw) !important;
		}
	}
	@media screen and (min-width: 90em) {
		body:not(.toc-inline) .overlarge {
			/* 4em html margin 30.5em body padding 50em content area */
			margin-left: 0 !important;
			margin-right: calc(84.5em - 100vw) !important;
		}
	}

	@media not print {
		.overlarge {
			overflow-x: auto;
			/* See Lea Verou's explanation background-attachment:
			 * http://lea.verou.me/2012/04/background-attachment-local/
			 *
			background: top left  / 4em 100% linear-gradient(to right,  #ffffff, rgba(255, 255, 255, 0)) local,
			            top right / 4em 100% linear-gradient(to left, #ffffff, rgba(255, 255, 255, 0)) local,
			            top left  / 1em 100% linear-gradient(to right,  #c3c3c5, rgba(195, 195, 197, 0)) scroll,
			            top right / 1em 100% linear-gradient(to left, #c3c3c5, rgba(195, 195, 197, 0)) scroll,
			            white;
			background-repeat: no-repeat;
			*/
		}
	}
</style>
<style>
    /* Document-specific table styling: thin black collapsed borders,
       cell content aligned to the top. */
    table, th, td {
      border: 1px solid black;
      border-collapse: collapse;
      vertical-align: top;
    }
    /* Cells keep only their top and bottom borders (horizontal rules). */
    th, td {
      border-left: none;
      border-right: none;
      padding: 0px 10px;
    }
    /* Center header cell text. */
    th {
      text-align: center;
    }
  </style>
  <meta content="Bikeshed version 5b139c639ea0012c5ae911f9675200af42cf05e9" name="generator">
  <link href="https://wg21.link/P1729R1" rel="canonical">
  <link href="https://isocpp.org/favicon.ico" rel="icon">
  <meta content="f2a9ab7f46604cc5a86c9a42b632173347a9d0d8" name="document-revision">
<style>/* style-md-lists */

/* This is a weird hack for me not yet following the commonmark spec
   regarding paragraph and lists. */
[data-md] > :first-child {
    margin-top: 0;
}
[data-md] > :last-child {
    margin-bottom: 0;
}</style>
<style>/* style-counters */

body {
    counter-reset: example figure issue;
}
.issue {
    counter-increment: issue;
}
.issue:not(.no-marker)::before {
    content: "Issue " counter(issue);
}

.example {
    counter-increment: example;
}
.example:not(.no-marker)::before {
    content: "Example " counter(example);
}
/* Marker for examples flagged as invalid/illegal: same counter as regular
   examples, different label. */
.invalid.example:not(.no-marker)::before,
.illegal.example:not(.no-marker)::before {
    /* The trailing space inside the string separates the label from the
       counter value, so it renders "Invalid Example 3", not "Invalid Example3". */
    content: "Invalid Example " counter(example);
}

figcaption {
    counter-increment: figure;
}
figcaption:not(.no-marker)::before {
    content: "Figure " counter(figure) " ";
}</style>
<style>/* style-syntax-highlighting */

.highlight:not(.idl) { background: hsl(24, 20%, 95%); }
code.highlight { padding: .1em; border-radius: .3em; }
pre.highlight, pre > code.highlight { display: block; padding: 1em; margin: .5em 0; overflow: auto; border-radius: 0; }
c-[a] { color: #990055 } /* Keyword.Declaration */
c-[b] { color: #990055 } /* Keyword.Type */
c-[c] { color: #708090 } /* Comment */
c-[d] { color: #708090 } /* Comment.Multiline */
c-[e] { color: #0077aa } /* Name.Attribute */
c-[f] { color: #669900 } /* Name.Tag */
c-[g] { color: #222222 } /* Name.Variable */
c-[k] { color: #990055 } /* Keyword */
c-[l] { color: #000000 } /* Literal */
c-[m] { color: #000000 } /* Literal.Number */
c-[n] { color: #0077aa } /* Name */
c-[o] { color: #999999 } /* Operator */
c-[p] { color: #999999 } /* Punctuation */
c-[s] { color: #a67f59 } /* Literal.String */
c-[t] { color: #a67f59 } /* Literal.String.Single */
c-[u] { color: #a67f59 } /* Literal.String.Double */
c-[cp] { color: #708090 } /* Comment.Preproc */
c-[c1] { color: #708090 } /* Comment.Single */
c-[cs] { color: #708090 } /* Comment.Special */
c-[kc] { color: #990055 } /* Keyword.Constant */
c-[kn] { color: #990055 } /* Keyword.Namespace */
c-[kp] { color: #990055 } /* Keyword.Pseudo */
c-[kr] { color: #990055 } /* Keyword.Reserved */
c-[ld] { color: #000000 } /* Literal.Date */
c-[nc] { color: #0077aa } /* Name.Class */
c-[no] { color: #0077aa } /* Name.Constant */
c-[nd] { color: #0077aa } /* Name.Decorator */
c-[ni] { color: #0077aa } /* Name.Entity */
c-[ne] { color: #0077aa } /* Name.Exception */
c-[nf] { color: #0077aa } /* Name.Function */
c-[nl] { color: #0077aa } /* Name.Label */
c-[nn] { color: #0077aa } /* Name.Namespace */
c-[py] { color: #0077aa } /* Name.Property */
c-[ow] { color: #999999 } /* Operator.Word */
c-[mb] { color: #000000 } /* Literal.Number.Bin */
c-[mf] { color: #000000 } /* Literal.Number.Float */
c-[mh] { color: #000000 } /* Literal.Number.Hex */
c-[mi] { color: #000000 } /* Literal.Number.Integer */
c-[mo] { color: #000000 } /* Literal.Number.Oct */
c-[sb] { color: #a67f59 } /* Literal.String.Backtick */
c-[sc] { color: #a67f59 } /* Literal.String.Char */
c-[sd] { color: #a67f59 } /* Literal.String.Doc */
c-[se] { color: #a67f59 } /* Literal.String.Escape */
c-[sh] { color: #a67f59 } /* Literal.String.Heredoc */
c-[si] { color: #a67f59 } /* Literal.String.Interpol */
c-[sx] { color: #a67f59 } /* Literal.String.Other */
c-[sr] { color: #a67f59 } /* Literal.String.Regex */
c-[ss] { color: #a67f59 } /* Literal.String.Symbol */
c-[vc] { color: #0077aa } /* Name.Variable.Class */
c-[vg] { color: #0077aa } /* Name.Variable.Global */
c-[vi] { color: #0077aa } /* Name.Variable.Instance */
c-[il] { color: #000000 } /* Literal.Number.Integer.Long */
</style>
<style>/* style-selflinks */

/* Containers that may hold a self-link become positioning contexts, so the
   absolutely positioned link below is placed relative to them. */
.heading, .issue, .note, .example, li, dt {
    position: relative;
}
/* Base self-link: absolutely positioned at the top of its container and
   shifted left by exactly its own width (3.5rem - 26px); half-transparent
   until hovered. */
a.self-link {
    position: absolute;
    top: 0;
    left: calc(-1 * (3.5rem - 26px));
    width: calc(3.5rem - 26px);
    height: 2em;
    text-align: center;
    border: none;
    transition: opacity .2s;
    opacity: .5;
}
a.self-link:hover {
    opacity: 1;
}
/* Self-links directly inside headings use a reduced font size. */
.heading > a.self-link {
    font-size: 83%;
}
/* Inside list items the link is shifted a further 2em to the left. */
li > a.self-link {
    left: calc(-1 * (3.5rem - 26px) - 2em);
}
/* Self-links inside <dfn>: the top/left offsets are reset to auto, and the
   link is rendered as a small gray box with white text that stays fully
   transparent until the dfn is hovered. */
dfn > a.self-link {
    top: auto;
    left: auto;
    opacity: 0;
    width: 1.5em;
    height: 1.5em;
    background: gray;
    color: white;
    font-style: normal;
    transition: opacity .2s, background-color .2s, color .2s;
}
/* Reveal the link while the enclosing dfn is hovered. */
dfn:hover > a.self-link {
    opacity: 1;
}
/* Hovering the link itself switches its text color to black. */
dfn > a.self-link:hover {
    color: black;
}

a.self-link::before            { content: "¶"; }
.heading > a.self-link::before { content: "§"; }
dfn > a.self-link::before      { content: "#"; }</style>
<style>/* style-autolinks */

.css.css, .property.property, .descriptor.descriptor {
    color: #005a9c;
    font-size: inherit;
    font-family: inherit;
}
.css::before, .property::before, .descriptor::before {
    content: "‘";
}
.css::after, .property::after, .descriptor::after {
    content: "’";
}
.property, .descriptor {
    /* Don't wrap property and descriptor names */
    white-space: nowrap;
}
.type { /* CSS value <type> */
    font-style: italic;
}
pre .property::before, pre .property::after {
    content: "";
}
[data-link-type="property"]::before,
[data-link-type="propdesc"]::before,
[data-link-type="descriptor"]::before,
[data-link-type="value"]::before,
[data-link-type="function"]::before,
[data-link-type="at-rule"]::before,
[data-link-type="selector"]::before,
[data-link-type="maybe"]::before {
    content: "‘";
}
[data-link-type="property"]::after,
[data-link-type="propdesc"]::after,
[data-link-type="descriptor"]::after,
[data-link-type="value"]::after,
[data-link-type="function"]::after,
[data-link-type="at-rule"]::after,
[data-link-type="selector"]::after,
[data-link-type="maybe"]::after {
    content: "’";
}

[data-link-type].production::before,
[data-link-type].production::after,
.prod [data-link-type]::before,
.prod [data-link-type]::after {
    content: "";
}

[data-link-type=element],
[data-link-type=element-attr] {
    font-family: Menlo, Consolas, "DejaVu Sans Mono", monospace;
    font-size: .9em;
}
[data-link-type=element]::before { content: "<" }
[data-link-type=element]::after  { content: ">" }

[data-link-type=biblio] {
    white-space: pre;
}</style>
 <body class="h-entry">
  <div class="head">
   <p data-fill-with="logo"></p>
   <h1 class="p-name no-ref" id="title">P1729R1<br>Text Parsing</h1>
   <h2 class="no-num no-toc no-ref heading settled" id="subtitle"><span class="content">Published Proposal, <time class="dt-updated" datetime="2019-10-06">2019-10-06</time></span></h2>
   <div data-fill-with="spec-metadata">
    <dl>
     <dt>This version:
     <dd><a class="u-url" href="http://wg21.link/P1729R1">http://wg21.link/P1729R1</a>
     <dt>Authors:
     <dd>
      <dd class="editor p-author h-card vcard"><a class="p-name fn u-email email" href="mailto:viz@fmt.dev">Victor Zverovich</a>
     <dd>
      <dd class="editor p-author h-card vcard"><a class="p-name fn u-email email" href="mailto:isocpp@eliaskosunen.com">Elias Kosunen</a>
     <dt>Project:
     <dd>ISO/IEC JTC1/SC22/WG21 14882: Programming Language — C++
     <dt>Audience:
     <dd>LEWGI
    </dl>
   </div>
   <div data-fill-with="warning"></div>
   <hr title="Separator for header">
  </div>
  <div class="p-summary" data-fill-with="abstract">
   <h2 class="no-num no-toc no-ref heading settled" id="abstract"><span class="content">Abstract</span></h2>
   <p>This paper discusses a new text parsing facility to complement the text

  formatting functionality of <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a>.</p>
  </div>
  <nav data-fill-with="table-of-contents" id="toc">
   <h2 class="no-num no-toc no-ref" id="contents">Table of Contents</h2>
   <ol class="toc" role="directory">
    <li><a href="#introduction"><span class="secno">1</span> <span class="content">Introduction</span></a>
    <li>
     <a href="#design"><span class="secno">2</span> <span class="content">Design</span></a>
     <ol class="toc">
      <li><a href="#format-strings"><span class="secno">2.1</span> <span class="content">Format strings</span></a>
      <li><a href="#safety"><span class="secno">2.2</span> <span class="content">Safety</span></a>
      <li><a href="#extensibility"><span class="secno">2.3</span> <span class="content">Extensibility</span></a>
      <li><a href="#ranges"><span class="secno">2.4</span> <span class="content">Iterator and range support</span></a>
      <li><a href="#locales"><span class="secno">2.5</span> <span class="content">Locales</span></a>
      <li><a href="#performance"><span class="secno">2.6</span> <span class="content">Performance</span></a>
      <li><a href="#binary-footprint"><span class="secno">2.7</span> <span class="content">Binary footprint</span></a>
      <li><a href="#chrono"><span class="secno">2.8</span> <span class="content">Integration with chrono</span></a>
      <li><a href="#deep-impact"><span class="secno">2.9</span> <span class="content">Impact on existing code</span></a>
     </ol>
    <li>
     <a href="#design-questions"><span class="secno">3</span> <span class="content">Open design questions</span></a>
     <ol class="toc">
      <li><a href="#error-handling"><span class="secno">3.1</span> <span class="content">Error handling and partial successes</span></a>
      <li><a href="#tuple-out"><span class="secno">3.2</span> <span class="content">Returning a <code class="highlight"><c- n>tuple</c-></code> vs. output parameters</span></a>
      <li><a href="#naming"><span class="secno">3.3</span> <span class="content">Naming</span></a>
     </ol>
    <li><a href="#existing-work"><span class="secno">4</span> <span class="content">Existing work</span></a>
    <li>
     <a href="#references"><span class="secno"></span> <span class="content">References</span></a>
     <ol class="toc">
      <li><a href="#informative"><span class="secno"></span> <span class="content">Informative References</span></a>
     </ol>
   </ol>
  </nav>
  <main>
   <h2 class="heading settled" data-level="1" id="introduction"><span class="secno">1. </span><span class="content">Introduction</span><a class="self-link" href="#introduction"></a></h2>
   <p><a data-link-type="biblio" href="#biblio-p0645">[P0645]</a> has proposed a text formatting facility that provides a safe and
extensible alternative to the <code class="highlight"><c- n>printf</c-></code> family of functions. This paper explores
the possibility of adding a symmetric parsing facility which is based on the
same design principles and shares many features with <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a>, namely</p>
   <ul>
    <li data-md>
     <p><a href="#safety">Safety</a></p>
    <li data-md>
     <p><a href="#extensibility">Extensibility</a></p>
    <li data-md>
     <p><a href="#performance">Performance</a></p>
    <li data-md>
     <p><a href="#locales">Locale control</a></p>
    <li data-md>
     <p><a href="#binary-footprint">Small binary footprint</a></p>
    <li data-md>
     <p><a href="#chrono">Integration with chrono</a></p>
   </ul>
   <p>According to <a data-link-type="biblio" href="#biblio-codesearch">[CODESEARCH]</a>, a C and C++ codesearch engine based on the ACTCD19
dataset, there are 389,848 calls to <code class="highlight"><c- n>sprintf</c-></code> and 87,815 calls to <code class="highlight"><c- n>sscanf</c-></code> at
the time of writing. So although formatted input functions are less popular than
their output counterparts, they are still widely used.</p>
   <p>Lack of a general-purpose parsing facility based on format strings has been
raised in <a data-link-type="biblio" href="#biblio-p1361">[P1361]</a> in the context of formatting and parsing of dates and times.</p>
   <p>Although having a symmetric parsing facility seems beneficial, not all languages
provide it out-of-the-box. For example, Python doesn’t have a <code class="highlight"><c- n>scanf</c-></code> equivalent
in the standard library but there is a separate <code class="highlight"><c- n>parse</c-></code> package (<a data-link-type="biblio" href="#biblio-parse">[PARSE]</a>).</p>
   <p><strong>Example</strong>:</p>
<pre class="language-c++ highlight"><c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>key</c-><c- p>;</c->
<c- b>int</c-> <c- n>value</c-><c- p>;</c->
<c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- s>"answer = 42"</c-><c- p>,</c-> <c- s>"{} = {}"</c-><c- p>,</c-> <c- n>key</c-><c- p>,</c-> <c- n>value</c-><c- p>);</c->
<c- c1>//        ~~~~~~~~~~~~~  ~~~~~~~~~  ~~~~~~~~~~</c->
<c- c1>//            input        format    arguments</c->
<c- c1>//</c->
<c- c1>// Result: key == "answer", value == 42</c->
</pre>
   <h2 class="heading settled" data-level="2" id="design"><span class="secno">2. </span><span class="content">Design</span><a class="self-link" href="#design"></a></h2>
   <p>The new parsing facility is intended to complement the existing C++ I/O streams
library, integrate well with the chrono library, and provide an API similar to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>format</c-></code>. This section discusses major features of its design.</p>
   <h3 class="heading settled" data-level="2.1" id="format-strings"><span class="secno">2.1. </span><span class="content">Format strings</span><a class="self-link" href="#format-strings"></a></h3>
   <p>As with <code class="highlight"><c- n>printf</c-></code>, the <code class="highlight"><c- n>scanf</c-></code> syntax has the advantage of being familiar to many
programmers. However, it has similar limitations:</p>
   <ul>
    <li data-md>
     <p>Many format specifiers like <code class="highlight"><c- n>hh</c-></code>, <code class="highlight"><c- n>h</c-></code>, <code class="highlight"><c- n>l</c-></code>, <code class="highlight"><c- n>j</c-></code>, etc. are used only to
convey type information. They are redundant in type-safe parsing and would
unnecessarily complicate specification and parsing.</p>
    <li data-md>
     <p>There is no standard way to extend the syntax for user-defined types.</p>
    <li data-md>
     <p>Using <code class="highlight"><c- sc>'%'</c-></code> in a custom format specifier poses difficulties, e.g. for <code class="highlight"><c- n>get_time</c-></code>-like time parsing.</p>
   </ul>
   <p>Therefore we propose a syntax based on <a data-link-type="biblio" href="#biblio-parse">[PARSE]</a> and <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a>. This syntax
employs <code class="highlight"><c- sc>'{'</c-></code> and <code class="highlight"><c- sc>'}'</c-></code> as replacement field delimiters instead of <code class="highlight"><c- sc>'%'</c-></code>. It
will provide the following advantages:</p>
   <ul>
    <li data-md>
     <p>An easy-to-parse mini-language focused on the data format rather than
conveying the type information</p>
    <li data-md>
     <p>Extensibility for user-defined types</p>
    <li data-md>
     <p>Positional arguments</p>
    <li data-md>
     <p>Support for both locale-specific and locale-independent parsing (see <a href="#locales">§2.5 Locales</a>)</p>
    <li data-md>
     <p>Consistency with <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>format</c-></code> proposed by <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a>.</p>
   </ul>
   <p>At the same time, most of the specifiers will remain the same as in <code class="highlight"><c- n>scanf</c-></code>, which
can simplify migration and possibly allow it to be automated.</p>
   <h3 class="heading settled" data-level="2.2" id="safety"><span class="secno">2.2. </span><span class="content">Safety</span><a class="self-link" href="#safety"></a></h3>
   <p><code class="highlight"><c- n>scanf</c-></code> is arguably more unsafe than <code class="highlight"><c- n>printf</c-></code> because <code class="highlight"><c- n>__attribute__</c-><c- p>((</c-><c- n>format</c-><c- p>(</c-><c- n>scanf</c-><c- p>,</c-> <c- p>...)))</c-></code> (<a data-link-type="biblio" href="#biblio-attr">[ATTR]</a>) implemented by GCC and Clang
doesn’t catch the whole class of buffer overflow bugs, e.g.</p>
<pre class="language-c++ highlight"><c- b>char</c-> <c- n>s</c-><c- p>[</c-><c- mi>10</c-><c- p>];</c->
<c- n>std</c-><c- o>::</c-><c- n>sscanf</c-><c- p>(</c-><c- n>input</c-><c- p>,</c-> <c- s>"%s"</c-><c- p>,</c-> <c- n>s</c-><c- p>);</c-> <c- c1>// s may overflow.</c->
</pre>
   <p>Specifying the maximum length in the format string above solves the issue but is
error-prone especially since one has to account for the terminating null.</p>
   <p>Unlike <code class="highlight"><c- n>scanf</c-></code>, the proposed facility relies on variadic templates instead of
the mechanism provided by <code class="highlight"><c- o>&lt;</c-><c- n>cstdarg</c-><c- o>></c-></code>. The type information is captured
automatically and passed to scanners guaranteeing type safety and making many of
the <code class="highlight"><c- n>scanf</c-></code> specifiers redundant (see <a href="#format-strings">§2.1 Format strings</a>). Memory management is
automatic to prevent buffer overflow errors.</p>
   <h3 class="heading settled" data-level="2.3" id="extensibility"><span class="secno">2.3. </span><span class="content">Extensibility</span><a class="self-link" href="#extensibility"></a></h3>
   <p>We propose an extension API for user-defined types similar to the one of <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a>. It separates format string processing and parsing enabling
compile-time format string checks and allows extending the format specification
language for user types.</p>
   <p>The general syntax of a replacement field in a format string is the same as in <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a>:</p>
<pre class="highlight"><c- n>replacement</c-><c- o>-</c-><c- n>field</c-> <c- o>::=</c-> <c- sc>'{'</c-> <c- p>[</c-><c- n>arg</c-><c- o>-</c-><c- n>id</c-><c- p>]</c-> <c- p>[</c-><c- sc>':'</c-> <c- n>format</c-><c- o>-</c-><c- n>spec</c-><c- p>]</c-> <c- sc>'}'</c->
</pre>
   <p>where <code class="highlight"><c- n>format</c-><c- o>-</c-><c- n>spec</c-></code> is predefined for built-in types, but can be customized
for user-defined types. For example, the syntax can be extended for <code class="highlight"><c- n>get_time</c-></code>-like date and time parsing</p>
<pre class="language-c++ highlight"><c- k>auto</c-> <c- n>t</c-> <c- o>=</c-> <c- n>tm</c-><c- p>();</c->
<c- n>scan</c-><c- p>(</c-><c- n>input</c-><c- p>,</c-> <c- s>"Date: {0:%Y-%m-%d}"</c-><c- p>,</c-> <c- n>t</c-><c- p>);</c->
</pre>
   <p>by providing a specialization of <code class="highlight"><c- n>scanner</c-></code> for <code class="highlight"><c- n>tm</c-></code>:</p>
<pre class="language-c++ highlight"><c- n>template</c-> <c- o>&lt;></c->
<c- k>struct</c-> <c- n>scanner</c-><c- o>&lt;</c-><c- n>tm</c-><c- o>></c-> <c- p>{</c->
  <c- n>constexpr</c-> <c- n>scan_parse_context</c-><c- o>::</c-><c- n>iterator</c-> <c- n>parse</c-><c- p>(</c-><c- n>scan_parse_context</c-><c- o>&amp;</c-> <c- n>ctx</c-><c- p>);</c->

  <c- n>template</c-> <c- o>&lt;</c-><c- n>class</c-> <c- n>ScanContext</c-><c- o>></c->
  <c- kr>typename</c-> <c- n>ScanContext</c-><c- o>::</c-><c- n>iterator</c-> <c- n>scan</c-><c- p>(</c-><c- n>tm</c-><c- o>&amp;</c-> <c- n>t</c-><c- p>,</c-> <c- n>ScanContext</c-><c- o>&amp;</c-> <c- n>ctx</c-><c- p>);</c->
<c- p>};</c->
</pre>
   <p>The <code class="highlight"><c- n>scanner</c-><c- o>&lt;</c-><c- n>tm</c-><c- o>>::</c-><c- n>parse</c-></code> function parses the <code class="highlight"><c- n>format</c-><c- o>-</c-><c- n>spec</c-></code> portion of the format
string corresponding to the current argument and <code class="highlight"><c- n>scanner</c-><c- o>&lt;</c-><c- n>tm</c-><c- o>>::</c-><c- n>scan</c-></code> parses the
input range <code class="highlight"><c- p>[</c-><c- n>ctx</c-><c- p>.</c-><c- n>begin</c-><c- p>(),</c-> <c- n>ctx</c-><c- p>.</c-><c- n>end</c-><c- p>())</c-></code> and stores the result in <code class="highlight"><c- n>t</c-></code>.</p>
   <p>An implementation of <code class="highlight"><c- n>scanner</c-><c- o>&lt;</c-><c- n>T</c-><c- o>>::</c-><c- n>scan</c-></code> can potentially use istream extraction <code class="highlight"><c- k>operator</c-><c- o>>></c-></code> for a user-defined type <code class="highlight"><c- n>T</c-></code> if available.</p>
   <h3 class="heading settled" data-level="2.4" id="ranges"><span class="secno">2.4. </span><span class="content">Iterator and range support</span><a class="self-link" href="#ranges"></a></h3>
   <p>Currently, this paper proposes taking a <code class="highlight"><c- n>string_view</c-></code> (and possibly a <code class="highlight"><c- n>wstring_view</c-></code>) as the first parameter to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>scan</c-></code>. While convenient for the
common case of parsing a value from a string, it’s less so for a lot of other
cases.</p>
   <p>Consider reading an integer from a file. Because this proposed facility doesn’t
concern itself with I/O, it’s not possible without reaching for other standard
APIs, which are not designed for this purpose. There are two conceivable
approaches to this:</p>
   <p><b>Reading the file character-by-character with C stdio:</b></p>
<pre class="language-c++ highlight"><c- k>auto</c-> <c- n>f</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>fopen</c-><c- p>(...);</c->

<c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>buf</c-><c- p>;</c->
<c- k>for</c-> <c- p>(</c-><c- b>int</c-> <c- n>ch</c-> <c- o>=</c-> <c- mi>0</c-><c- p>;</c-> <c- p>(</c-><c- n>ch</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>fgetc</c-><c- p>(</c-><c- n>f</c-><c- p>))</c-> <c- o>!=</c-> <c- n>EOF</c-><c- p>;)</c-> <c- p>{</c->
  <c- k>if</c-> <c- p>(</c-><c- n>std</c-><c- o>::</c-><c- n>isspace</c-><c- p>(</c-><c- n>ch</c-><c- p>))</c-> <c- k>break</c-><c- p>;</c->
  <c- n>buf</c-><c- p>.</c-><c- n>push_back</c-><c- p>(</c-><c- n>ch</c-><c- p>);</c->
<c- p>}</c->

<c- c1>// buf now contains the input</c->
<c- b>int</c-> <c- n>i</c-><c- p>{};</c->
<c- k>auto</c-> <c- n>ret</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- n>buf</c-><c- p>,</c-> <c- s>"{}"</c-><c- p>,</c-> <c- n>i</c-><c- p>);</c->

<c- c1>// Input has some leftovers and wasn’t exhausted.</c->
<c- c1>// Unused parts need to be put back into the file stream</c->
<c- c1>// This can happen when, for example, the input is "4.2":</c->
<c- c1>// '4' is read into the integer, ".2" is left over, and must be put back.</c->
<c- c1>// This is consistent with iostreams and scanf.</c->
<c- k>if</c-> <c- p>(</c-><c- o>!</c-><c- n>ret</c-><c- p>.</c-><c- n>empty</c-><c- p>())</c-> <c- p>{</c->
  <c- k>for</c-> <c- p>(</c-><c- k>auto</c-> <c- n>it</c-> <c- o>=</c-> <c- n>ret</c-><c- p>.</c-><c- n>rbegin</c-><c- p>();</c-> <c- n>it</c-> <c- o>!=</c-> <c- n>ret</c-><c- p>.</c-><c- n>rend</c-><c- p>();</c-> <c- o>++</c-><c- n>it</c-><c- p>)</c-> <c- p>{</c->
    <c- k>if</c-> <c- p>(</c-><c- n>std</c-><c- o>::</c-><c- n>ungetc</c-><c- p>(</c-><c- o>*</c-><c- n>it</c-><c- p>,</c-> <c- n>f</c-><c- p>)</c-> <c- o>==</c-> <c- n>EOF</c-><c- p>)</c-> <c- p>{</c->
      <c- c1>// Putback failed; file stream unusable</c->
      <c- n>throw</c-> <c- p>...;</c->
    <c- p>}</c->
  <c- p>}</c->
<c- p>}</c->
</pre>
   <p><b>Reading a "word" from the file with <code class="highlight"><c- n>fstream</c-></code>:</b></p>
<pre class="language-c++ highlight"><c- k>auto</c-> <c- n>f</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>fstream</c-><c- p>(...);</c->

<c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>buf</c-><c- p>{};</c->
<c- n>f</c-> <c- o>>></c-> <c- n>buf</c-><c- p>;</c->

<c- b>int</c-> <c- n>i</c-><c- p>{};</c->
<c- k>auto</c-> <c- n>ret</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- n>buf</c-><c- p>,</c-> <c- s>"{}"</c-><c- p>,</c-> <c- n>i</c-><c- p>);</c->

<c- c1>// See above for rationale</c->
<c- k>if</c-> <c- p>(</c-><c- o>!</c-><c- n>ret</c-><c- p>.</c-><c- n>empty</c-><c- p>())</c-> <c- p>{</c->
  <c- k>for</c-> <c- p>(</c-><c- k>auto</c-> <c- n>it</c-> <c- o>=</c-> <c- n>ret</c-><c- p>.</c-><c- n>rbegin</c-><c- p>();</c-> <c- n>it</c-> <c- o>!=</c-> <c- n>ret</c-><c- p>.</c-><c- n>rend</c-><c- p>();</c-> <c- o>++</c-><c- n>it</c-><c- p>)</c-> <c- p>{</c->
    <c- k>if</c-> <c- p>(</c-><c- o>!</c-><c- n>f</c-><c- p>.</c-><c- n>putback</c-><c- p>(</c-><c- o>*</c-><c- n>it</c-><c- p>))</c-> <c- p>{</c->
      <c- c1>// Putback failed; file stream unusable</c->
      <c- n>throw</c-> <c- p>...;</c->
    <c- p>}</c->
  <c- p>}</c->
<c- p>}</c->
</pre>
   <p>Both of these approaches are flawed.
The first one has a significant amount of bookkeeping that’s required of the user,
and so does the second one, although not as much.
Both examples are difficult to use and are easy to get wrong.
In the second example, it’d probably be better to skip the hassle,
and just do <code class="highlight"><c- n>f</c-> <c- o>>></c-> <c- n>my_int</c-></code> directly, making this proposal obsolete.</p>
   <p>One could argue that solving this problem is out of scope for this proposal. <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a> doesn’t deal with I/O, and maybe this paper shouldn’t either.
Comparisons to <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a> in this case may be unwarranted, however.
Scanning without I/O is significantly less powerful than formatting without I/O;
scanning is, inherently, a different process from formatting.</p>
   <p>With formatting, you can just dump all your values into a string.
With scanning, that just isn’t possible, as seen earlier.
The source has to be read lazily, and might grow mid-call.</p>
   <p>The solution to this is to make <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>scan</c-></code> do I/O and take a <code class="highlight"><c- n>range</c-></code>:</p>
<pre class="language-c++ highlight"><c- c1>// exposition only</c->
<c- n>template</c-> <c- o>&lt;</c-><c- kr>typename</c-> <c- n>Range</c-><c- o>></c->
<c- n>concept</c-> <c- n>scan</c-><c- o>-</c-><c- n>range</c-> <c- o>=</c->
    <c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>bidirectional_range</c-><c- o>&lt;</c-><c- n>Range</c-><c- o>></c-> <c- o>&amp;&amp;</c->
    <c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>view</c-><c- o>&lt;</c-><c- n>Range</c-><c- o>></c-> <c- o>&amp;&amp;</c->
    <c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>pair</c-><c- o>-</c-><c- n>reconstructible</c-><c- o>-</c-><c- n>range</c-><c- o>&lt;</c-><c- n>Range</c-><c- o>></c-><c- p>;</c->
</pre>
   <p>Although not a part of this proposal, a range type wrapping a file could be created
(bidirectional <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>istreambuf_iterator</c-></code>? range-based I/O?),
making the above use cases trivial.</p>
<pre class="language-c++ highlight"><c- c1>// Hypothetical</c->
<c- k>auto</c-> <c- n>f</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>io</c-><c- o>::</c-><c- n>ifile</c-><c- p>(...);</c->

<c- c1>// All the buffer management is dealt with by the range and std::scan</c->
<c- c1>// Range iterator operator++ would read a character,</c->
<c- c1>// operator* would return the last character read,</c->
<c- c1>// and operator-- would putback a character</c->
<c- b>int</c-> <c- n>i</c-><c- p>{};</c->
<c- n>f</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- n>std</c-><c- o>::</c-><c- n>io</c-><c- o>::</c-><c- n>file_view</c-><c- p>(</c-><c- n>f</c-><c- p>),</c-> <c- s>"{}"</c-><c- p>,</c-> <c- n>i</c-><c- p>);</c->
</pre>
   <p><b>Why <code class="highlight"><c- n>bidirectional_range</c-></code>?</b></p>
   <p>The reason is error recovery.
In the case that reading an argument fails,
the range needs to be reset to the state it was in before starting to read that value.
This behavior is consistent with scanf.</p>
<pre class="language-c++ highlight"><c- k>auto</c-> <c- n>input</c-> <c- o>=</c-> <c- s>"42 foo"</c-><c- n>sv</c-><c- p>;</c->
<c- b>int</c-> <c- n>i</c-><c- p>,</c-> <c- n>j</c-><c- p>;</c->
<c- c1>// Reading of j failed: foo is not an integer</c->
<c- n>input</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- n>input</c-><c- p>,</c-> <c- s>"{} {}"</c-><c- p>,</c-> <c- n>i</c-><c- p>,</c-> <c- n>j</c-><c- p>);</c->
<c- c1>// input == "foo"</c->
<c- c1>// i == 42</c->
<c- c1>// j is uninitialized (not written to)</c->
</pre>
   <p>In fact, this is the main reason why <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ungetc</c-></code> and <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>istream</c-><c- o>::</c-><c- n>unget</c-></code> exist.</p>
   <p>See more discussion on error handling in <a href="#error-handling">§3.1 Error handling and partial successes</a>.</p>
   <p><b>Why <code class="highlight"><c- n>view</c-></code>?</b></p>
   <p>It makes ownership semantics clearer, and avoids potentially expensive copying.</p>
<pre class="language-c++ highlight"><c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>str</c-> <c- o>=</c-> <c- s>"verylongstring"</c-><c- p>;</c->
<c- n>str</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- n>str</c-><c- p>,</c-> <c- p>...);</c->
<c- c1>// str would have to be reallocated and its contents moved</c->
</pre>
   <p><b>Why <code class="highlight"><c- n>pair</c-><c- o>-</c-><c- n>reconstructible</c-><c- o>-</c-><c- n>range</c-></code>?</b></p>
   <p>(Depends on <a data-link-type="biblio" href="#biblio-p1664">[P1664]</a>.)</p>
   <p>Should be pretty obvious.
While parsing the input range, <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>scan</c-></code> advances an iterator into that range.
Once the parsing is complete, the range needs to be returned starting from the iterator.</p>
   <p>Parsing operations could be optimized further for more refined range concepts.
For example, for <code class="highlight"><c- n>contiguous_range</c-></code>s, <code class="highlight"><c- n>string_view</c-></code>s could be read,
eliminating an extra allocation and a copy into a <code class="highlight"><c- n>string</c-></code>.</p>
   <p>A problem with taking a range as input would be a potential increase in generated code size,
as the internals would need to be instantiated for every used range type.
This could be mitigated by:</p>
   <ul>
    <li data-md>
     <p>Generating non-inline <code class="highlight"><c- n>vscan</c-></code> overloads for different range types ->
increase of library compile time and binary size
(first one a non-issue for stdlib, second one maybe not so much)</p>
     <ul>
      <li data-md>
       <p><code class="highlight"><c- n>string_view</c-></code> is a prime candidate for this</p>
     </ul>
    <li data-md>
     <p>Type-erasing the range alongside the arguments ->
virtual function call with every <code class="highlight"><c- o>*</c-><c- n>it</c-></code> and <code class="highlight"><c- o>++</c-><c- n>it</c-></code> (significant performance degradation)</p>
   </ul>
   <h3 class="heading settled" data-level="2.5" id="locales"><span class="secno">2.5. </span><span class="content">Locales</span><a class="self-link" href="#locales"></a></h3>
   <p>As pointed out in <a data-link-type="biblio" href="#biblio-n4412">[N4412]</a>:</p>
   <blockquote>
    <p>There are a number of communications protocol frameworks in use that employ
text-based representations of data, for example XML and JSON. The text is
machine-generated and machine-read and should not depend on or consider the
locales at either end.</p>
   </blockquote>
   <p>To address this <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a> provided control over the use of locales. We propose
doing the same for the current facility by performing locale-independent parsing
by default and designating separate format specifiers for locale-specific parsing.</p>
   <h3 class="heading settled" data-level="2.6" id="performance"><span class="secno">2.6. </span><span class="content">Performance</span><a class="self-link" href="#performance"></a></h3>
   <p>The API allows efficient implementation that minimizes virtual function calls
and dynamic memory allocations, and avoids unnecessary copies. In particular,
since it doesn’t need to guarantee the lifetime of the input across multiple
function calls, <code class="highlight"><c- n>scan</c-></code> can take <code class="highlight"><c- n>string_view</c-></code> avoiding an extra string copy
compared to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>istringstream</c-></code>.</p>
   <p>We can also avoid unnecessary copies required by <code class="highlight"><c- n>scanf</c-></code> when parsing strings,
e.g.</p>
<pre class="language-c++ highlight"><c- n>std</c-><c- o>::</c-><c- n>string_view</c-> <c- n>key</c-><c- p>;</c->
<c- b>int</c-> <c- n>value</c-><c- p>;</c->
<c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- s>"answer = 42"</c-><c- p>,</c-> <c- s>"{} = {}"</c-><c- p>,</c-> <c- n>key</c-><c- p>,</c-> <c- n>value</c-><c- p>);</c->
</pre>
   <p>This has lifetime implications similar to returning match objects in <a data-link-type="biblio" href="#biblio-p1433">[P1433]</a> and iterators or subranges in the ranges library and can be mitigated in the same
way.</p>
   <h3 class="heading settled" data-level="2.7" id="binary-footprint"><span class="secno">2.7. </span><span class="content">Binary footprint</span><a class="self-link" href="#binary-footprint"></a></h3>
   <p>We propose using a type erasure technique to reduce per-call binary code size.
The scanning function that uses variadic templates can be implemented as a
small inline wrapper around its non-variadic counterpart:</p>
<pre class="language-c++ highlight"><c- n>string_view</c-> <c- nf>vscan</c-><c- p>(</c-><c- n>string_view</c-> <c- n>input</c-><c- p>,</c-> <c- n>string_view</c-> <c- n>fmt</c-><c- p>,</c-> <c- n>scan_args</c-> <c- n>args</c-><c- p>);</c->

<c- n>template</c-> <c- o>&lt;</c-><c- kr>typename</c-><c- p>...</c-> <c- n>Args</c-><c- o>></c->
<c- kr>inline</c-> <c- k>auto</c-> <c- n>scan</c-><c- p>(</c-><c- n>string_view</c-> <c- n>input</c-><c- p>,</c-> <c- n>string_view</c-> <c- n>fmt</c-><c- p>,</c-> <c- k>const</c-> <c- n>Args</c-><c- o>&amp;</c-><c- p>...</c-> <c- n>args</c-><c- p>)</c-> <c- p>{</c->
  <c- k>return</c-> <c- n>vscan</c-><c- p>(</c-><c- n>input</c-><c- p>,</c-> <c- n>fmt</c-><c- p>,</c-> <c- n>make_scan_args</c-><c- p>(</c-><c- n>args</c-><c- p>...));</c->
<c- p>}</c->
</pre>
   <p>As shown in <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a> this dramatically reduces binary code size which will make <code class="highlight"><c- n>scan</c-></code> comparable to <code class="highlight"><c- n>scanf</c-></code> on this metric.</p>
   <h3 class="heading settled" data-level="2.8" id="chrono"><span class="secno">2.8. </span><span class="content">Integration with chrono</span><a class="self-link" href="#chrono"></a></h3>
   <p>The proposed facility can be integrated with <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>chrono</c-><c- o>::</c-><c- n>parse</c-></code> (<a data-link-type="biblio" href="#biblio-p0355">[P0355]</a>)
via the extension mechanism similarly to integration between chrono and text
formatting proposed in <a data-link-type="biblio" href="#biblio-p1361">[P1361]</a>. This will improve consistency between parsing
and formatting, make parsing multiple objects easier, and allow avoiding dynamic
memory allocations without resorting to the deprecated <code class="highlight"><c- n>strstream</c-></code>.</p>
   <p>Before:</p>
<pre class="highlight"><c- n>std</c-><c- o>::</c-><c- n>istringstream</c-> <c- n>is</c-><c- p>(</c-><c- s>"start = 10:30"</c-><c- p>);</c->
<c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>key</c-><c- p>;</c->
<c- b>char</c-> <c- n>sep</c-><c- p>;</c->
<c- n>std</c-><c- o>::</c-><c- n>chrono</c-><c- o>::</c-><c- n>seconds</c-> <c- n>time</c-><c- p>;</c->
<c- n>is</c-> <c- o>>></c-> <c- n>key</c-> <c- o>>></c-> <c- n>sep</c-> <c- o>>></c-> <c- n>std</c-><c- o>::</c-><c- n>chrono</c-><c- o>::</c-><c- n>parse</c-><c- p>(</c-><c- s>"%H:%M"</c-><c- p>,</c-> <c- n>time</c-><c- p>);</c->
</pre>
   <p>After:</p>
<pre class="highlight"><c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>key</c-><c- p>;</c->
<c- n>std</c-><c- o>::</c-><c- n>chrono</c-><c- o>::</c-><c- n>seconds</c-> <c- n>time</c-><c- p>;</c->
<c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- s>"start = 10:30"</c-><c- p>,</c-> <c- s>"{0} = {1:%H:%M}"</c-><c- p>,</c-> <c- n>key</c-><c- p>,</c-> <c- n>time</c-><c- p>);</c->
</pre>
   <p>Note that the <code class="highlight"><c- n>scan</c-></code> version additionally validates the separator.</p>
   <h3 class="heading settled" data-level="2.9" id="deep-impact"><span class="secno">2.9. </span><span class="content">Impact on existing code</span><a class="self-link" href="#deep-impact"></a></h3>
   <p>The proposed API is defined in a new header and should have no impact on
existing code.</p>
   <h2 class="heading settled" data-level="3" id="design-questions"><span class="secno">3. </span><span class="content">Open design questions</span><a class="self-link" href="#design-questions"></a></h2>
   <h3 class="heading settled" data-level="3.1" id="error-handling"><span class="secno">3.1. </span><span class="content">Error handling and partial successes</span><a class="self-link" href="#error-handling"></a></h3>
   <p>This paper deliberately avoids dealing with errors at this point.
To be consistent with <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a>, exceptions would be the proper way to deal with errors.
The problem with this approach is partial successes.</p>
<pre class="language-c++ highlight"><c- k>auto</c-> <c- n>input</c-> <c- o>=</c-> <c- s>"42 foo"</c-><c- n>sv</c-><c- p>;</c->
<c- b>int</c-> <c- n>i</c-><c- p>,</c-> <c- n>j</c-><c- p>;</c->
<c- n>try</c-> <c- p>{</c->
    <c- c1>// Will throw:</c->
    <c- c1>// foo is not an integer</c->
    <c- n>input</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- n>input</c-><c- p>,</c-> <c- s>"{} {}"</c-><c- p>,</c-> <c- n>i</c-><c- p>,</c-> <c- n>j</c-><c- p>);</c->

    <c- c1>// If this was ever reached,</c->
    <c- c1>// i and j would both be usable here</c->
<c- p>}</c-> <c- n>catch</c-> <c- p>(</c-><c- k>const</c-> <c- n>std</c-><c- o>::</c-><c- n>scan_error</c-><c- o>&amp;</c-> <c- n>e</c-><c- p>)</c-> <c- p>{</c->
    <c- n>input</c-> <c- o>=</c-> <c- n>e</c-><c- p>.</c-><c- n>input</c-><c- p>;</c->
    <c- k>if</c-> <c- p>(</c-><c- n>e</c-><c- p>.</c-><c- n>read</c-> <c- o>==</c-> <c- mi>1</c-><c- p>)</c-> <c- p>{</c->
        <c- c1>// 1 value read</c->
        <c- c1>// Only i usable</c->
    <c- p>}</c-> <c- k>else</c-> <c- p>{</c->
        <c- c1>// No values read</c->
        <c- c1>// Neither i nor j usable</c->
    <c- p>}</c->
<c- p>}</c->
</pre>
   <p>Problems with <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>scan</c-></code> communicating partial successes by throwing:</p>
   <ul>
    <li data-md>
     <p>The read values need to be declared far away from their use site:
declaration is outside the <code class="highlight"><c- k>try</c-></code>-block,
while using the value is after the call and in the <code class="highlight"><c- k>catch</c-></code>-block.</p>
    <li data-md>
     <p>Possible code duplication:
Say, for example, that the user wants to do
something for the value <code class="highlight"><c- n>i</c-></code> in the previous example.
This code would have to be in two places:
at the end of the <code class="highlight"><c- k>try</c-></code>-block, and inside the <code class="highlight"><c- k>if</c-></code> in the <code class="highlight"><c- k>catch</c-></code>-block.
Also, the input needs to be reassigned in two different places:
in the <code class="highlight"><c- k>try</c-></code>-block in the case of a success, and in the <code class="highlight"><c- k>catch</c-></code> on failure.</p>
   </ul>
   <p>An alternative would be to have an <code class="highlight"><c- n>expected</c-></code>-like return type (<a data-link-type="biblio" href="#biblio-p0323">[P0323]</a>),
except the success side should always be present, almost like:</p>
<pre class="language-c++ highlight"><c- n>namespace</c-> <c- n>std</c-> <c- p>{</c->
<c- n>template</c-> <c- o>&lt;</c-><c- kr>typename</c-> <c- n>Range</c-><c- o>></c->
<c- k>struct</c-> <c- n>scan_result</c-> <c- p>{</c->
    <c- b>int</c-> <c- n>read</c-><c- p>;</c->
    <c- n>Range</c-> <c- n>input</c-><c- p>;</c->
    <c- n>optional</c-><c- o>&lt;</c-><c- n>scan_error</c-><c- o>></c-> <c- n>error</c-><c- p>;</c->
<c- p>};</c->
<c- p>}</c->

<c- k>auto</c-> <c- n>input</c-> <c- o>=</c-> <c- s>"42 foo"</c-><c- n>sv</c-><c- p>;</c->
<c- b>int</c-> <c- n>i</c-><c- p>,</c-> <c- n>j</c-><c- p>;</c->
<c- k>auto</c-> <c- n>ret</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>scan</c-><c- p>(</c-><c- n>input</c-><c- p>,</c-> <c- s>"{} {}"</c-><c- p>,</c-> <c- n>i</c-><c- p>,</c-> <c- n>j</c-><c- p>);</c->
<c- n>input</c-> <c- o>=</c-> <c- n>ret</c-><c- p>.</c-><c- n>input</c-><c- p>;</c->
<c- k>if</c-> <c- p>(</c-><c- n>ret</c-><c- p>.</c-><c- n>read</c-> <c- o>>=</c-> <c- mi>1</c-><c- p>)</c-> <c- p>{</c->
    <c- c1>// i is usable</c->
<c- p>}</c-> <c- k>else</c-> <c- p>{</c->
    <c- c1>// Neither i nor j is usable</c->
<c- p>}</c->
</pre>
   <p>This would, of course, be inconsistent with <a data-link-type="biblio" href="#biblio-p0645">[P0645]</a>, which might be undesirable.</p>
   <h3 class="heading settled" data-level="3.2" id="tuple-out"><span class="secno">3.2. </span><span class="content">Returning a <code class="highlight"><c- n>tuple</c-></code> vs. output parameters</span><a class="self-link" href="#tuple-out"></a></h3>
   <p>In Cologne, LEWGI encouraged us to explore an alternative API returning a <code class="highlight"><c- n>tuple</c-></code>,
instead of <code class="highlight"><c- n>scanf</c-></code>-like output parameters.
We find the <code class="highlight"><c- n>scanf</c-></code> approach to be superior, for the following reasons:</p>
   <ul>
    <li data-md>
     <p>The API would be clunky to allow for partial successes.
The return type would have to be <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>tuple</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>optional</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>optional</c-><c- o>&lt;</c-><c- n>U</c-><c- o>></c-><c- p>...</c-><c- o>></c-></code>,
or even <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>scan_result</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>tuple</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>optional</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>optional</c-><c- o>&lt;</c-><c- n>U</c-><c- o>></c-><c- p>...</c-><c- o>>></c-></code>.
Using this is awkward, with all the destructuring the user would have to do.</p>
    <li data-md>
     <p>Tuples, in conjunction with optionals,
have a measurable overhead compared to output parameters, at ~5-10%,
depending on the use case, according to <a data-link-type="biblio" href="#biblio-scnlib">[SCNLIB]</a> benchmarks.</p>
   </ul>
   <h3 class="heading settled" data-level="3.3" id="naming"><span class="secno">3.3. </span><span class="content">Naming</span><a class="self-link" href="#naming"></a></h3>
   <ol>
    <li data-md>
     <p><code class="highlight"><c- n>scan</c-></code></p>
    <li data-md>
     <p><code class="highlight"><c- n>parse</c-></code></p>
    <li data-md>
     <p>other</p>
   </ol>
   <p>The name "parse" is a bit problematic because of ambiguity between format string
parsing and input parsing.</p>
   <p>"scan" as a name collides with some of the new C++17 <code class="highlight"><c- o>&lt;</c-><c- n>numeric</c-><c- o>></c-></code> algorithms:</p>
   <ul>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>inclusive_scan</c-></code></p>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>exclusive_scan</c-></code></p>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>transform_inclusive_scan</c-></code></p>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>transform_exclusive_scan</c-></code></p>
   </ul>
   <p>"scan" is the name used by <a data-link-type="biblio" href="#biblio-scnlib">[SCNLIB]</a> and <a data-link-type="biblio" href="#biblio-fmt">[FMT]</a>, and is the authors' preferred name,
and would be consistent with existing <code class="highlight"><c- n>scanf</c-></code>.</p>
   <table>
    <tbody>
     <tr>
      <td>Main API
      <td><code class="highlight"><c- n>format</c-></code>
      <td><code class="highlight"><c- n>scan</c-></code>
      <td><code class="highlight"><c- n>parse</c-></code>
     <tr>
      <td>Extension point
      <td><code class="highlight"><c- n>formatter</c-></code>
      <td><code class="highlight"><c- n>scanner</c-></code>
      <td><code class="highlight"><c- n>parser</c-></code>
     <tr>
      <td>Parse format string
      <td><code class="highlight"><c- n>formatter</c-><c- o>::</c-><c- n>parse</c-></code>
      <td><code class="highlight"><c- n>scanner</c-><c- o>::</c-><c- n>parse</c-></code>
      <td><code class="highlight"><c- n>parser</c-><c- o>::</c-><c- n>parse_format</c-></code>?
     <tr>
      <td>Extension function
      <td><code class="highlight"><c- n>formatter</c-><c- o>::</c-><c- n>format</c-></code>
      <td><code class="highlight"><c- n>scanner</c-><c- o>::</c-><c- n>scan</c-></code>
      <td><code class="highlight"><c- n>parser</c-><c- o>::</c-><c- n>parse</c-></code>
     <tr>
      <td>Format string parse context
      <td><code class="highlight"><c- n>format_parse_context</c-></code>
      <td><code class="highlight"><c- n>scan_parse_context</c-></code>
      <td><code class="highlight"><c- n>parse_parse_context</c-></code>?
     <tr>
      <td>Context
      <td><code class="highlight"><c- n>format_context</c-></code>
      <td><code class="highlight"><c- n>scan_context</c-></code>
      <td><code class="highlight"><c- n>parse_context</c-></code>
   </table>
   <h2 class="heading settled" data-level="4" id="existing-work"><span class="secno">4. </span><span class="content">Existing work</span><a class="self-link" href="#existing-work"></a></h2>
   <p><a data-link-type="biblio" href="#biblio-scnlib">[SCNLIB]</a> is a C++ library that, among other things,
provides a range-based <code class="highlight"><c- n>scan</c-></code> interface similar to the one described in this paper. <a data-link-type="biblio" href="#biblio-fmt">[FMT]</a> has a prototype implementation of the proposal.</p>
  </main>
<script>
(function() {
  "use strict";
  /* Markup for the sidebar controls. Each arrow glyph sits in an aria-hidden
     span; the span next to it carries the readable label. */
  var collapseSidebarText = '<span aria-hidden="true">←</span> <span>Collapse Sidebar</span>';
  var expandSidebarText = '<span aria-hidden="true">→</span> <span>Pop Out Sidebar</span>';
  var tocJumpText = '<span aria-hidden="true">↑</span> <span>Jump to Table of Contents</span>';

  /* Media query whose match state drives the automatic sidebar toggle. */
  var sidebarMedia = window.matchMedia('screen and (min-width: 78em)');

  /* Listener that forwards the query's current match state to toggleSidebar. */
  var autoToggle = function(mediaEvent) {
    toggleSidebar(mediaEvent.matches);
  };
  if (sidebarMedia.addListener) {
    sidebarMedia.addListener(autoToggle);
  }

  /**
   * Switch the table of contents between sidebar and inline presentation.
   *
   * @param {boolean} [on] true selects the sidebar layout (body class
   *   'toc-sidebar'), false the inline layout (body class 'toc-inline').
   *   When omitted, the current state is inverted.
   */
  function toggleSidebar(on) {
    var toggle = document.getElementById('toc-toggle');
    var tocNav = document.getElementById('toc');
    if (!toggle || !tocNav) {
      /* The media-query listener is registered whether or not the page has
         a ToC, so this can run when neither element exists. Bail out
         instead of throwing on a null element. */
      return;
    }

    if (on == undefined) {
      on = !document.body.classList.contains('toc-sidebar');
    }

    /* Don’t scroll to compensate for the ToC if we’re above it already. */
    var headY = 0;
    var head = document.querySelector('.head');
    if (head) {
      // terrible approx of "top of ToC"
      headY += head.offsetTop + head.offsetHeight;
    }
    var skipScroll = window.scrollY < headY;

    if (on) {
      /* Measure the ToC before the body classes change. */
      var tocHeight = tocNav.offsetHeight;
      document.body.classList.add('toc-sidebar');
      document.body.classList.remove('toc-inline');
      toggle.innerHTML = collapseSidebarText;
      if (!skipScroll) {
        window.scrollBy(0, 0 - tocHeight);
      }
      tocNav.focus();
      /* Same feature check as the initial registration. */
      if (sidebarMedia.addListener) {
        sidebarMedia.addListener(autoToggle); // auto-collapse when out of room
      }
    }
    else {
      document.body.classList.add('toc-inline');
      document.body.classList.remove('toc-sidebar');
      toggle.innerHTML = expandSidebarText;
      if (!skipScroll) {
        window.scrollBy(0, tocNav.offsetHeight);
      }
      if (toggle.matches(':hover')) {
        /* Unfocus button when not using keyboard navigation,
           because I don’t know where else to send the focus. */
        toggle.blur();
      }
    }
  }

  /**
   * Build the sidebar toggle link and the "Jump to Table of Contents" link,
   * creating the #toc-nav container first when the page does not have one.
   * The container is prepended to <body>; the jump link is appended to it
   * only when no #toc-jump element exists yet.
   */
  function createSidebarToggle() {
    /* Create the sidebar toggle in JS; it shouldn’t exist when JS is off. */
    var toggle = document.createElement('a');
      /* This should probably be a button, but appearance isn’t standards-track.*/
    toggle.id = 'toc-toggle';
    /* className is the DOM property behind the class attribute; assigning
       to .class would only create an unrelated expando property. */
    toggle.className = 'toc-toggle';
    toggle.href = '#toc';
    toggle.innerHTML = collapseSidebarText;

    /* Same feature check as the initial registration. */
    if (sidebarMedia.addListener) {
      sidebarMedia.addListener(autoToggle);
    }
    var toggler = function(e) {
      e.preventDefault();
      if (sidebarMedia.removeListener) {
        sidebarMedia.removeListener(autoToggle); // persist explicit off states
      }
      toggleSidebar();
      return false;
    };
    toggle.addEventListener('click', toggler, false);


    /* Get the #toc-nav container, or make a <p> for it if we don’t have one. */
    var tocNav = document.getElementById('toc-nav');
    if (!tocNav) {
      tocNav = document.createElement('p');
      tocNav.id = 'toc-nav';
      /* Prepend for better keyboard navigation */
      document.body.insertBefore(tocNav, document.body.firstChild);
    }
    /* While we’re at it, make sure we have a Jump to Toc link. */
    var tocJump = document.getElementById('toc-jump');
    if (!tocJump) {
      tocJump = document.createElement('a');
      tocJump.id = 'toc-jump';
      tocJump.href = '#toc';
      tocJump.innerHTML = tocJumpText;
      tocNav.appendChild(tocJump);
    }

    tocNav.appendChild(toggle);
  }

  /* Set up the sidebar controls only when the page has a #toc element;
     otherwise warn in the console. */
  var toc = document.getElementById('toc');
  if (toc) {
    createSidebarToggle();
    toggleSidebar(sidebarMedia.matches);

    /* If the sidebar has been manually opened and is currently overlaying the text
       (window too small for the MQ to add the margin to body),
       then auto-close the sidebar once you click on something in there. */
    toc.addEventListener('click', function(e) {
      /* The click target can be an element nested inside a link, so look
         for the nearest enclosing <a> instead of testing the target itself. */
      var link = e.target.closest('a');
      if (link && toc.contains(link) && document.body.classList.contains('toc-sidebar') && !sidebarMedia.matches) {
        toggleSidebar(false);
      }
    }, false);
  }
  else {
    console.warn("Can’t find Table of Contents. Please use <nav id='toc'> around the ToC.");
  }

  /* Wrap tables in case they overflow: every table.data / table.index whose
     parent is not already .overlarge is moved into a new
     <div class="overlarge"> inserted at the table's original position. */
  Array.prototype.forEach.call(
    document.querySelectorAll(':not(.overlarge) > table.data, :not(.overlarge) > table.index'),
    function(overflowing) {
      var box = document.createElement('div');
      box.className = 'overlarge';
      overflowing.parentNode.insertBefore(box, overflowing);
      box.appendChild(overflowing);
    }
  );

})();
</script>
  <h2 class="no-num no-ref heading settled" id="references"><span class="content">References</span><a class="self-link" href="#references"></a></h2>
  <h3 class="no-num no-ref heading settled" id="informative"><span class="content">Informative References</span><a class="self-link" href="#informative"></a></h3>
  <dl>
   <dt id="biblio-attr">[ATTR]
   <dd><a href="https://gcc.gnu.org/onlinedocs/gcc-8.2.0/gcc/Common-Function-Attributes.html">Common Function Attributes</a>. URL: <a href="https://gcc.gnu.org/onlinedocs/gcc-8.2.0/gcc/Common-Function-Attributes.html">https://gcc.gnu.org/onlinedocs/gcc-8.2.0/gcc/Common-Function-Attributes.html</a>
   <dt id="biblio-codesearch">[CODESEARCH]
   <dd>Andrew Tomazos. <a href="https://codesearch.isocpp.org">Code search engine website</a>. URL: <a href="https://codesearch.isocpp.org">https://codesearch.isocpp.org</a>
   <dt id="biblio-fmt">[FMT]
   <dd>Victor Zverovich et al. <a href="https://github.com/fmtlib/fmt">The fmt library</a>. URL: <a href="https://github.com/fmtlib/fmt">https://github.com/fmtlib/fmt</a>
   <dt id="biblio-n4412">[N4412]
   <dd>Jens Maurer. <a href="http://open-std.org/JTC1/SC22/WG21/docs/papers/2015/n4412.html">N4412: Shortcomings of iostreams</a>. URL: <a href="http://open-std.org/JTC1/SC22/WG21/docs/papers/2015/n4412.html">http://open-std.org/JTC1/SC22/WG21/docs/papers/2015/n4412.html</a>
   <dt id="biblio-p0323">[P0323]
   <dd>JF Bastien; Vicente Botet. <a href="https://wg21.link/p0323">std::expected</a>. URL: <a href="https://wg21.link/p0323">https://wg21.link/p0323</a>
   <dt id="biblio-p0355">[P0355]
   <dd>Howard E. Hinnant; Tomasz Kamiński. <a href="https://wg21.link/p0355">Extending &lt;chrono> to Calendars and Time Zones</a>. URL: <a href="https://wg21.link/p0355">https://wg21.link/p0355</a>
   <dt id="biblio-p0645">[P0645]
   <dd>Victor Zverovich. <a href="https://wg21.link/p0645">Text Formatting</a>. URL: <a href="https://wg21.link/p0645">https://wg21.link/p0645</a>
   <dt id="biblio-p1361">[P1361]
   <dd>Victor Zverovich; Daniela Engert; Howard E. Hinnant. <a href="https://wg21.link/p1361">Integration of chrono with text formatting</a>. URL: <a href="https://wg21.link/p1361">https://wg21.link/p1361</a>
   <dt id="biblio-p1433">[P1433]
   <dd>Hana Dusíková. <a href="https://wg21.link/p1433">Compile Time Regular Expressions</a>. URL: <a href="https://wg21.link/p1433">https://wg21.link/p1433</a>
   <dt id="biblio-p1664">[P1664]
   <dd>JeanHeyd Meneide; Hannes Hauswedell. <a href="https://wg21.link/p1664">reconstructible_range - a concept for putting ranges back together</a>. URL: <a href="https://wg21.link/p1664">https://wg21.link/p1664</a>
   <dt id="biblio-parse">[PARSE]
   <dd><a href="https://pypi.org/project/parse/">Python `parse` package</a>. URL: <a href="https://pypi.org/project/parse/">https://pypi.org/project/parse/</a>
   <dt id="biblio-scnlib">[SCNLIB]
   <dd>Elias Kosunen. <a href="https://github.com/eliaskosunen/scnlib">scnlib: scanf for modern C++</a>. URL: <a href="https://github.com/eliaskosunen/scnlib">https://github.com/eliaskosunen/scnlib</a>
  </dl>