<!doctype html><html lang="en">
 <head>
  <!-- Character encoding declaration; kept first in <head> so it falls within the first 1024 bytes. -->
  <meta charset="utf-8">
  <meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport">
  <title>P1629R1: Transcoding the 🌐 - Standard Text Encoding</title>
<style data-fill-with="stylesheet">/******************************************************************************
 *                   Style sheet for the W3C specifications                   *
 *
 * Special classes handled by this style sheet include:
 *
 * Indices
 *   - .toc for the Table of Contents (<ol class="toc">)
 *     + <span class="secno"> for the section numbers
 *   - #toc for the Table of Contents (<nav id="toc">)
 *   - ul.index for Indices (<a href="#ref">term</a><span>, in §N.M</span>)
 *   - table.index for Index Tables (e.g. for properties or elements)
 *
 * Structural Markup
 *   - table.data for general data tables
 *     -> use 'scope' attribute, <colgroup>, <thead>, and <tbody> for best results !
 *     -> use <table class='complex data'> for extra-complex tables
 *     -> use <td class='long'> for paragraph-length cell content
 *     -> use <td class='pre'> when manual line breaks/indentation would help readability
 *   - dl.switch for switch statements
 *   - ol.algorithm for algorithms (helps to visualize nesting)
 *   - .figure and .caption (HTML4) and figure and figcaption (HTML5)
 *     -> .sidefigure for right-floated figures
 *   - ins/del
 *
 * Code
 *   - pre and code
 *
 * Special Sections
 *   - .note       for informative notes             (div, p, span, aside, details)
 *   - .example    for informative examples          (div, p, pre, span)
 *   - .issue      for issues                        (div, p, span)
 *   - .assertion  for assertions                    (div, p, span)
 *   - .advisement for loud normative statements     (div, p, strong)
 *   - .annoying-warning for spec obsoletion notices (div, aside, details)
 *
 * Definition Boxes
 *   - pre.def   for WebIDL definitions
 *   - table.def for tables that define other entities (e.g. CSS properties)
 *   - dl.def    for definition lists that define other entities (e.g. HTML elements)
 *
 * Numbering
 *   - .secno for section numbers in .toc and headings (<span class='secno'>3.2</span>)
 *   - .marker for source-inserted example/figure/issue numbers (<span class='marker'>Issue 4</span>)
 *   - ::before styled for CSS-generated issue/example/figure numbers:
 *     -> Documents wishing to use this only need to add
 *        figcaption::before,
 *        .caption::before { content: "Figure "  counter(figure) " ";  }
 *        .example::before { content: "Example " counter(example) " "; }
 *        .issue::before   { content: "Issue "   counter(issue) " ";   }
 *
 * Header Stuff (ignore, just don't conflict with these classes)
 *   - .head for the header
 *   - .copyright for the copyright
 *
 * Miscellaneous
 *   - .overlarge for things that should be as wide as possible, even if
 *     that overflows the body text area. This can be used on an item or
 *     on its container, depending on the effect desired.
 *     Note that this styling basically doesn't help at all when printing,
 *     since A4 paper isn't much wider than the max-width here.
 *     It's better to design things to fit into a narrower measure if possible.
 *   - js-added ToC jump links (see fixup.js)
 *
 ******************************************************************************/

/******************************************************************************/
/*                                   Body                                     */
/******************************************************************************/

	body {
		/* Start the counters used for auto-numbered examples, figures and issues. */
		counter-reset: example figure issue;

		/* Layout: a centered column, capped at 50em so lines stay readable. */
		max-width: 50em;
		margin: 0 auto;
		/* Pixel-based left padding first (assumes a 16px font for downlevel clients)... */
		padding: 1.6em 1.5em 2em 50px;
		/* ...then the calc() form, which leaves space for the status flag. */
		padding: 1.6em 1.5em 2em calc(26px + 1.5em);

		/* Typography */
		font-family: sans-serif;
		line-height: 1.5;
		orphans: 2;
		widows: 2;
		word-wrap: break-word;
		overflow-wrap: break-word;
		hyphens: auto;

		/* Colors */
		color: black;
		background: white top left fixed no-repeat;
		background-size: 25px auto;
	}


/******************************************************************************/
/*                         Front Matter & Navigation                          */
/******************************************************************************/

/** Header ********************************************************************/

	/* Spacing below the document header block. */
	div.head {
		margin-bottom: 1em;
	}

	/* Rules inside the header are drawn as solid lines. */
	div.head hr {
		border-style: solid;
	}

	/* Document title. */
	div.head h1 {
		font-size: 220%;
		font-weight: bold;
		margin: 0 0 0.1em;
	}

	/* Subtitle / status line under the title. */
	div.head h2 {
		margin-bottom: 1.5em;
	}

/** W3C Logo ******************************************************************/

	/* Float the logo to the right of the header text. */
	.head .logo {
		float: right;
		margin: .4rem 0 .2rem .4rem;
	}

	/* Blue badge around the W3C logo image; the text styles apply to its alt text. */
	.head img[src*="logos/W3C"] {
		display: block;
		border: solid #1a5e9a;
		border-width: 0.65rem 0.7rem 0.6rem;
		border-radius: 0.4rem;
		background: #1a5e9a;
		color: white;
		font-weight: bold;
	}

	/* Slightly fade the logo while its link is hovered or focused. */
	.head a:focus > img[src*="logos/W3C"],
	.head a:hover > img[src*="logos/W3C"] {
		opacity: 0.8;
	}

	/* Turn the badge red while the link is being activated. */
	.head a:active > img[src*="logos/W3C"] {
		border-color: #c00;
		background: #c00;
	}

	/* see also additional rules in Link Styling section */

/** Copyright *****************************************************************/

	/* Copyright notice, and any <small> nested in it, use the "small" keyword size. */
	p.copyright,
	p.copyright small {
		font-size: small;
	}

/** Back to Top / ToC Toggle **************************************************/

	/* The jump/toggle widget is not shown when printing. */
	@media print {
		#toc-nav { display: none; }
	}
	@media not print {
		/* Pin the navigation widget to the bottom-left corner of the viewport. */
		#toc-nav {
			position: fixed;
			left: 0;
			bottom: 0;
			z-index: 2;

			min-width: 1.33em;
			margin: 0;

			font-size: 1.5em;
			color: black;
			border-top-right-radius: 2rem;
			box-shadow: 0 0 2px;
		}
		/* Each link in the widget is a single-line white block with a rounded
		   top-right corner. (A second, identical `background: white` declaration
		   was removed; it had no effect.) */
		#toc-nav > a {
			display: block;
			white-space: nowrap;

			height: 1.33em;
			padding: .1em 0.3em;
			margin: 0;

			background: white;
			box-shadow: 0 0 2px;
			border: none;
			border-top-right-radius: 1.33em;
		}
		/* Extend the jump link downward with padding, and pull the following
		   content back up with a negative margin. */
		#toc-nav > #toc-jump {
			padding-bottom: 2em;
			margin-bottom: -1.9em;
		}

		/* Highlight the link under interaction; grey out the others. */
		#toc-nav > a:focus,
		#toc-nav > a:hover {
			background: #f8f8f8;
		}
		#toc-nav > a:not(:hover):not(:focus) {
			color: #707070;
		}

		/* The browser status bar gets in the way on keyboard focus;
		   remove once browsers fix this. */
		#toc-nav > a[href="#toc"]:not(:hover):focus:last-child {
			padding-bottom: 1.5rem;
		}

		/* Hide the second span of each link unless the widget is hovered or
		   that link is focused. Ideally this would use :focus-within on #toc-nav. */
		#toc-nav:not(:hover) > a:not(:focus) > span + span {
			display: none;
		}
		#toc-nav > a > span + span {
			padding-right: .2em;
		}

		/* Inline ToC toggle: borderless, muted, slightly raised off the baseline. */
		#toc-toggle-inline {
			position: relative;
			vertical-align: .05em;
			font-size: 80%;
			color: gray; /* fallback for clients without hsla() */
			color: hsla(203, 20%, 40%, 0.7);
			background: transparent;
			border-style: none;
		}
		/* Nudge the toggle up and left with a shadow while hovered or focused
		   (but not while pressed). */
		#toc-toggle-inline:hover:not(:active),
		#toc-toggle-inline:focus:not(:active) {
			top: -1px;
			left: -1px;
			text-shadow: 1px 1px silver;
		}

		/* Anything being activated inside the widget turns red. */
		#toc-nav :active {
			color: #c00;
		}
	}

/** ToC Sidebar ***************************************************************/

	/* Floating sidebar */
	@media screen {
		/* When the sidebar class is set on <body>, the ToC becomes a fixed,
		   scrollable panel on the left edge. */
		body.toc-sidebar #toc {
			position: fixed;
			top: 0;
			bottom: 0;
			left: 0;
			z-index: 1;

			width: 23.5em;
			max-width: 80%; /* fallback */
			max-width: calc(100% - 2em - 26px);
			overflow: auto;

			padding: 0 1em;
			padding-left: 42px; /* fallback */
			padding-left: calc(1em + 26px);

			background: inherit;
			background-color: #f7f8f9;
			box-shadow: inset -0.1em 0 0.25em rgba(0, 0, 0, 0.1);
		}

		/* Sidebar heading: muted, bold small caps. */
		body.toc-sidebar #toc h2 {
			margin-top: 0.8rem;
			font-weight: bold;
			font-variant: small-caps; /* fallback */
			font-variant: all-small-caps;
			text-transform: lowercase;
			color: gray; /* fallback */
			color: hsla(203, 20%, 40%, 0.7);
		}

		/* Collapse the jump link to nothing unless it has focus. */
		body.toc-sidebar #toc-jump:not(:focus) {
			position: absolute;
			width: 0;
			height: 0;
			padding: 0;
			overflow: hidden;
		}
	}

	/* On very narrow screens, stop the body from scrolling while the sidebar is shown. */
	@media screen and (max-width: 28em) {
		body.toc-sidebar { overflow: hidden; }
	}

	/* Sidebar with its own space */
	@media screen and (min-width: 78em) {
		/* From 78em wide, the ToC is a fixed left panel unless <body> has .toc-inline. */
		body:not(.toc-inline) #toc {
			position: fixed;
			top: 0;
			bottom: 0;
			left: 0;
			z-index: 1;

			width: 23.5em;
			overflow: auto;

			padding: 0 1em;
			padding-left: 42px; /* fallback */
			padding-left: calc(1em + 26px);

			background: inherit;
			background-color: #f7f8f9;
			box-shadow: inset -0.1em 0 0.25em rgba(0, 0, 0, 0.1);
		}

		/* Panel heading: muted, bold small caps. */
		body:not(.toc-inline) #toc h2 {
			margin-top: 0.8rem;
			font-weight: bold;
			font-variant: small-caps; /* fallback */
			font-variant: all-small-caps;
			text-transform: lowercase;
			color: gray; /* fallback */
			color: hsla(203, 20%, 40%, 0.7);
		}

		/* Reserve room for the panel on the left of the content. */
		body:not(.toc-inline) {
			padding-left: 29em;
		}
		/* See also Overflow section at the bottom */

		/* Collapse the jump link to nothing unless it has focus. */
		body:not(.toc-inline) #toc-jump:not(:focus) {
			position: absolute;
			width: 0;
			height: 0;
			padding: 0;
			overflow: hidden;
		}
	}

	/* From 90em wide, give the body fixed 4em side margins instead of auto-centering. */
	@media screen and (min-width: 90em) {
		body:not(.toc-inline) { margin: 0 4em; }
	}

/******************************************************************************/
/*                                Sectioning                                  */
/******************************************************************************/

/** Headings ******************************************************************/

	h1, h2, h3, h4, h5, h6, dt {
		/* Avoid page breaks after or inside these elements. */
		page-break-after: avoid;
		page-break-inside: avoid;
		/* Reset all font styling to clear out UA styles, then inherit the family. */
		font: 100% sans-serif;
		font-family: inherit;
		/* Must come after the `font` shorthand, which resets line-height.
		   Keeps wrapped headings compact. */
		line-height: 1.2;
		/* Hyphenated headings look weird. */
		hyphens: manual;
	}

	h2, h3, h4, h5, h6 {
		margin-top: 3rem;
	}

	h1, h2, h3 {
		color: #005a9c;
		background: transparent;
	}

	/* Per-level distinctions: size for h1-h3, font style for h4-h6 and dt. */
	h1 {
		font-size: 170%;
	}
	h2 {
		font-size: 140%;
	}
	h3 {
		font-size: 120%;
	}
	h4 {
		font-weight: bold;
	}
	h5 {
		font-style: italic;
	}
	h6 {
		font-variant: small-caps;
	}
	dt {
		font-weight: bold;
	}

/** Subheadings ***************************************************************/

	/* #subtitle is a subtitle in an H2 under the H1; it sits flush against the title. */
	h1 + h2,
	#subtitle {
		margin-top: 0;
	}

	/* A heading directly after its parent-level heading gets one line of space
	   (1.2em = 1 x line-height). */
	h2 + h3, h3 + h4, h4 + h5, h5 + h6 {
		margin-top: 1.2em;
	}

/** Section divider ***********************************************************/

	/* Outside the header, <hr> has no visible line of its own: it is a centered
	   box for the generated ornament below. */
	:not(.head) > hr {
		height: auto;
		margin: 1em auto;
		font-size: 1.5em;
		text-align: center;
		background: transparent;
		border: 0 solid transparent;
	}
	/* Ornament: three U+2727 stars, each pair separated by two em spaces (U+2003). */
	:not(.head) > hr::before {
		content: "\2727\2003\2003\2727\2003\2003\2727";
	}

/******************************************************************************/
/*                            Paragraphs and Lists                            */
/******************************************************************************/

	p { margin: 1em 0; }

	/* A paragraph that opens a list item or definition has no top margin. */
	li > p:first-child,
	dd > p:first-child { margin-top: 0; }

	/* Lists are indented with padding, not margin. */
	ul, ol {
		padding-left: 2em;
		margin-left: 0;
	}

	li {
		padding: 0;
		margin: .25em 0 .5em;
	}

	dl dd { margin: 0 0 0.5em 2em; }

	/* Compact for header: consecutive <dd>s close up. */
	.head dd + dd { margin-top: -0.5em; }

	/* Style for algorithms */
	/* Nested step lists inside an algorithm get a pale blue bar on the left. */
	ol.algorithm ol:not(.algorithm),
	.algorithm > ol ol:not(.algorithm) {
		border-left: .5em solid #def;
	}

	/* Put nice boxes around each algorithm. The negative side margin equals
	   the padding plus the border width. */
	[data-algorithm]:not(.heading) {
		padding: 0.5em;
		border: thin solid #ddd;
		border-radius: 0.5em;
		margin: 0.5em calc(-0.5em - 1px);
	}
	/* Trim the outer margins of the box's first and last children. */
	[data-algorithm]:not(.heading) > :first-child { margin-top: 0; }
	[data-algorithm]:not(.heading) > :last-child  { margin-bottom: 0; }

	/* Style for switch/case <dl>s */
	/* Lists marked .only inside a switch case are not indented further. */
	dl.switch > dd > ol.only,
	dl.switch > dd > .only > ol {
		margin-left: 0;
	}
	/* Algorithm lists are pulled left by 2em. */
	dl.switch > dd > ol.algorithm,
	dl.switch > dd > .algorithm > ol {
		margin-left: -2em;
	}

	dl.switch {
		padding-left: 2em;
	}

	/* Case labels hang into the left padding. */
	dl.switch > dt {
		margin-top: 1em;
		text-indent: -1.5em;
	}
	/* Consecutive labels are not separated by vertical space. */
	dl.switch > dt + dt {
		margin-top: 0;
	}
	/* Arrow marker (U+21AA) generated before each label. */
	dl.switch > dt::before {
		content: '\21AA';
		display: inline-block;
		width: 1em;
		padding: 0 .5em 0 0;
		line-height: .5em;
		text-align: right;
	}

/** Terminology Markup ********************************************************/


/******************************************************************************/
/*                                 Inline Markup                              */
/******************************************************************************/

/** Terminology Markup ********************************************************/
	/* Defining instance of a term */
	dfn {
		font-weight: bolder;
	}

	/* Instance of a term inside a link: drop the italics */
	a > i {
		font-style: normal;
	}

	dt dfn code,
	code.idl {
		font-size: medium;
	}

	/* Variables inside a definition are not italicized */
	dfn var {
		font-style: normal;
	}

/** Change Marking ************************************************************/

	/* Removed text: red and struck through. */
	del {
		color: red;
		text-decoration: line-through;
	}
	/* Inserted text: green and underlined. */
	ins {
		color: #080;
		text-decoration: underline;
	}

/** Miscellaneous improvements to inline formatting ***************************/

	/* Superscripts: raised and slightly smaller. */
	sup {
		font-size: 80%;
		vertical-align: super;
	}

/******************************************************************************/
/*                                    Code                                    */
/******************************************************************************/

/** General monospace/pre rules ***********************************************/

	/* Shared monospace styling: no hyphenation, no case transforms, and no
	   page break inside a code element. */
	pre, code, samp {
		font-family: Menlo, Consolas, "DejaVu Sans Mono", Monaco, monospace;
		font-size: 0.9em;
		hyphens: none;
		text-transform: none;
		page-break-inside: avoid;
	}

	/* Nested code elements do not shrink a second time. */
	pre code,
	code code { font-size: 100%; }

	/* Code blocks scroll instead of overflowing. */
	pre {
		margin-top: 1em;
		margin-bottom: 1em;
		overflow: auto;
	}

/** Inline Code fragments *****************************************************/

  /* Do something nice. */

/******************************************************************************/
/*                                    Links                                   */
/******************************************************************************/

/** General Hyperlinks ********************************************************/

	/* We hyperlink a lot, so make it less intrusive */
	a[href] {
		color: #034575;
		text-decoration: none;
		border-bottom: 1px solid #707070;
		/* Extend the box 1px each side (cancelled by the margin) so the
		   underline looks okay. */
		padding: 0 1px;
		margin: 0 -1px;
	}
	a:visited {
		border-bottom-color: #bbb;
	}

	/* Use distinguishing colors when user is interacting with the link.
	   The thicker underline is offset by a negative bottom margin. */
	a[href]:hover,
	a[href]:focus {
		background: #f8f8f8; /* fallback */
		background: rgba(75%, 75%, 75%, 0.25);
		border-bottom-width: 3px;
		margin-bottom: -2px;
	}
	a[href]:active {
		color: #c00;
		border-color: #c00;
	}

	/* Backout above styling for W3C logo */
	.head .logo,
	.head .logo a {
		text-decoration: none;
		background: transparent;
		border: none;
	}

/******************************************************************************/
/*                                    Images                                  */
/******************************************************************************/

	/* Images never draw a border. */
	img { border-style: none; }

	/* For autogen numbers, add
	   .caption::before, figcaption::before { content: "Figure " counter(figure) ". "; }
	*/

	/* Figures are centered and kept on one page. */
	figure,
	.figure,
	.sidefigure {
		margin: 2.5em 0;
		text-align: center;
		page-break-inside: avoid;
	}

	/* Embedded media never exceeds the figure's width. */
	figure img,    .figure img,    .sidefigure img,
	figure object, .figure object, .sidefigure object {
		margin: auto;
		max-width: 100%;
	}

	/* Code inside a figure: a centered shrink-to-fit box with left-aligned text. */
	figure pre,
	.figure pre,
	.sidefigure pre {
		display: table;
		margin: 1em auto;
		text-align: left;
	}

	figure table,
	.figure table {
		margin: auto;
	}

	/* Side figures float right at half width from 20em screen width up. */
	@media screen and (min-width: 20em) {
		.sidefigure {
			float: right;
			width: 50%;
			margin: 0 0 .5em .5em;
		}
	}

	/* Captions: italic and slightly smaller, with a bold number/marker. */
	.caption,
	figcaption,
	caption {
		font-size: 90%;
		font-style: italic;
	}
	.caption::before,
	figcaption::before,
	figcaption > .marker {
		font-weight: bold;
	}
	/* Each figure caption advances the figure counter. */
	.caption,
	figcaption {
		counter-increment: figure;
	}

	/* DL list is indented 2em, but figure inside it is not */
	dd > figure,
	dd > .figure {
		margin-left: -2em;
	}

/******************************************************************************/
/*                             Colored Boxes                                  */
/******************************************************************************/

	/* Shared box model for all callout boxes and blockquotes.
	   `border: .5em` sets only the width on all four sides (the shorthand leaves
	   the style at `none`), so a side is drawn only where its style is set
	   explicitly; here that is the left edge. Colors are set per box type in the
	   rules that follow. */
	.issue, .note, .example, .assertion, .advisement, blockquote {
		padding: .5em;
		border: .5em;
		border-left-style: solid;
		page-break-inside: avoid;
	}
	/* Inline (span) issues and notes use tighter padding and also draw the right edge. */
	span.issue, span.note {
		padding: .1em .5em .15em;
		border-right-style: solid;
	}

	/* Vertical spacing around each box; `auto` side margins. */
	.issue,
	.note,
	.example,
	.advisement,
	.assertion,
	blockquote {
		margin: 1em auto;
	}
	/* Remove the top margin of a leading paragraph (any first child, for blockquotes). */
	.note  > p:first-child,
	.issue > p:first-child,
	blockquote > :first-child {
		margin-top: 0;
	}
	/* Likewise the bottom margin of a blockquote's last child. */
	blockquote > :last-child {
		margin-bottom: 0;
	}

/** Blockquotes ***************************************************************/

	/* Blockquotes get a silver bar. */
	blockquote { border-color: silver; }

/** Open issue ****************************************************************/

	/* Open-issue box: red bar on a pale red background. Each one advances the
	   `issue` counter. */
	.issue {
		border-color: #E05252;
		background: #FBE9E9;
		counter-increment: issue;
		overflow: auto;
	}
	/* Issue label, generated or marked up in source: uppercase dark red, spaced
	   from the text that follows. (A repeated `text-transform: uppercase`
	   declaration was removed; it had no effect.) */
	.issue::before, .issue > .marker {
		text-transform: uppercase;
		color: #AE1E1E;
		padding-right: 1em;
	}
	/* Add .issue::before { content: "Issue " counter(issue) " "; } for autogen numbers,
	   or use class="marker" to mark up the issue number in source. */

/** Example *******************************************************************/

	/* Example box: yellow bar on a pale yellow background, placed below any
	   preceding floats. Each one advances the `example` counter. */
	.example {
		clear: both;
		overflow: auto;
		background: #fcfaee;
		border-color: #e0cb52;
		counter-increment: example;
	}
	/* Example label: an uppercase block on its own line. */
	.example::before,
	.example > .marker {
		display: block;
		min-width: 7.5em;
		color: #827017;
		text-transform: uppercase;
	}
	/* Add .example::before { content: "Example " counter(example) " "; } for autogen numbers,
	   or use class="marker" to mark up the example number in source. */

/** Non-normative Note ********************************************************/

	/* Note box: green bar on a pale green background. */
	.note {
		overflow: auto;
		background: #e9fbe9;
		border-color: #52e052;
	}

	/* Note label, including the <details>/<summary> form: an uppercase dark
	   green block. */
	.note::before,
	.note > .marker,
	details.note > summary::before,
	details.note > summary > .marker {
		display: block;
		color: hsl(120, 70%, 30%);
		text-transform: uppercase;
	}
	/* Add .note::before { content: "Note"; } for autogen label,
	   or use class="marker" to mark up the label in source. */

	/* The summary of a collapsible note is a block in the same green... */
	details.note > summary {
		display: block;
		color: hsl(120, 70%, 30%);
	}
	/* ...with a rule under it while the note is open. */
	details.note[open] > summary {
		border-bottom: 1px solid silver;
	}

/** Assertion Box *************************************************************/
	/*  for assertions in algorithms */

	/* Assertion box: grey bar on a light grey background. */
	.assertion {
		background: #eee;
		border-color: #aaa;
	}

/** Advisement Box ************************************************************/
	/*  for attention-grabbing normative statements */

	/* Advisement box: orange bars on both the left and right edges. */
	.advisement {
		background: #ffeecc;
		border-color: orange;
		border-style: none solid;
	}
	/* An advisement written as <strong> becomes a centered block. */
	strong.advisement {
		display: block;
		text-align: center;
	}
	.advisement > .marker {
		color: #b35f00;
	}

/** Spec Obsoletion Notice ****************************************************/
	/* obnoxious obsoletion notice for older/abandoned specs. */

	/* Baseline rendering for <details>/<summary>. */
	details { display: block; }
	summary { font-weight: bolder; }

	/* Red warning box. Applies to a plain .annoying-warning element, to the
	   summary of a closed <details>, or to the whole <details> when open. */
	.annoying-warning:not(details),
	details.annoying-warning:not([open]) > summary,
	details.annoying-warning[open] {
		padding: 0.75em 1em;
		color: red;
		font-weight: bold;
		background: #fdd;
		border: thick solid red;
		border-radius: 1em;
	}
	.annoying-warning :last-child { margin-bottom: 0; }

/* On non-print media an open warning is pinned near the bottom of the viewport. */
@media not print {
	details.annoying-warning[open] {
		position: fixed;
		right: 1em;
		bottom: 1em;
		left: 1em;
		z-index: 1000;
	}
}

	/* A closed warning centers its summary text. */
	details.annoying-warning:not([open]) > summary { text-align: center; }

/** Entity Definition Boxes ***************************************************/

	/* Definition box: pale blue background with a stronger blue bar on the left. */
	.def {
		margin: 1.2em 0;
		padding: 0.5em 1em;
		background: #def;
		border-left: .5em solid #8ccbf2;
	}

/******************************************************************************/
/*                                    Tables                                  */
/******************************************************************************/

	/* Cells align to the start edge; `left` is the fallback where `start` is unsupported. */
	th,
	td {
		text-align: left;
		text-align: start;
	}

/** Property/Descriptor Definition Tables *************************************/

	/* Full-width definition table; inherits the .def box styling. */
	table.def {
		width: 100%;
		border-spacing: 0;
	}

	/* Rows are separated by a thin blue rule... */
	table.def th,
	table.def td {
		padding: .5em;
		vertical-align: baseline;
		border-bottom: 1px solid #bbd7e9;
	}
	/* ...except after the last row of the body. */
	table.def > tbody > tr:last-child th,
	table.def > tbody > tr:last-child td {
		border-bottom: 0;
	}

	/* Row labels: narrow, italic, normal weight. */
	table.def th {
		width: 3em;
		padding-left: 1em;
		font-style: italic;
		font-weight: normal;
	}

	/* For when values are extra-complex and need formatting for readability */
	table td.pre {
		white-space: pre-wrap;
	}

	/* A footnote at the bottom of a def table, opened by a short rule drawn
	   with generated content. */
	table.def td.footnote {
		padding-top: .6em;
	}
	table.def td.footnote::before {
		content: " ";
		display: block;
		width: 4em;
		height: .6em;
		border-top: thin solid;
	}

/** Data tables (and properly marked-up index tables) *************************/
	/*
		 <table class="data"> highlights structural relationships in a table
		 when correct markup is used (e.g. thead/tbody, th vs. td, scope attribute)

		 Use class="complex data" for particularly complicated tables --
		 (This will draw more lines: busier, but clearer.)

		 Use class="long" on table cells with paragraph-like contents
		 (This will adjust text alignment accordingly.)
		 Alternately use class="longlastcol" on tables, to have the last column assume "long".
	*/

	/* Tables opt out of the body's aggressive word breaking and automatic hyphenation. */
	table {
		word-wrap: normal;
		overflow-wrap: normal;
		hyphens: manual;
	}

	/* Data and index tables: full width, collapsed borders, outer border hidden. */
	table.data,
	table.index {
		margin: 1em auto;
		border-collapse: collapse;
		border: hidden;
		width: 100%;
	}
	/* Captions are capped at 50em and centered. */
	table.data caption,
	table.index caption {
		max-width: 50em;
		margin: 0 auto 1em;
	}

	/* Cells: only the top border style is set, giving a thin silver rule between rows. */
	table.data td,  table.data th,
	table.index td, table.index th {
		padding: 0.5em 1em;
		border-width: 1px;
		border-color: silver;
		border-top-style: solid;
	}

	/* Empty cells in the header take no space and draw no border. */
	table.data thead td:empty {
		padding: 0;
		border: 0;
	}

	/* Heavier rule under the header and under each body group. */
	table.data  thead,
	table.index thead,
	table.data  tbody,
	table.index tbody {
		border-bottom: 2px solid;
	}

	/* Heavier rule on the left of each column group. */
	table.data colgroup,
	table.index colgroup {
		border-left: 2px solid;
	}

	/* A row header in the first column is separated from the data by a heavier rule. */
	table.data  tbody th:first-child,
	table.index tbody th:first-child  {
		border-right: 2px solid;
		border-top: 1px solid silver;
		padding-right: 1em;
	}

	/* Cells carrying a colspan attribute are centered. */
	table.data th[colspan],
	table.data td[colspan] {
		text-align: center;
	}

	/* "complex data" tables draw every cell border and center the cell text. */
	table.complex.data th,
	table.complex.data td {
		border: 1px solid silver;
		text-align: center;
	}

	/* Paragraph-like cells (class "long", or the last column of a
	   "longlastcol" table) are left-aligned on the baseline. */
	table.data.longlastcol td:last-child,
	table.data td.long {
	 vertical-align: baseline;
	 text-align: left;
	}

	/* Images in data tables are vertically centered on the line. */
	table.data img {
		vertical-align: middle;
	}


/*
Alternate table alignment rules

	table.data,
	table.index {
		text-align: center;
	}

	table.data  thead th[scope="row"],
	table.index thead th[scope="row"] {
		text-align: right;
	}

	table.data  tbody th:first-child,
	table.index tbody th:first-child  {
		text-align: right;
	}

Possible extra rowspan handling

	table.data  tbody th[rowspan]:not([rowspan='1']),
	table.index tbody th[rowspan]:not([rowspan='1']),
	table.data  tbody td[rowspan]:not([rowspan='1']),
	table.index tbody td[rowspan]:not([rowspan='1']) {
		border-left: 1px solid silver;
	}

	table.data  tbody th[rowspan]:first-child,
	table.index tbody th[rowspan]:first-child,
	table.data  tbody td[rowspan]:first-child,
	table.index tbody td[rowspan]:first-child{
		border-left: 0;
		border-right: 1px solid silver;
	}
*/

/******************************************************************************/
/*                                  Indices                                   */
/******************************************************************************/


/** Table of Contents *********************************************************/

	/* ToC links. The !important declarations override the width and margin
	   changes that the general a[href]:hover/:focus rule would otherwise apply. */
	.toc a {
		/* More spacing; use padding to make it part of the click target. */
		padding-top: 0.1rem;
		/* Larger, more consistently-sized click target */
		display: block;
		/* Reverse color scheme */
		color: black;
		border-color: #3980B5;
		border-bottom-width: 3px !important;
		margin-bottom: 0px !important;
	}
	.toc a:visited {
		border-color: #054572;
	}
	/* The underline is only visible while the link is hovered or focused. */
	.toc a:not(:focus):not(:hover) {
		/* Allow colors to cascade through from link styling */
		border-bottom-color: transparent;
	}

	.toc, .toc ol, .toc ul, .toc li {
		list-style: none; /* Numbers must be inlined into source */
		/* because generated content isn't search/selectable and markers can't do multilevel yet */
		margin:  0;
		padding: 0;
		line-height: 1.1rem; /* consistent spacing */
	}

	/* ToC not indented until third level, but font style & margins show hierarchy */
	/* Percentage font sizes compound with nesting; the .secno overrides adjust
	   the section-number size separately at the two deepest levels. */
	.toc > li             { font-weight: bold;   }
	.toc > li li          { font-weight: normal; }
	.toc > li li li       { font-size:   95%;    }
	.toc > li li li li    { font-size:   90%;    }
	.toc > li li li li .secno { font-size: 85%; }
	.toc > li li li li li { font-size:   85%;    }
	.toc > li li li li li .secno { font-size: 100%; }

	/* Float-based layout. The @supports wrapper below is commented out, so
	   these rules apply unconditionally. */
	/* @supports not (display:grid) { */
		.toc > li             { margin: 1.5rem 0;    }
		.toc > li li          { margin: 0.3rem 0;    }
		.toc > li li li       { margin-left: 2rem;   }

		/* Section numbers in a column of their own */
		.toc .secno {
			float: left;
			width: 4rem;
			white-space: nowrap;
		}

		/* Each entry starts below the previous entry's floated number. */
		.toc li {
			clear: both;
		}

		/* The list is shifted right by 5rem and each number is pulled back into
		   that gutter; deeper levels pull back 2rem more per level to offset
		   their own indent. */
		:not(li) > .toc              { margin-left:  5rem; }
		.toc .secno                  { margin-left: -5rem; }
		.toc > li li li .secno       { margin-left: -7rem; }
		.toc > li li li li .secno    { margin-left: -9rem; }
		.toc > li li li li li .secno { margin-left: -11rem; }

		/* Tighten up indentation in narrow ToCs */
		@media (max-width: 30em) {
			:not(li) > .toc              { margin-left:  4rem; }
			.toc .secno                  { margin-left: -4rem; }
			.toc > li li li              { margin-left:  1rem; }
			.toc > li li li .secno       { margin-left: -5rem; }
			.toc > li li li li .secno    { margin-left: -6rem; }
			.toc > li li li li li .secno { margin-left: -7rem; }
		}
	/* } */

	/* Grid-based ToC layout, used only where both grid and display: contents are supported. */
	@supports (display:grid) and (display:contents) {
		/* Use #toc over .toc to override non-@supports rules. */
		/* Two columns: section numbers (auto width) and titles (remaining space).
		   The grid-*-gap properties are the older names for column-gap/row-gap. */
		#toc {
			display: grid;
			align-content: start;
			grid-template-columns: auto 1fr;
			grid-column-gap: 1rem;
			column-gap: 1rem;
			grid-row-gap: .6rem;
			row-gap: .6rem;
		}
		/* The heading spans every column. */
		#toc h2 {
			grid-column: 1 / -1;
			margin-bottom: 0;
		}
		/* Lists, items and links generate no boxes of their own, so their spans
		   become grid items of #toc. */
		#toc ol,
		#toc li,
		#toc a {
			display: contents;
			/* Switch <a> to subgrid when supported */
		}
		#toc span {
			margin: 0;
		}
		#toc > .toc > li > a > span {
			/* The spans of the top-level list,
			   comprising the first items of each top-level section. */
			margin-top: 1.1rem;
		}
		/* Numbers go in column 1; the float-layout width and negative margin are reset. */
		#toc#toc .secno { /* Ugh, need more specificity to override base.css */
			grid-column: 1;
			width: auto;
			margin-left: 0;
		}
		/* Titles go in column 2. */
		#toc .content {
			grid-column: 2;
			width: auto;
			margin-right: 1rem;
		}
		/* Hover feedback is applied to the title span here. */
		#toc .content:hover {
			background: rgba(75%, 75%, 75%, .25);
			border-bottom: 3px solid #054572;
			margin-bottom: -3px;
		}
		/* Indent titles at the third and fourth list levels. */
		#toc li li li .content {
			margin-left: 1rem;
		}
		#toc li li li li .content {
			margin-left: 2rem;
		}
	}


/** Index *********************************************************************/

	/* Index Lists: Layout */
	/* Multi-column index with hanging indents; entries do not break across columns. */
	ul.index {
		margin-left: 0;
		columns: 15em;
		text-indent: 1em hanging;
	}
	ul.index li {
		margin-left: 0;
		list-style: none;
		break-inside: avoid;
	}
	ul.index li li {
		margin-left: 1em;
	}
	ul.index dl {
		margin-top: 0;
	}
	ul.index dt {
		margin: 0.2em 0 0.2em 20px;
	}
	ul.index dd {
		margin: 0.2em 0 0.2em 40px;
	}
	/* Index Lists: Typography */
	ul.index ul,
	ul.index dl {
		font-size: smaller;
	}
	/* Outside print, the span after each index link is transparent until the
	   link is hovered or focused. */
	@media not print {
		ul.index li span {
			white-space: nowrap;
			color: transparent;
		}
		ul.index li a:hover + span,
		ul.index li a:focus + span {
			color: #707070;
		}
	}

/** Index Tables *****************************************************/
	/* See also the data table styling section, which this effectively subclasses */

	/* Index tables: small, left-aligned text with no side auto-margins. */
	table.index {
		margin: 1em 0;
		font-size: small;
		text-align: left;
		border-collapse: collapse;
		border-spacing: 0;
	}

	table.index th,
	table.index td {
		padding: .4em;
	}

	/* Highlight the hovered row, skipping cells that carry a rowspan attribute. */
	table.index tr:hover th:not([rowspan]),
	table.index tr:hover td:not([rowspan]) {
		background: #f7f8f9;
	}

	/* The link in the first column in the property table (formerly a TD) */
	table.index th:first-child a {
		font-weight: bold;
	}

/******************************************************************************/
/*                                    Print                                   */
/******************************************************************************/

	@media print {
		/* Pages have their own margins. */
		html { margin: 0; }

		/* Serif for print. */
		body { font-family: serif; }
	}

	/* Page box margins: 1.5cm top and bottom, 1.1cm left and right. */
	@page {
		margin: 1.5cm 1.1cm;
	}

/******************************************************************************/
/*                                    Legacy                                  */
/******************************************************************************/

	/* This rule is inherited from past style sheets. No idea what it's for. */
	.hide {
		display: none;
	}



/******************************************************************************/
/*                             Overflow Control                               */
/******************************************************************************/

	.figure .caption, .sidefigure .caption, figcaption {
		/* In case the figure is overlarge, cap the caption at 50rem (root-relative,
		   so unaffected by the caption's own font size) and center it. */
		max-width: 50rem;
		margin-left: auto;
		margin-right: auto;
	}
	.overlarge {
		/* Magic to create good table positioning:
		   the "content column" is at most 50em wide (less on smaller screens),
		   and any extra space after ToC + content is empty on the right.

		   1. Table narrower than the content column: centered in the column.
		   2. Table wider than the content but narrower than the available
		      space: left-aligned.
		   3. Table wider than the available space: fills it and scrolls.
		*/
		display: grid;
		grid-template-columns: minmax(0, 50em);
	}
	/* Limit the preferred width of the table and center it. */
	.overlarge > table {
		max-width: 50em;
		margin-right: auto;
		margin-left: auto;
	}

	/* From 55em wide, a negative right margin lets .overlarge extend past the
	   content column. */
	@media (min-width: 55em) {
		.overlarge {
			margin-right: calc(13px + 26.5rem - 50vw);
			max-width: none;
		}
	}
	/* With the ToC in its own sidebar (see the ToC Sidebar section), the margin
	   is recomputed for the shifted content area. */
	@media screen and (min-width: 78em) {
		body:not(.toc-inline) .overlarge {
			/* 30.5em body padding 50em content area */
			margin-right: calc(40em - 50vw) !important;
		}
	}
	@media screen and (min-width: 90em) {
		body:not(.toc-inline) .overlarge {
			/* 4em body margin (set on body at this breakpoint, not on html)
			   + 30.5em body padding + 50em content area = 84.5em */
			margin-right: calc(84.5em - 100vw) !important;
		}
	}

	/* Outside print, overlarge content scrolls horizontally instead of overflowing. */
	@media not print {
		.overlarge {
			overflow-x: auto;
			/* The background declarations below are commented out and have no
			 * effect; they are kept for reference. They layer gradients with
			 * `local` and `scroll` attachment; see Lea Verou's explanation of
			 * background-attachment: local:
			 * http://lea.verou.me/2012/04/background-attachment-local/
			 *
			background: top left  / 4em 100% linear-gradient(to right,  #ffffff, rgba(255, 255, 255, 0)) local,
			            top right / 4em 100% linear-gradient(to left, #ffffff, rgba(255, 255, 255, 0)) local,
			            top left  / 1em 100% linear-gradient(to right,  #c3c3c5, rgba(195, 195, 197, 0)) scroll,
			            top right / 1em 100% linear-gradient(to left, #c3c3c5, rgba(195, 195, 197, 0)) scroll,
			            white;
			background-repeat: no-repeat;
			*/
		}
	}
</style>
<style type="text/css">
    table, th, td {
      border: 1px solid black;
      border-collapse: collapse;
      vertical-align: top;
    }
    th, td {
      border-left: none;
      border-right: none;
      padding: 0px 10px;
    }
    th {
      text-align: center;
    }

    del { background: #fcc; color: #000; text-decoration: line-through; }
    ins { background: #cfc; color: #000; }
    blockquote .highlight:not(.idl) { background: initial; margin: initial; padding: 0.5em }
    blockquote ul { background: inherit; }
    blockquote code.highlight:not(.idl) { padding: initial; }
    blockquote c-[a] { color: inherit; } /* Keyword.Declaration */
    blockquote c-[b] { color: inherit; } /* Keyword.Type */
    blockquote c-[c] { color: inherit; } /* Comment */
    blockquote c-[d] { color: inherit; } /* Comment.Multiline */
    blockquote c-[e] { color: inherit; } /* Name.Attribute */
    blockquote c-[f] { color: inherit; } /* Name.Tag */
    blockquote c-[g] { color: inherit; } /* Name.Variable */
    blockquote c-[k] { color: inherit; } /* Keyword */
    blockquote c-[l] { color: inherit; } /* Literal */
    blockquote c-[m] { color: inherit; } /* Literal.Number */
    blockquote c-[n] { color: inherit; } /* Name */
    blockquote c-[o] { color: inherit; } /* Operator */
    blockquote c-[p] { color: inherit; } /* Punctuation */
    blockquote c-[s] { color: inherit; } /* Literal.String */
    blockquote c-[t] { color: inherit; } /* Literal.String.Single */
    blockquote c-[u] { color: inherit; } /* Literal.String.Double */
    blockquote c-[cp] { color: inherit; } /* Comment.Preproc */
    blockquote c-[c1] { color: inherit; } /* Comment.Single */
    blockquote c-[cs] { color: inherit; } /* Comment.Special */
    blockquote c-[kc] { color: inherit; } /* Keyword.Constant */
    blockquote c-[kn] { color: inherit; } /* Keyword.Namespace */
    blockquote c-[kp] { color: inherit; } /* Keyword.Pseudo */
    blockquote c-[kr] { color: inherit; } /* Keyword.Reserved */
    blockquote c-[ld] { color: inherit; } /* Literal.Date */
    blockquote c-[nc] { color: inherit; } /* Name.Class */
    blockquote c-[no] { color: inherit; } /* Name.Constant */
    blockquote c-[nd] { color: inherit; } /* Name.Decorator */
    blockquote c-[ni] { color: inherit; } /* Name.Entity */
    blockquote c-[ne] { color: inherit; } /* Name.Exception */
    blockquote c-[nf] { color: inherit; } /* Name.Function */
    blockquote c-[nl] { color: inherit; } /* Name.Label */
    blockquote c-[nn] { color: inherit; } /* Name.Namespace */
    blockquote c-[py] { color: inherit; } /* Name.Property */
    blockquote c-[ow] { color: inherit; } /* Operator.Word */
    blockquote c-[mb] { color: inherit; } /* Literal.Number.Bin */
    blockquote c-[mf] { color: inherit; } /* Literal.Number.Float */
    blockquote c-[mh] { color: inherit; } /* Literal.Number.Hex */
    blockquote c-[mi] { color: inherit; } /* Literal.Number.Integer */
    blockquote c-[mo] { color: inherit; } /* Literal.Number.Oct */
    blockquote c-[sb] { color: inherit; } /* Literal.String.Backtick */
    blockquote c-[sc] { color: inherit; } /* Literal.String.Char */
    blockquote c-[sd] { color: inherit; } /* Literal.String.Doc */
    blockquote c-[se] { color: inherit; } /* Literal.String.Escape */
    blockquote c-[sh] { color: inherit; } /* Literal.String.Heredoc */
    blockquote c-[si] { color: inherit; } /* Literal.String.Interpol */
    blockquote c-[sx] { color: inherit; } /* Literal.String.Other */
    blockquote c-[sr] { color: inherit; } /* Literal.String.Regex */
    blockquote c-[ss] { color: inherit; } /* Literal.String.Symbol */
    blockquote c-[vc] { color: inherit; } /* Name.Variable.Class */
    blockquote c-[vg] { color: inherit; } /* Name.Variable.Global */
    blockquote c-[vi] { color: inherit; } /* Name.Variable.Instance */
    blockquote c-[il] { color: inherit; } /* Literal.Number.Integer.Long */
  </style>
  <meta content="Bikeshed version ae6def1cbca03b321b2ad730ac2f51eeba21ed81" name="generator">
  <link href="https://isocpp.org/favicon.ico" rel="icon">
  <meta content="3faccb45098288f1a02dc290acea16e83aa17038" name="document-revision">
<style>
pre {
  margin-top: 0px;
  margin-bottom: 0px;
}
.ins, ins, ins *, span.ins, span.ins * {
  background-color: rgb(200, 250, 200);
  color: rgb(0, 136, 0);
  text-decoration: underline;
}
.del, del, del *, span.del, span.del * {
  background-color: rgb(250, 200, 200);
  color: rgb(255, 0, 0);
  text-decoration: line-through;
  text-decoration-color: rgb(255, 0, 0);
}
math, span.math {
  font-family: serif;
  font-style: italic;
}
ul {
  list-style-type: "— ";
}
blockquote {
  counter-reset: paragraph;
}
div.numbered, div.newnumbered {
  margin-left: 2em;
  margin-top: 1em;
  margin-bottom: 1em;
}
/* Marker box shared by numbered and "new" paragraphs: taken out of normal flow
   and shifted 2em to the left, into the left margin set on the parent div. */
div.numbered:before, div.newnumbered:before {
  position: absolute;
  margin-left: -2em;
  /* "display" is the real property name; "display-style" does not exist in CSS
     and is discarded by browsers. */
  display: block;
}
div.numbered:before {
  content: counter(paragraph);
  counter-increment: paragraph;
}
div.newnumbered:before {
  content: "�";
}
div.numbered ul, div.newnumbered ul {
  counter-reset: list_item;
}
div.numbered li, div.newnumbered li {
  margin-left: 3em;
}
/* Marker box for list items inside numbered/"new" paragraphs: taken out of
   normal flow and shifted 4.8em to the left of the (3em-indented) list item. */
div.numbered li:before, div.newnumbered li:before {
  position: absolute;
  margin-left: -4.8em;
  /* "display" is the real property name; "display-style" does not exist in CSS
     and is discarded by browsers. */
  display: block;
}
div.numbered li:before {
  content: "(" counter(paragraph) "." counter(list_item) ")";
  counter-increment: list_item;
}
div.newnumbered li:before {
  content: "(�." counter(list_item) ")";
  counter-increment: list_item;
}
</style>
<style>/* style-md-lists */

/* This is a weird hack for me not yet following the commonmark spec
   regarding paragraph and lists. */
[data-md] > :first-child {
    margin-top: 0;
}
[data-md] > :last-child {
    margin-bottom: 0;
}</style>
<style>/* style-counters */

body {
    counter-reset: example figure issue;
}
.issue {
    counter-increment: issue;
}
.issue:not(.no-marker)::before {
    content: "Issue " counter(issue);
}

.example {
    counter-increment: example;
}
.example:not(.no-marker)::before {
    content: "Example " counter(example);
}
/* Invalid/illegal examples get their own label. The trailing space inside the
   string keeps the label and the counter apart ("Invalid Example 1"), matching
   the "Example " and "Issue " markers. */
.invalid.example:not(.no-marker)::before,
.illegal.example:not(.no-marker)::before {
    content: "Invalid Example " counter(example);
}

figcaption {
    counter-increment: figure;
}
figcaption:not(.no-marker)::before {
    content: "Figure " counter(figure) " ";
}</style>
<style>/* style-syntax-highlighting */

.highlight:not(.idl) { background: hsl(24, 20%, 95%); }
code.highlight { padding: .1em; border-radius: .3em; }
pre.highlight, pre > code.highlight { display: block; padding: 1em; margin: .5em 0; overflow: auto; border-radius: 0; }
c-[a] { color: #990055 } /* Keyword.Declaration */
c-[b] { color: #990055 } /* Keyword.Type */
c-[c] { color: #708090 } /* Comment */
c-[d] { color: #708090 } /* Comment.Multiline */
c-[e] { color: #0077aa } /* Name.Attribute */
c-[f] { color: #669900 } /* Name.Tag */
c-[g] { color: #222222 } /* Name.Variable */
c-[k] { color: #990055 } /* Keyword */
c-[l] { color: #000000 } /* Literal */
c-[m] { color: #000000 } /* Literal.Number */
c-[n] { color: #0077aa } /* Name */
c-[o] { color: #999999 } /* Operator */
c-[p] { color: #999999 } /* Punctuation */
c-[s] { color: #a67f59 } /* Literal.String */
c-[t] { color: #a67f59 } /* Literal.String.Single */
c-[u] { color: #a67f59 } /* Literal.String.Double */
c-[cp] { color: #708090 } /* Comment.Preproc */
c-[c1] { color: #708090 } /* Comment.Single */
c-[cs] { color: #708090 } /* Comment.Special */
c-[kc] { color: #990055 } /* Keyword.Constant */
c-[kn] { color: #990055 } /* Keyword.Namespace */
c-[kp] { color: #990055 } /* Keyword.Pseudo */
c-[kr] { color: #990055 } /* Keyword.Reserved */
c-[ld] { color: #000000 } /* Literal.Date */
c-[nc] { color: #0077aa } /* Name.Class */
c-[no] { color: #0077aa } /* Name.Constant */
c-[nd] { color: #0077aa } /* Name.Decorator */
c-[ni] { color: #0077aa } /* Name.Entity */
c-[ne] { color: #0077aa } /* Name.Exception */
c-[nf] { color: #0077aa } /* Name.Function */
c-[nl] { color: #0077aa } /* Name.Label */
c-[nn] { color: #0077aa } /* Name.Namespace */
c-[py] { color: #0077aa } /* Name.Property */
c-[ow] { color: #999999 } /* Operator.Word */
c-[mb] { color: #000000 } /* Literal.Number.Bin */
c-[mf] { color: #000000 } /* Literal.Number.Float */
c-[mh] { color: #000000 } /* Literal.Number.Hex */
c-[mi] { color: #000000 } /* Literal.Number.Integer */
c-[mo] { color: #000000 } /* Literal.Number.Oct */
c-[sb] { color: #a67f59 } /* Literal.String.Backtick */
c-[sc] { color: #a67f59 } /* Literal.String.Char */
c-[sd] { color: #a67f59 } /* Literal.String.Doc */
c-[se] { color: #a67f59 } /* Literal.String.Escape */
c-[sh] { color: #a67f59 } /* Literal.String.Heredoc */
c-[si] { color: #a67f59 } /* Literal.String.Interpol */
c-[sx] { color: #a67f59 } /* Literal.String.Other */
c-[sr] { color: #a67f59 } /* Literal.String.Regex */
c-[ss] { color: #a67f59 } /* Literal.String.Symbol */
c-[vc] { color: #0077aa } /* Name.Variable.Class */
c-[vg] { color: #0077aa } /* Name.Variable.Global */
c-[vi] { color: #0077aa } /* Name.Variable.Instance */
c-[il] { color: #000000 } /* Literal.Number.Integer.Long */
</style>
<style>/* style-selflinks */

.heading, .issue, .note, .example, li, dt {
    position: relative;
}
a.self-link {
    position: absolute;
    top: 0;
    left: calc(-1 * (3.5rem - 26px));
    width: calc(3.5rem - 26px);
    height: 2em;
    text-align: center;
    border: none;
    transition: opacity .2s;
    opacity: .5;
}
a.self-link:hover {
    opacity: 1;
}
.heading > a.self-link {
    font-size: 83%;
}
li > a.self-link {
    left: calc(-1 * (3.5rem - 26px) - 2em);
}
dfn > a.self-link {
    top: auto;
    left: auto;
    opacity: 0;
    width: 1.5em;
    height: 1.5em;
    background: gray;
    color: white;
    font-style: normal;
    transition: opacity .2s, background-color .2s, color .2s;
}
dfn:hover > a.self-link {
    opacity: 1;
}
dfn > a.self-link:hover {
    color: black;
}

a.self-link::before            { content: "¶"; }
.heading > a.self-link::before { content: "§"; }
dfn > a.self-link::before      { content: "#"; }</style>
<style>/* style-autolinks */

.css.css, .property.property, .descriptor.descriptor {
    color: #005a9c;
    font-size: inherit;
    font-family: inherit;
}
.css::before, .property::before, .descriptor::before {
    content: "‘";
}
.css::after, .property::after, .descriptor::after {
    content: "’";
}
.property, .descriptor {
    /* Don't wrap property and descriptor names */
    white-space: nowrap;
}
.type { /* CSS value <type> */
    font-style: italic;
}
pre .property::before, pre .property::after {
    content: "";
}
[data-link-type="property"]::before,
[data-link-type="propdesc"]::before,
[data-link-type="descriptor"]::before,
[data-link-type="value"]::before,
[data-link-type="function"]::before,
[data-link-type="at-rule"]::before,
[data-link-type="selector"]::before,
[data-link-type="maybe"]::before {
    content: "‘";
}
[data-link-type="property"]::after,
[data-link-type="propdesc"]::after,
[data-link-type="descriptor"]::after,
[data-link-type="value"]::after,
[data-link-type="function"]::after,
[data-link-type="at-rule"]::after,
[data-link-type="selector"]::after,
[data-link-type="maybe"]::after {
    content: "’";
}

[data-link-type].production::before,
[data-link-type].production::after,
.prod [data-link-type]::before,
.prod [data-link-type]::after {
    content: "";
}

[data-link-type=element],
[data-link-type=element-attr] {
    font-family: Menlo, Consolas, "DejaVu Sans Mono", monospace;
    font-size: .9em;
}
[data-link-type=element]::before { content: "<" }
[data-link-type=element]::after  { content: ">" }

[data-link-type=biblio] {
    white-space: pre;
}</style>
 <body class="h-entry">
  <div class="head">
   <p data-fill-with="logo"></p>
   <h1 class="p-name no-ref" id="title">P1629R1<br>Transcoding the 🌐 - Standard Text Encoding</h1>
   <h2 class="no-num no-toc no-ref heading settled" id="subtitle"><span class="content">Published Proposal, <time class="dt-updated" datetime="2020-03-02">2020-03-02</time></span></h2>
   <div data-fill-with="spec-metadata">
    <dl>
     <dt>Authors:
     <dd class="editor p-author h-card vcard"><a class="p-name fn u-email email" href="mailto:phdofthehouse@gmail.com">JeanHeyd Meneide</a>
     <dd class="editor p-author h-card vcard"><a class="p-name fn u-email email" href="mailto:shepherd@soasis.org">Shepherd (Shepherd's Oasis)</a>
     <dt>Project:
     <dd>ISO/IEC JTC1/SC22/WG21 14882: Programming Language — C++
     <dt>Audience:
     <dd>?
     <dt>Latest:
     <dd><a href="https://thephd.github.io/vendor/future_cxx/papers/d1629.html">https://thephd.github.io/vendor/future_cxx/papers/d1629.html</a>
    </dl>
   </div>
   <div data-fill-with="warning"></div>
   <hr title="Separator for header">
  </div>
  <div class="p-summary" data-fill-with="abstract">
   <h2 class="no-num no-toc no-ref heading settled" id="abstract"><span class="content">Abstract</span></h2>
   <p>The standard lacks facilities for transcoding text from one form into another, leaving a serious barrier to entry for individuals who want to process text in any sensible manner in the Standard Library. This paper explores and proposes a static interface for encoding that can be used and built upon for the creation of higher-level abstractions.</p>
  </div>
  <nav data-fill-with="table-of-contents" id="toc">
   <h2 class="no-num no-toc no-ref" id="contents">Table of Contents</h2>
   <ol class="toc" role="directory">
    <li>
     <a href="#changelog"><span class="secno">1</span> <span class="content">Revision History</span></a>
     <ol class="toc">
      <li><a href="#changelog-r1"><span class="secno">1.1</span> <span class="content">Revision 1 - March 2<sup>nd</sup>, 2020</span></a>
      <li><a href="#changelog-r0"><span class="secno">1.2</span> <span class="content">Revision 0 - June 17<sup>th</sup>, 2019</span></a>
     </ol>
    <li>
     <a href="#motivation"><span class="secno">2</span> <span class="content">Motivation</span></a>
     <ol class="toc">
      <li>
       <a href="#motivation-basic"><span class="secno">2.1</span> <span class="content">The Basic Ideas</span></a>
       <ol class="toc">
        <li><a href="#motivation-basic-execution"><span class="secno">2.1.1</span> <span class="content">Reading "Execution Encoding" Data</span></a>
        <li><a href="#motivation-basic-networking"><span class="secno">2.1.2</span> <span class="content">Networking with Boost.Beast</span></a>
       </ol>
      <li><a href="#motivation-problems"><span class="secno">2.2</span> <span class="content">Current Problems</span></a>
      <li><a href="#motivation-objectives"><span class="secno">2.3</span> <span class="content">Statement of Objectives</span></a>
     </ol>
    <li>
     <a href="#design"><span class="secno">3</span> <span class="content">Design</span></a>
     <ol class="toc">
      <li><a href="#design-definitions"><span class="secno">3.1</span> <span class="content">Definitions</span></a>
      <li>
       <a href="#design-low-level"><span class="secno">3.2</span> <span class="content">Low-Level</span></a>
       <ol class="toc">
        <li><a href="#design-low-level-error_codes"><span class="secno">3.2.1</span> <span class="content">Error Codes</span></a>
        <li>
         <a href="#design-low-level-results"><span class="secno">3.2.2</span> <span class="content">Result Types</span></a>
         <ol class="toc">
          <li><a href="#design-low-level-results-ranges"><span class="secno">3.2.2.1</span> <span class="content">Input and Output Ranges</span></a>
          <li><a href="#design-low-level-results-error_handler"><span class="secno">3.2.2.2</span> <span class="content">Error Handling: Allow All The Options</span></a>
         </ol>
        <li>
         <a href="#design-low-level-encodings"><span class="secno">3.2.3</span> <span class="content">The Encoding Object</span></a>
         <ol class="toc">
          <li><a href="#design-low-level-encodings-standard"><span class="secno">3.2.3.1</span> <span class="content">Encodings Provided by the Standard</span></a>
          <li><a href="#design-low-level-encodings-variant"><span class="secno">3.2.3.2</span> <span class="content">UTF Encodings: variants?</span></a>
          <li><a href="#design-low-level-encodings-encoding_scheme"><span class="secno">3.2.3.3</span> <span class="content">Encoding Schemes: Byte-Based</span></a>
          <li><a href="#design-low-level-encodings-default"><span class="secno">3.2.3.4</span> <span class="content">Default Encodings</span></a>
         </ol>
        <li><a href="#design-low-level-stateful"><span class="secno">3.2.4</span> <span class="content">Stateful Objects, or Stateful Parameters?</span></a>
       </ol>
      <li>
       <a href="#design-high-level"><span class="secno">3.3</span> <span class="content">High Level</span></a>
       <ol class="toc">
        <li>
         <a href="#design-high-level-free"><span class="secno">3.3.1</span> <span class="content">Eager Free Functions</span></a>
         <ol class="toc">
          <li><a href="#design-high-level-free-decode"><span class="secno">3.3.1.1</span> <span class="content">Free Function <code class="highlight"><c- n>decode</c-></code></span></a>
          <li><a href="#design-high-level-free-encode"><span class="secno">3.3.1.2</span> <span class="content">Free Function <code class="highlight"><c- n>encode</c-></code></span></a>
          <li><a href="#design-high-level-free-transcode"><span class="secno">3.3.1.3</span> <span class="content">Free Function <code class="highlight"><c- n>transcode</c-></code></span></a>
          <li><a href="#design-high-level-free-validate"><span class="secno">3.3.1.4</span> <span class="content">Free Function <code class="highlight"><c- n>validate</c-></code></span></a>
          <li><a href="#design-high-level-free-count"><span class="secno">3.3.1.5</span> <span class="content">Free Functions <code class="highlight"><c- n>decode_count</c-></code> and <code class="highlight"><c- n>encode_count</c-></code></span></a>
         </ol>
        <li><a href="#design-high-level-safety"><span class="secno">3.3.2</span> <span class="content">Safety with the Free Functions</span></a>
        <li>
         <a href="#design-high-level-ranges"><span class="secno">3.3.3</span> <span class="content">Improving Usability for Low-Memory Environments: Ranges</span></a>
         <ol class="toc">
          <li><a href="#design-high-level-ranges-decode"><span class="secno">3.3.3.1</span> <span class="content"><code class="highlight"><c- n>decode_view</c-></code> and <code class="highlight"><c- n>decode_iterator</c-></code></span></a>
          <li><a href="#design-high-level-ranges-encode"><span class="secno">3.3.3.2</span> <span class="content"><code class="highlight"><c- n>encode_view</c-></code> and <code class="highlight"><c- n>encode_iterator</c-></code></span></a>
          <li><a href="#design-high-level-ranges-transcode"><span class="secno">3.3.3.3</span> <span class="content"><code class="highlight"><c- n>transcode_view</c-></code> and <code class="highlight"><c- n>transcode_iterator</c-></code></span></a>
         </ol>
       </ol>
      <li>
       <a href="#design-speed"><span class="secno">3.4</span> <span class="content">The Need for Speed</span></a>
       <ol class="toc">
        <li>
         <a href="#design-speed-customization"><span class="secno">3.4.1</span> <span class="content">Speed and Flexibility for Everyone: Customization Points</span></a>
         <ol class="toc">
          <li><a href="#design-speed-customization-transcode_one"><span class="secno">3.4.1.1</span> <span class="content">One-by-one Transcoding Shortcuts</span></a>
          <li><a href="#design-speed-customization-free_transcoding"><span class="secno">3.4.1.2</span> <span class="content">Customizability: Transcoding Free Functions</span></a>
          <li><a href="#design-speed-customization-free_validation_count"><span class="secno">3.4.1.3</span> <span class="content">Customizability: Validating and Counting Free Functions</span></a>
         </ol>
       </ol>
     </ol>
    <li>
     <a href="#implementation"><span class="secno">4</span> <span class="content">Implementation Experience</span></a>
     <ol class="toc">
      <li><a href="#implementation-previous"><span class="secno">4.1</span> <span class="content">Previous Work</span></a>
      <li><a href="#implementation-visible"><span class="secno">4.2</span> <span class="content">Current Work</span></a>
     </ol>
    <li>
     <a href="#faq"><span class="secno">5</span> <span class="content">FAQ</span></a>
     <ol class="toc">
      <li><a href="#faq-max_code_points"><span class="secno">5.1</span> <span class="content">Question: Why is there a <code class="highlight"><c- n>max_code_points</c-></code> value? Won’t you only ever output a single unicode code point?</span></a>
      <li><a href="#faq-old_unicode"><span class="secno">5.2</span> <span class="content">Question: What about Old Unicode Encodings / Private Use Area Encodings?</span></a>
      <li><a href="#faq-encode_decode_transcoding"><span class="secno">5.3</span> <span class="content">Question: It can be faster to bulk-decode, then bulk-encode instead of one-by-one transcoding. Why not that design?</span></a>
      <li><a href="#faq-normalization"><span class="secno">5.4</span> <span class="content">Question: Where is the specification for <code class="highlight"><c- n>normalization_view</c-><c- o>&lt;</c-><c- n>nfkc</c-><c- o>></c-></code> and <code class="highlight"><c- n>normalize</c-><c- p>(...)</c-></code>?</span></a>
      <li><a href="#faq-text_types"><span class="secno">5.5</span> <span class="content">Question: Where is the specification for <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>basic_text</c-></code> and <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>basic_text_view</c-></code>?</span></a>
     </ol>
    <li><a href="#acknowledgements"><span class="secno">6</span> <span class="content">Acknowledgements</span></a>
    <li>
     <a href="#references"><span class="secno"></span> <span class="content">References</span></a>
     <ol class="toc">
      <li><a href="#informative"><span class="secno"></span> <span class="content">Informative References</span></a>
     </ol>
   </ol>
  </nav>
  <main>
   <blockquote>
    <p>"In these meetings, these conferences, we only see a little. C++ is not done in the light. The majority of C++ is not done publicly. Most C++ is done privately, in the dark, and that is where it matters most."</p>
    <p>– Daniela K. Engert, November 14<sup>th</sup>, 2019</p>
   </blockquote>
   <h2 class="heading settled" data-level="1" id="changelog"><span class="secno">1. </span><span class="content">Revision History</span><a class="self-link" href="#changelog"></a></h2>
   <h3 class="heading settled" data-level="1.1" id="changelog-r1"><span class="secno">1.1. </span><span class="content">Revision 1 - March 2<sup>nd</sup>, 2020</span><a class="self-link" href="#changelog-r1"></a></h3>
   <ul>
    <li data-md>
     <p>Thoroughly improve <a href="#motivation">§ 2 Motivation</a>.</p>
     <ul>
      <li data-md>
       <p>Explicitly state goals and non-goals in the <a href="#motivation-objectives">§ 2.3 Statement of Objectives</a>.</p>
     </ul>
    <li data-md>
     <p>Rewrite most of paper to more thoroughly explain the API, especially the <a href="#design-high-level">§ 3.3 High Level</a> section with <code class="highlight"><c- n>validate</c-></code>, <code class="highlight"><c- n>decode_count</c-></code>, <code class="highlight"><c- n>encode_count</c-></code>, and more APIs.</p>
     <ul>
      <li data-md>
       <p>Drastically improve the explanation of the free functions in <a href="#design-high-level-free">§ 3.3.1 Eager Free Functions</a>.</p>
      <li data-md>
       <p>Emphasize the need for ranges in <a href="#design-high-level-ranges">§ 3.3.3 Improving Usability for Low-Memory Environments: Ranges</a>.</p>
     </ul>
    <li data-md>
     <p>Add new descriptions in the low-level API regarding error handling in <a href="#design-low-level-results-error_handler">§ 3.2.2.2 Error Handling: Allow All The Options</a>.</p>
    <li data-md>
     <p>Describe customization points in full in <a href="#design-speed-customization">§ 3.4.1 Speed and Flexibility for Everyone: Customization Points</a>.</p>
    <li data-md>
     <p>The <a href="#implementation-visible">Implementation</a> is now hidden, after <a href="https://ThePhD.github.io/assets/snd/Crystal%20Carry%20Invisibility.mp3">doing a magic trick</a>. Contact the author for access.</p>
    <li data-md>
     <p>Add <a href="#faq">§ 5 FAQ</a>.</p>
    <li data-md>
     <p>Going no-where, targeted at no-one.</p>
   </ul>
   <h3 class="heading settled" data-level="1.2" id="changelog-r0"><span class="secno">1.2. </span><span class="content">Revision 0 - June 17<sup>th</sup>, 2019</span><a class="self-link" href="#changelog-r0"></a></h3>
   <ul>
    <li data-md>
     <p>Initial release of exploratory paper.</p>
   </ul>
   <h2 class="heading settled" data-level="2" id="motivation"><span class="secno">2. </span><span class="content">Motivation</span><a class="self-link" href="#motivation"></a></h2>
   <p>It’s 2020 and Unicode is still barely supported in both the C and C++ standards.</p>
   <p>From the POSIX standard requiring a single-byte encoding by default, heavy limitations placed in <code class="highlight"><c- n>codecvt</c-></code> facets in C and C++, and the utter lack of UTF8/16/32 multi-unit conversion functions by the standard, the programming languages that have shaped the face of development in operating systems, embedded devices and mobile applications have pushed forward a world that is incredibly unfriendly to a world of text beyond ASCII English. Developers frequently roll their own solutions, and almost every major codebase -- from Chrome to Firefox, Qt to Copperspice, and more -- all have their own variations of hand-crafted text processing. With no standard implementation in C++ and libraries split between various third party implementations plus ICU, it is increasingly difficult and error-prone to handle what is the basic means of communication between people on the planet using C++.</p>
   <p>This paper aims to explore the design space for both extremely high performing transcoding (encoding and decoding) as well as a flexible one-by-one interface for more careful and meticulous text processing. This proposal arises from industry experience in large codebases and best-practice open source explorations with <a data-link-type="biblio" href="#biblio-libogonek">[libogonek]</a>, <a data-link-type="biblio" href="#biblio-icu">[icu]</a>, <a data-link-type="biblio" href="#biblio-boosttext">[boost.text]</a> and <a data-link-type="biblio" href="#biblio-text_view">[text_view]</a> while also building on the concepts and design choices found in both <a data-link-type="biblio" href="#biblio-range-v3">[range-v3]</a> and pre-existing text encoding solutions such as Windows’s <code class="highlight"><c- n>WideCharToMultiByte</c-></code> interfaces, *nix utility iconv, and more.</p>
   <p>The ultimate goal is to allow an interface that is correct by default but capable of being fast, both through Standard Library implementer efforts and through program-overridable ADL free functions. It will produce interfaces for encoding, decoding, and transcoding in eager and lazy forms.</p>
   <h3 class="heading settled" data-level="2.1" id="motivation-basic"><span class="secno">2.1. </span><span class="content">The Basic Ideas</span><a class="self-link" href="#motivation-basic"></a></h3>
   <p>While some of these types aren’t contained in this paper, the end goal is to enable the following to be possible:</p>
<pre class="language-cpp highlight"><c- cp>#include</c-> &lt;encoding> // this proposal
<c- cp>#include</c-> &lt;text>     // future proposal

<c- b>int</c-> <c- nf>main</c-> <c- p>(</c-><c- b>int</c-><c- p>,</c-> <c- b>char</c-><c- o>*</c-><c- p>[])</c-> <c- p>{</c->
	<c- k>using</c-> <c- k>namespace</c-> <c- n>std</c-><c- o>::</c-><c- n>literals</c-><c- p>;</c->
	<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>u8text</c-> <c- n>my_text</c->
		<c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>transcode</c-><c- p>(</c-><c- s>"안녕하세요 👋"</c-><c- n>sv</c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-><c- p>{});</c->
	<c- n>std</c-><c- o>::</c-><c- n>cout</c-> <c- o>&lt;&lt;</c-> <c- n>my_text</c-> <c- o>&lt;&lt;</c-> <c- n>std</c-><c- o>::</c-><c- n>endl</c-><c- p>;</c-> <c- c1>// prints 안녕하세요 👋 to a capable console</c->
	<c- n>std</c-><c- o>::</c-><c- n>cout</c-> <c- o>&lt;&lt;</c-> <c- n>std</c-><c- o>::</c-><c- n>hex</c-><c- p>;</c->
	<c- k>for</c-> <c- p>(</c-><c- k>const</c-> <c- k>auto</c-><c- o>&amp;</c-> <c- nl>cp</c-> <c- p>:</c-> <c- n>my_text</c-><c- p>)</c-> <c- p>{</c->
		<c- n>std</c-><c- o>::</c-><c- n>cout</c-> <c- o>&lt;&lt;</c-> <c- k>static_cast</c-><c- o>&lt;</c-><c- b>uint32_t</c-><c- o>></c-><c- p>(</c-><c- n>cp</c-><c- p>)</c-> <c- o>&lt;&lt;</c-> <c- s>" "</c-><c- p>;</c->
	<c- p>}</c->
	<c- c1>// 0000c548 0000b155 0000d558 0000c138 0000c694 00000020 0001f44b</c->
	<c- k>return</c-> <c- mi>0</c-><c- p>;</c->
<c- p>}</c->
</pre>
   <p>This paper is in support of reaching this goal. The following examples are more concretely tied to this proposal in particular.</p>
   <h4 class="heading settled" data-level="2.1.1" id="motivation-basic-execution"><span class="secno">2.1.1. </span><span class="content">Reading "Execution Encoding" Data</span><a class="self-link" href="#motivation-basic-execution"></a></h4>
   <p>The following is an example of opening a file handle on Windows after converting from the execution encoding of the system <code class="highlight"><c- n>argv</c-></code> to the wide arguments for <code class="highlight"><c- n>CreateFileW</c-></code>.</p>
<pre class="language-cpp highlight"><c- cp>#define WIN32_LEAN_AND_MEAN 1</c->
<c- cp>#include</c-> &lt;windows.h>

<c- cp>#include</c-> &lt;encoding> // this proposal
<c- cp>#include</c-> &lt;iostream>

<c- b>int</c-> <c- nf>main</c-> <c- p>(</c-><c- b>int</c-> <c- n>argc</c-><c- p>,</c-> <c- b>char</c-><c- o>*</c-> <c- n>argv</c-><c- p>[])</c-> <c- p>{</c->

	<c- k>if</c-> <c- p>(</c-><c- n>argc</c-> <c- o>&lt;</c-> <c- mi>2</c-><c- p>)</c-> <c- p>{</c->
		<c- n>std</c-><c- o>::</c-><c- n>cerr</c-> <c- o>&lt;&lt;</c-> <c- s>"Path unspecified: exiting."</c-> <c- o>&lt;&lt;</c-> <c- n>std</c-><c- o>::</c-><c- n>endl</c-><c- p>;</c->
		<c- k>return</c-> <c- o>-</c-><c- mi>1</c-><c- p>;</c->
	<c- p>}</c->

	<c- n>std</c-><c- o>::</c-><c- n>wstring</c-> <c- n>path_as_wstr</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>transcode</c-><c- p>(</c->
		<c- n>std</c-><c- o>::</c-><c- n>string_view</c-><c- p>(</c-><c- n>argv</c-><c- p>[</c-><c- mi>1</c-><c- p>]),</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>wide_execution</c-><c- p>{});</c->
	
	<c- c1>// Interop with Windows</c->
	<c- n>std</c-><c- o>::</c-><c- n>unique_ptr</c-><c- o>&lt;</c-><c- n>HANDLE</c-><c- p>,</c-> <c- n>FileHandleDeleter</c-><c- o>></c-> <c- n>target_file</c-> <c- o>=</c->
		<c- n>CreateFileW</c-><c- p>(</c-><c- n>path_as_wstr</c-><c- p>.</c-><c- n>data</c-><c- p>(),</c-> <c- n>GENERIC_WRITE</c-><c- p>,</c->
			<c- mi>0</c-><c- p>,</c-> NULL<c- p>,</c-> <c- n>CREATE_ALWAYS</c-><c- p>,</c->
			<c- n>FILE_ATTRIBUTE_NORMAL</c-><c- p>,</c-> NULL<c- p>);</c->
	
	<c- k>if</c-> <c- p>(</c-><c- o>!</c-><c- n>target_file</c-><c- p>)</c-> <c- p>{</c->
		<c- c1>// GetLastError(), etc...</c->
		<c- k>return</c-> <c- o>-</c-><c- mi>2</c-><c- p>;</c->
	<c- p>}</c->

	<c- d>/* Use File... */</c->

	<c- k>return</c-> <c- mi>0</c-><c- p>;</c->
<c- p>}</c->
</pre>
   <p>This paper directly enables such a use case.</p>
   <h4 class="heading settled" data-level="2.1.2" id="motivation-basic-networking"><span class="secno">2.1.2. </span><span class="content">Networking with Boost.Beast</span><a class="self-link" href="#motivation-basic-networking"></a></h4>
   <p>The following is an example using this proposal to do a byte-based read off the network of a UTF-16 Big Endian payload on any machine.</p>
<pre class="language-cpp highlight"><c- cp>#include</c-> &lt;boost/beast.hpp>
<c- cp>#include</c-> &lt;boost/beast/http.hpp>
<c- cp>#include</c-> &lt;boost/asio/ip/tcp.hpp>

<c- cp>#include</c-> &lt;iostream>
<c- cp>#include</c-> &lt;encoding> // this proposal

<c- k>namespace</c-> <c- n>beast</c-> <c- o>=</c-> <c- n>boost</c-><c- o>::</c-><c- n>beast</c-><c- p>;</c->
<c- k>namespace</c-> <c- n>http</c-> <c- o>=</c-> <c- n>beast</c-><c- o>::</c-><c- n>http</c-><c- p>;</c->
<c- k>using</c-> <c- n>tcp</c-> <c- o>=</c-> <c- n>boost</c-><c- o>::</c-><c- n>asio</c-><c- o>::</c-><c- n>ip</c-><c- o>::</c-><c- n>tcp</c-><c- p>;</c->
<c- k>using</c-> <c- n>results_type</c-> <c- o>=</c-> <c- n>tcp</c-><c- o>::</c-><c- n>resolver</c-><c- o>::</c-><c- n>results_type</c-><c- p>;</c->

<c- k>class</c-> <c- nc>session</c-> <c- o>:</c-> <c- k>public</c-> <c- n>std</c-><c- o>::</c-><c- n>enable_shared_from_this</c-><c- o>&lt;</c-><c- n>session</c-><c- o>></c-> <c- p>{</c->
	<c- d>/* ... */</c->
	<c- n>http</c-><c- o>::</c-><c- n>request</c-><c- o>&lt;</c-><c- n>http</c-><c- o>::</c-><c- n>empty_body</c-><c- o>></c-> <c- n>req_</c-><c- p>;</c->
	<c- n>std</c-><c- o>::</c-><c- n>vector</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>byte</c-><c- o>></c-> <c- n>res_body_</c-><c- p>;</c->
	<c- n>http</c-><c- o>::</c-><c- n>response</c-><c- o>&lt;</c-><c- n>http</c-><c- o>::</c-><c- n>vector_body</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>byte</c-><c- o>>></c-> <c- n>res_</c-><c- p>;</c->
	<c- n>std</c-><c- o>::</c-><c- n>u8string</c-> <c- n>converted_body_</c-><c- p>;</c->
	
	<c- d>/* ... */</c->
	
	<c- b>void</c-> <c- nf>on_connect</c-><c- p>(</c-><c- n>beast</c-><c- o>::</c-><c- n>error_code</c-> <c- n>ec</c-><c- p>,</c-> <c- n>results_type</c-><c- o>::</c-><c- n>endpoint_type</c-><c- p>);</c->
	<c- b>void</c-> <c- nf>on_resolve</c-><c- p>(</c-><c- n>beast</c-><c- o>::</c-><c- n>error_code</c-> <c- n>ec</c-><c- p>,</c-> <c- n>results_type</c-> <c- n>results</c-><c- p>);</c->

	<c- d>/* ... */</c->

	<c- b>void</c-> <c- nf>on_read</c-><c- p>(</c-><c- n>beast</c-><c- o>::</c-><c- n>error_code</c-> <c- n>ec</c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- b>size_t</c-> <c- n>bytes_transferred</c-><c- p>)</c-> <c- p>{</c->
		<c- k>if</c-> <c- p>(</c-><c- n>ec</c-><c- p>)</c-> <c- p>{</c->
			<c- n>log_fail</c-><c- p>(</c-><c- n>ec</c-><c- p>,</c-> u8<c- s>"read failed"</c-><c- p>);</c->
			<c- k>return</c-><c- p>;</c->
		<c- p>}</c->

		<c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>byte</c-><c- o>></c-> <c- n>bytes</c-><c- p>(</c-><c- n>res_body_</c-><c- p>.</c-><c- n>data</c-><c- p>(),</c-> <c- n>bytes_transferred</c-><c- p>);</c->
		<c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>unbounded_view</c-> <c- n>output</c-><c- p>(</c-><c- n>std</c-><c- o>::</c-><c- n>back_inserter</c-><c- p>(</c-><c- n>converted_body_</c-><c- p>));</c->

		<c- c1>// utf16, but big endian</c->
		<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_scheme</c-><c- o>&lt;</c->
			<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf16</c-><c- p>,</c->
			<c- n>std</c-><c- o>::</c-><c- n>endian</c-><c- o>::</c-><c- n>big</c->
		<c- o>></c-> <c- n>from_encoding</c-><c- p>{};</c->

		<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-> <c- n>to_encoding</c-><c- p>{};</c->
		
		<c- c1>// transcode from bytes that are UTF16, Big Endian,</c->
		<c- c1>// into unbounded output</c->
		<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>transcode</c-><c- p>(</c-><c- n>bytes</c-><c- p>,</c-> <c- n>output</c-><c- p>,</c-> <c- n>from_encoding</c-><c- p>,</c-> <c- n>to_encoding</c-><c- p>);</c->
		<c- n>std</c-><c- o>::</c-><c- n>clog</c-> <c- o>&lt;&lt;</c-> <c- n>converted_body_</c-> <c- o>&lt;&lt;</c-> <c- n>std</c-><c- o>::</c-><c- n>endl</c-><c- p>;</c->
		
		<c- d>/* Commit / clean up, etc. */</c->
	<c- p>}</c->
<c- p>};</c->
</pre>
   <p>This paper directly enables such a use case.</p>
   <h3 class="heading settled" data-level="2.2" id="motivation-problems"><span class="secno">2.2. </span><span class="content">Current Problems</span><a class="self-link" href="#motivation-problems"></a></h3>
   <blockquote>
    <p>I don’t write any software which runs only in English. I’m tired of writing the same code different ways all the time just to display a handful of strings. Lately, I just skip C++ for anything that displays UI -- it’s so much easier in every other modern language.</p>
   </blockquote>
   <blockquote>
    <p>This is REQUIRED for using C++ with any software which needs to run in multiple languages, without rolling your own code. I’m tired of writing this from scratch for every separate project (cannot share code for most of them), using different underlying libraries for each (as licensing and processing requirements vary, I can’t just pick one library and use it everywhere). Unfortunately, I have no confidence the ISO committee understands the problem well enough, given how it patted itself on the back so much for adding u8"", u"", and U"" a while back. Real-world software which runs in multiple languages never hard-codes strings...</p>
   </blockquote>
   <blockquote>
    <p>Norway has its own character set which is a variant of ISO-8859-10 with modifications to a couple of characters. This proposal would ease the transition for existing software when C++ gets (better/more coherent) support for Unicode.</p>
   </blockquote>
   <blockquote>
    <p>The standard : "Oh yeah hey dudes <code class="highlight"><c- n>codecvt</c-></code> is deprecated but we didn’t feel like writing an alternative so good luck yolo".</p>
   </blockquote>
   <blockquote>
    <p>– <a href="https://herbsutter.com/2019/07/25/survey-results-your-top-five-iso-c-feature-proposals/">Herb Sutter’s "Top 5 C++ Proposals" Survey, Survey Respondent</a></p>
   </blockquote>
   <p>Text in the Standard is a desert wasteland.</p>
   <p>After pulling <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>wstring_convert</c-></code> from the language (for a very good reason, yes), users were left with no proper utilities to convert Unicode to Unicode, or convert execution / wide execution text to Unicode and back. People reach out for ICU, but the API -- while extremely fast -- is opaque and not the friendliest to use. <code class="highlight"><c- n>iconv</c-></code> is not easy to build everywhere, and applications ages ago have shipped all manner of ad-hoc solutions (or not) to the text problem without working together or sharing their libraries with the whole ecosystem. As text -- and particularly, the encoding of text -- stands as one of the greatest barriers to Systems Programming languages being more diverse and friendly, there is a strong obligation to provide a standard solution that is capable of lasting the next 40 years unmodified.</p>
   <p>The use cases for text encoding are vast. From: basic processing of user-entered data; sanitization of scripts; domain name protection in browsers; text conversions when working with legacy systems or differing new/Unicode systems; supplying the components that can be successfully used with industry-standard FreeType/Harfbuzz and DirectWrite; talking properly to legacy GDI applications; communicating string data in JSON; receiving market data from the Chinese Exchange in GB18030; converting and preserving government data in digital records; handling data generated by logs in a multitude of languages; handling user names without mangling; and hundreds of dozens of other use cases, the need for text practically writes itself.</p>
   <h3 class="heading settled" data-level="2.3" id="motivation-objectives"><span class="secno">2.3. </span><span class="content">Statement of Objectives</span><a class="self-link" href="#motivation-objectives"></a></h3>
   <p>Part of this proposal is identifying exactly how those needs should be served. The primary objectives of this proposal, therefore, are as follows:</p>
   <ul>
    <li data-md>
     <p>Users should be able to define their own encodings for their own encodings. Jonathan Wakely’s time is not worth EBCDIC, but IBM will certainly be very invested in making sure EBCDIC and its code pages are well-implemented and optimized. Put another way: company-specific and user-specific problems should be specific to them and not exported to the whole ecosystem, and they should be able to handle their problems effectively and efficiently without throwing the C++ Standard in the trash.</p>
    <li data-md>
     <p>Locale-based <code class="highlight"><c- b>char</c-></code> and <code class="highlight"><c- b>wchar_t</c-></code> encodings belong to the C and C++ implementation. If users need to guess about the locale’s encoding and (probably extremely wrongly) pick something rather than using this API, then the API is a failure.</p>
    <li data-md>
     <p>The standard library should be able to cannibalize all existing legacy encodings and -- by way of leading design -- encourage and promote the use of Unicode in the user’s code. <span style="color: white">Embrace. Extend. Extinguish.</span></p>
    <li data-md>
     <p>The standard library (and its implementers) do not have time to implement every new, old, and existing encoding. Put bluntly: CJ Johnson’s brilliance and Stephan T. Lavavej’s passion is better spent improving their respective libraries and fixing bugs, not implementing EBCDIC or ISO/IEC 2022 CN, extended variant 2.</p>
    <li data-md>
     <p>Unicode is the one and only language the standard speaks in its higher level text algorithms and functionality: legacy encodings must convert to Unicode to work with functionality built beyond this proposal. Future proposals will never need to concern themselves with encodings after this proposal is done.</p>
    <li data-md>
     <p>Users may choose not to convert to Unicode, but they will need to spend the time and effort working out that trade off with their environment. The standard library will never have to care about text that willingly and deliberately exits the Unicode system.</p>
    <li data-md>
     <p>Safety is not optional. Code that performs unsafe operations should require explicit opt-in and easily searchable patterns and names that make it clear the user has made a deliberate choice to open themselves up to vulnerabilities such as Undefined Behavior.</p>
    <li data-md>
     <p>Performance is not optional, and <a href="https://fasterthanli.me/blog/2020/i-want-off-mr-golangs-wild-ride/">correctness isn’t a tender suggestion achievable with insane workarounds</a>.</p>
    <li data-md>
     <p>Simple function calls should be simple, but if the user wants to pry open the details they should be able to do so incrementally with ease.</p>
    <li data-md>
     <p>Nobody has time to reimplement all of iconv, especially the library developers. The interface should allow implementers to substitute a backend for certain encodings that takes advantage of pre-existing Operating System, Widely-Available Library, or similar functionality.</p>
    <li data-md>
     <p>Users should be able to do everything implementers can without undue clash between user functionality and implementer internal handling and extensions.</p>
    <li data-md>
     <p>Octets -- delivered over the network, from IPC, or similar -- are an important input case that must be handled.</p>
    <li data-md>
     <p>The design must be viable for low-memory environments, and prioritize zero allocation if a user cares enough to invest the time into the API with that goal.</p>
    <li data-md>
     <p>At no point should we be introducing new container types for this functionality. Container wrappers / adaptors and range wrapper / adaptors are enough.</p>
   </ul>
   <h2 class="heading settled" data-level="3" id="design"><span class="secno">3. </span><span class="content">Design</span><a class="self-link" href="#design"></a></h2>
   <p>The current design has been the culmination of a few years of collaborative and independent research, starting with the earliest papers from Mark Boyall’s <a data-link-type="biblio" href="#biblio-n3574">[n3574]</a>, Tom Honermann’s <a data-link-type="biblio" href="#biblio-p0244r2">[p0244r2]</a>, study of ICU’s interface, and finally the musings, experience and work of R. Martinho Fernandes in <a data-link-type="biblio" href="#biblio-libogonek">[libogonek]</a>. Current and future optimizations are considered to ensure that fast paths are not blocked in the interface proposed for standardization. With <a data-link-type="biblio" href="#biblio-boosttext">[boost.text]</a> showing an interface with a nailed down internally used UTF-8 encoding, Markus Scherer’s participation in SG16 meetings, Henri Sivonen’s feedback on blog posts and mailing lists, and Bob Steagall’s <a data-link-type="biblio" href="#biblio-fast-utf8">work in writing a fast UTF8 decoder</a> this paper absorbs a wealth of knowledge to reach a flexible interface that enables high-throughput.</p>
   <p>In reading, implementing, working with and consuming all of these designs, the author of this paper, independent implementers, and several SG16 members have come to the following core tenets:</p>
   <ul>
    <li data-md>
     <p>strong types for code units allow selecting proper default encodings for these interfaces;</p>
    <li data-md>
     <p>iterators and ranges are a huge interface win for working with text but make it impossible to provide the fastest possible way to encode/decode/transcode text;</p>
    <li data-md>
     <p>and, avoid creating new vocabulary: improve working with original containers and imposing well-formedness constraints upon them rather than designing new containers from the ground up.</p>
   </ul>
   <p>Given these tenets, the following interface choices have arisen for this paper. Each section will describe a piece of the interface, its goals, and how it works. A low-level encoding interface and its plumbing and core types will be described first, followed by a high level interface that makes the low level easy to use. Both are imperative to cover the full design space that exists together, and the use cases today.</p>
   <h3 class="heading settled" data-level="3.1" id="design-definitions"><span class="secno">3.1. </span><span class="content">Definitions</span><a class="self-link" href="#design-definitions"></a></h3>
   <p>Some handy definitions here which will be liberally applied to template parameters and other things to shorten the specification.</p>
   <ul>
    <li data-md>
     <p>Unicode Code Point: the 21-bit value (often represented as a 32-bit number for implementation-related reasons) that represents a code point from the Unicode Standard. Specifically, it is the range of integers 0 to 0x10FFFF inclusive.</p>
    <li data-md>
     <p>Unicode Scalar Value: the 21-bit value that represents a code point from the Unicode Standard, but without Surrogate Unicode Code Point values. Specifically, it is the ranges of integers 0 to 0xD7FF and 0xE000 to 0x10FFFF inclusive.</p>
    <li data-md>
     <p><code class="highlight"><c- n>unicode_code_point</c-></code>: a type in C++ that represents a Unicode Code Point. Alias of <code class="highlight"><c- b>char32_t</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- n>unicode_scalar_value</c-></code>: a type in C++ that represents a Unicode Scalar Value. A strong typedef that supports all the same operations as <code class="highlight"><c- b>char32_t</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- k>using</c-> <c- n>UEncoding</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> given the existence of a template parameter <code class="highlight"><c- n>Encoding</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- k>using</c-> <c- n>UFromEncoding</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-></code> given the existence of a template parameter <code class="highlight"><c- n>FromEncoding</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- k>using</c-> <c- n>UToEncoding</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>ToEncoding</c-><c- o>></c-></code> given the existence of a template parameter <code class="highlight"><c- n>ToEncoding</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>T</c-><c- o>></c-> <c- k>using</c-> <c- n>encoding_state_t</c-> <c- o>=</c-> <c- k>typename</c-> <c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>T</c-><c- o>>::</c-><c- n>state</c-><c- p>;</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>T</c-><c- o>></c-> <c- k>using</c-> <c- n>encoding_code_unit_t</c-> <c- o>=</c-> <c- k>typename</c-> <c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>T</c-><c- o>>::</c-><c- n>code_unit</c-><c- p>;</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>T</c-><c- o>></c-> <c- k>using</c-> <c- n>encoding_code_point_t</c-> <c- o>=</c-> <c- k>typename</c-> <c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>T</c-><c- o>>::</c-><c- n>code_point</c-><c- p>;</c-></code>: this is the <code class="highlight"><c- n>code_point</c-></code> type definition for a given type <code class="highlight"><c- n>T</c-></code>, ignoring cv-qualifiers.</p>
    <li data-md>
     <p><code class="highlight"><c- n>is_self_state_encoding_v</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-></code>: a boolean trait that tells whether or not an encoding uses itself as the state type, rather than a separate state type.</p>
   </ul>
<pre class="language-cpp highlight"><c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>T</c-><c- o>></c->
<c- kr>inline</c-> <c- k>constexpr</c-> <c- b>bool</c-> <c- n>is_self_state_encoding_v</c->
	<c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>is_same_v</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-><c- p>,</c-> <c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>T</c-><c- o>>></c-><c- p>;</c->
</pre>
   <ul>
    <li data-md>
     <p><code class="highlight"><c- n>range_of</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-></code>: is a concept defining that there is a range whose iterator produces a <code class="highlight"><c- n>value_type</c-></code> of <code class="highlight"><c- n>T</c-></code>. For example, <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>vector</c-><c- o>&lt;</c-><c- b>int</c-><c- o>></c-></code> and <code class="highlight"><c- b>int</c-><c- p>[</c-><c- mi>1</c-><c- p>]</c-></code> model concept-constrained parameter or return type of <code class="highlight"><c- k>const</c-> <c- n>range_of</c-><c- o>&lt;</c-><c- b>int</c-><c- o>></c-> <c- k>auto</c-><c- o>&amp;</c-></code>.</p>
   </ul>
<pre class="language-cpp highlight"><c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>R</c-><c- p>,</c-> <c- k>typename</c-> <c- n>T</c-><c- o>></c->
<c- n>concept</c-> <c- n>range_of</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>R</c-><c- o>>></c-> <c- o>&amp;&amp;</c->
	<c- n>std</c-><c- o>::</c-><c- n>is_same_v</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>R</c-><c- o>>></c-><c- p>,</c-> <c- n>T</c-><c- o>></c-><c- p>;</c->
</pre>
   <ul>
    <li data-md>
     <p><code class="highlight"><c- n>contiguous_range_of</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-></code>: is a concept defining that there is a contiguous range whose iterator produces a <code class="highlight"><c- n>value_type</c-></code> of <code class="highlight"><c- n>T</c-></code>. For example, <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- b>double</c-><c- o>></c-></code> and <code class="highlight"><c- b>double</c-><c- p>[</c-><c- mi>1</c-><c- p>]</c-></code> model concept-constrained parameter or return type of <code class="highlight"><c- k>const</c-> <c- n>contiguous_range_of</c-><c- o>&lt;</c-><c- b>double</c-><c- o>></c-> <c- k>auto</c-><c- o>&amp;</c-></code>.</p>
   </ul>
<pre class="language-cpp highlight"><c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>R</c-><c- p>,</c-> <c- k>typename</c-> <c- n>T</c-><c- o>></c->
<c- n>concept</c-> <c- n>contiguous_range_of</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>contiguous_range</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>R</c-><c- o>>></c-> <c- o>&amp;&amp;</c->
	<c- n>std</c-><c- o>::</c-><c- n>is_same_v</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>R</c-><c- o>>></c-><c- p>,</c-> <c- n>T</c-><c- o>></c-><c- p>;</c->
</pre>
   <h3 class="heading settled" data-level="3.2" id="design-low-level"><span class="secno">3.2. </span><span class="content">Low-Level</span><a class="self-link" href="#design-low-level"></a></h3>
   <p>The high-level interfaces must be built on <em>something</em>: it cannot be magically willed into existence. There is quite a bit of plumbing that goes into the low-level interfaces, most of which will be boilerplate to users but will serve keen use and importance to several library developers and standard library implementers.</p>
   <h4 class="heading settled" data-level="3.2.1" id="design-low-level-error_codes"><span class="secno">3.2.1. </span><span class="content">Error Codes</span><a class="self-link" href="#design-low-level-error_codes"></a></h4>
   <p>There is some boilerplate that needs to be taken care of before building our encoding, decoding, transcoding and similar functionality begins. First and foremost is the error codes and result types that will go in and out of our encoding functions. The error code enumeration is <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-></code>. It lists all the reasons an encoding or decoding operation can fail:</p>
<pre class="language-cpp highlight"><c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>enum</c-> <c- k>class</c-> <c- nc>encoding_errc</c-> <c- o>:</c-> <c- b>int</c-> <c- p>{</c->
		<c- c1>// just fine</c->
		<c- n>ok</c-> <c- o>=</c-> <c- mh>0x00</c-><c- p>,</c->
		<c- c1>// input contains ill-formed sequences</c->
		<c- n>invalid_sequence</c-> <c- o>=</c-> <c- mh>0x01</c-><c- p>,</c->
		<c- c1>// input contains incomplete sequences</c->
		<c- n>incomplete_sequence</c-> <c- o>=</c-> <c- mh>0x02</c-><c- p>,</c->
		<c- c1>// output cannot receive all the completed </c->
		<c- c1>// code units</c->
		<c- n>insufficient_output_space</c-> <c- o>=</c-> <c- mh>0x03</c-><c- p>,</c->
		<c- c1>// sequence can be encoded but resulting </c->
		<c- c1>// code point is invalid (e.g., encodes a lone surrogate)</c->
		<c- n>invalid_output</c-> <c- o>=</c-> <c- mh>0x04</c-><c- p>,</c->
		<c- c1>// input contains overlong encoding sequence </c->
		<c- c1>// (e.g. for utf8)</c->
		<c- n>overlong_sequence</c-> <c- o>=</c-> <c- mh>0x05</c-><c- p>,</c->
		<c- c1>// leading code unit is wrong</c->
		<c- n>invalid_leading_sequence</c-> <c- o>=</c-> <c- mh>0x06</c-><c- p>,</c->
		<c- c1>// leading code units were correct, trailing</c->
		<c- c1>// code units were wrong</c->
		<c- n>invalid_trailing_sequence</c-> <c- o>=</c-> <c- mh>0x07</c->
	<c- p>};</c->

<c- p>}}</c->
</pre>
   <p>The comments give some small amount of examples about what each one means. The reason 0 is used to signal success is very simple: the next part of the API creates an encoding_error_category class and hooks up the machinery for a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>error_condition</c-></code>:</p>
<pre class="language-cpp highlight"><c- k>namespace</c-> <c- n>std</c-> <c- p>{</c->

	<c- k>template</c-> <c- o>&lt;></c->
	<c- k>class</c-> <c- nc>is_error_condition_enum</c-><c- o>&lt;</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>></c-> <c- o>:</c-> <c- n>true_type</c-> <c- p>{};</c->

	<c- k>class</c-> <c- nc>encoding_error_category</c-> <c- o>:</c-> <c- k>public</c-> <c- n>error_category</c-> <c- p>{</c->
	<c- k>public</c-><c- o>:</c->
		<c- k>constexpr</c-> <c- n>encoding_error_category</c-><c- p>()</c-> <c- k>noexcept</c-><c- p>;</c->

		<c- k>virtual</c-> <c- k>const</c-> <c- b>char</c-><c- o>*</c-> <c- nf>name</c-><c- p>()</c-> <c- k>const</c-> <c- k>noexcept</c-> <c- k>override</c-><c- p>;</c->
		<c- k>virtual</c-> <c- n>string</c-> <c- nf>message</c-><c- p>(</c-><c- b>int</c-> <c- n>condition</c-><c- p>)</c-> <c- k>const</c-> <c- k>override</c-><c- p>;</c->
	<c- p>};</c->

<c- p>}</c->
</pre>
   <p>This allows the creation of a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>error_condition</c-></code>, which is used as an all-encompassing text error code for the standard.</p>
   <h4 class="heading settled" data-level="3.2.2" id="design-low-level-results"><span class="secno">3.2.2. </span><span class="content">Result Types</span><a class="self-link" href="#design-low-level-results"></a></h4>
   <p>The result types are the glue that help users who use the low level interface loop through their text properly. It returns updated ranges of both the input and output to indicate how far things have been moved along, on top of an error_code and whether or not the result came from an error being handled:</p>
<pre class="language-cpp highlight"><c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>State</c-><c- o>></c->
	<c- k>class</c-> <c- nc>encode_result</c-> <c- p>{</c->
		<c- n>Input</c-> <c- n>input</c-><c- p>;</c->
		<c- n>Output</c-> <c- n>output</c-><c- p>;</c->
		<c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>;</c->
		<c- n>encoding_errc</c-> <c- n>error_code</c-><c- p>;</c->
		<c- b>bool</c-> <c- n>handled_error</c-><c- p>;</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>InRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>OutRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>EncodingState</c-><c- o>></c->
		<c- k>constexpr</c-> <c- n>encode_result</c-><c- p>(</c-><c- n>InRange</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>OutRange</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> 
			<c- n>EncodingState</c-><c- o>&amp;&amp;</c-> <c- n>state</c-><c- p>,</c-> <c- n>encoding_errc</c-> <c- n>error_code</c-> <c- o>=</c-> <c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-><c- p>);</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>InRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>OutRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>EncodingState</c-><c- o>></c->
		<c- k>constexpr</c-> <c- n>encode_result</c-><c- p>(</c-><c- n>InRange</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>OutRange</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> 
			<c- n>EncodingState</c-><c- o>&amp;&amp;</c-> <c- n>state</c-><c- p>,</c-> <c- n>encoding_errc</c-> <c- n>error_code</c-><c- p>,</c-> <c- b>bool</c-> <c- n>handled_error</c-><c- p>);</c->

		<c- k>constexpr</c-> <c- n>std</c-><c- o>::</c-><c- n>error_condition</c-> <c- n>error</c-><c- p>()</c-> <c- k>const</c-><c- p>;</c->
	<c- p>};</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>State</c-><c- o>></c->
	<c- k>class</c-> <c- nc>decode_result</c-> <c- p>{</c->
		<c- n>Input</c-> <c- n>input</c-><c- p>;</c->
		<c- n>Output</c-> <c- n>output</c-><c- p>;</c->
		<c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>;</c->
		<c- n>encoding_errc</c-> <c- n>error_code</c-><c- p>;</c->
		<c- b>bool</c-> <c- n>handled_error</c-><c- p>;</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>InRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>OutRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>EncodingState</c-><c- o>></c->
		<c- k>constexpr</c-> <c- n>decode_result</c-><c- p>(</c-><c- n>InRange</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>OutRange</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> 
			<c- n>EncodingState</c-><c- o>&amp;&amp;</c-> <c- n>state</c-><c- p>,</c-> <c- n>encoding_errc</c-> <c- n>error_code</c-> <c- o>=</c-> <c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-><c- p>);</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>InRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>OutRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>EncodingState</c-><c- o>></c->
		<c- k>constexpr</c-> <c- n>decode_result</c-><c- p>(</c-><c- n>InRange</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>OutRange</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> 
			<c- n>EncodingState</c-><c- o>&amp;&amp;</c-> <c- n>state</c-><c- p>,</c-> <c- n>encoding_errc</c-> <c- n>error_code</c-><c- p>,</c-> <c- b>bool</c-> <c- n>handled_error</c-><c- p>);</c->

		<c- k>constexpr</c-> <c- n>std</c-><c- o>::</c-><c- n>error_condition</c-> <c- n>error</c-><c- p>()</c-> <c- k>const</c-><c- p>;</c->
	<c- p>};</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromState</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToState</c-><c- o>></c->
	<c- k>class</c-> <c- nc>transcode_result</c-> <c- p>{</c->
		<c- n>Input</c-> <c- n>input</c-><c- p>;</c->
		<c- n>Output</c-> <c- n>output</c-><c- p>;</c->
		<c- n>FromState</c-><c- o>&amp;</c-> <c- n>from_state</c-><c- p>;</c->
		<c- n>ToState</c-><c- o>&amp;</c-> <c- n>to_state</c-><c- p>;</c->
		<c- n>encoding_errc</c-> <c- n>error_code</c-><c- p>;</c->
		<c- b>bool</c-> <c- n>handled_error</c-><c- p>;</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>InRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>OutRange</c-><c- p>,</c->
			<c- k>typename</c-> <c- n>FromEncodingState</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncodingState</c-><c- o>></c->
		<c- k>constexpr</c-> <c- n>transcode_result</c-><c- p>(</c-><c- n>InRange</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>OutRange</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c->
			<c- n>FromEncodingState</c-><c- o>&amp;&amp;</c-> <c- n>from_state</c-><c- p>,</c-> <c- n>ToEncodingState</c-><c- o>&amp;&amp;</c-> <c- n>to_state</c-><c- p>,</c->
			<c- n>encoding_errc</c-> <c- n>error_code</c-> <c- o>=</c-> <c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-><c- p>);</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>InRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>OutRange</c-><c- p>,</c->
			<c- k>typename</c-> <c- n>FromEncodingState</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncodingState</c-><c- o>></c->
		<c- k>constexpr</c-> <c- n>transcode_result</c-><c- p>(</c-><c- n>InRange</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>OutRange</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c->
			<c- n>FromEncodingState</c-><c- o>&amp;&amp;</c-> <c- n>from_state</c-><c- p>,</c-> <c- n>ToEncodingState</c-><c- o>&amp;&amp;</c-> <c- n>to_state</c-><c- p>,</c->
			<c- n>encoding_errc</c-> <c- n>error_code</c-><c- p>,</c-> <c- b>bool</c-> <c- n>handled_error</c-><c- p>);</c->

		<c- k>constexpr</c-> <c- n>std</c-><c- o>::</c-><c- n>error_condition</c-> <c- n>error</c-><c- p>()</c-> <c- k>const</c-><c- p>;</c->
	<c- p>};</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>State</c-><c- o>></c->
	<c- k>struct</c-> <c- n>validate_result</c-> <c- p>{</c->
		<c- n>Input</c-> <c- n>input</c-><c- p>;</c->
		<c- b>bool</c-> <c- n>valid</c-><c- p>;</c->
		<c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>;</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>ArgInput</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ArgState</c-><c- o>></c->
		<c- k>constexpr</c-> <c- n>validate_result</c-><c- p>(</c-><c- n>ArgInput</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- b>bool</c-> <c- n>is_valid</c-><c- p>,</c-> <c- n>ArgState</c-><c- o>&amp;&amp;</c-> <c- n>state</c-><c- p>);</c->
	<c- p>};</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>State</c-><c- o>></c->
	<c- k>struct</c-> <c- n>count_result</c-> <c- p>{</c->
		<c- n>Input</c-> <c- n>input</c-><c- p>;</c->
		<c- b>size_t</c-> <c- n>count</c-><c- p>;</c->
		<c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>;</c->
		<c- n>encoding_errc</c-> <c- n>error_code</c-><c- p>;</c->
		<c- b>bool</c-> <c- n>handled_error</c-><c- p>;</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>ArgInput</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ArgState</c-><c- o>></c->
		<c- k>constexpr</c-> <c- n>count_result</c-><c- p>(</c-><c- n>ArgInput</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- b>size_t</c-> <c- n>count</c-><c- p>,</c-> <c- n>ArgState</c-><c- o>&amp;&amp;</c-> <c- n>state</c-><c- p>,</c->
			<c- n>encoding_errc</c-> <c- n>error_code</c-> <c- o>=</c-> <c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-><c- p>);</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>ArgInput</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ArgState</c-><c- o>></c->
		<c- k>constexpr</c-> <c- n>count_result</c-><c- p>(</c-><c- n>ArgInput</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- b>size_t</c-> <c- n>count</c-><c- p>,</c-> <c- n>ArgState</c-><c- o>&amp;&amp;</c-> <c- n>state</c-><c- p>,</c->
			<c- n>encoding_errc</c-> <c- n>error_code</c-><c- p>,</c-> <c- b>bool</c-> <c- n>handled_error</c-><c- p>);</c->
	<c- p>};</c->

<c- p>}}</c->
</pre>
   <p>There is a lot to unpack here. There are two essentially identical structures: <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode_result</c-></code> and <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>decode_result</c-></code>. These contain the input range, the output range, a reference to the encoding’s current state, the error code and whether or not the error handler was invoked. The <code class="highlight"><c- b>bool</c-> <c- n>handled_error</c-></code> member is important because some error handlers may change the <code class="highlight"><c- n>error_code</c-></code> member to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code>, indicating that things are fine (e.g., a replacement character was successfully inserted into the output stream to replace some bad input).</p>
   <p class="note" role="note"><span>Note:</span> Having 2 differently-named types with much the same interface is paramount to allow an <code class="highlight"><c- n>error_handler</c-></code> callable to know how to interpret some errors and whether to try to insert code units into the output stream or code points into the output stream (encoding means code units into output, decoding means code points into the output). If the structures were merged, this information would be lost at compile-time and the handler would have to attempt to coerce that information out by examining the <code class="highlight"><c- n>value_type</c-></code> and <code class="highlight"><c- n>reference</c-></code> types of the output or input range. Unfortunately, even that is not foolproof because neither the input range nor the output range needs to dereference to exactly the <code class="highlight"><c- n>Encoding</c-><c- o>::</c-><c- n>code_unit</c-></code> or <code class="highlight"><c- n>Encoding</c-><c- o>::</c-><c- n>code_point</c-></code> types, just to things convertible to / from them.</p>
   <p><code class="highlight"><c- n>transcode_result</c-></code> is a joint type for operations which go from <code class="highlight"><c- n>code_unit</c-></code> ➝ <code class="highlight"><c- n>code_point</c-></code> and then <code class="highlight"><c- n>code_point</c-></code> ➝ <code class="highlight"><c- n>code_unit</c-></code>, assuming the <code class="highlight"><c- n>code_point</c-></code> types are compatible between the two encodings deployed for the transformation.</p>
   <h5 class="heading settled" data-level="3.2.2.1" id="design-low-level-results-ranges"><span class="secno">3.2.2.1. </span><span class="content">Input and Output Ranges</span><a class="self-link" href="#design-low-level-results-ranges"></a></h5>
   <p>These are essentially the ranges moved forward as much or as little as the encoding needed to for reading from the input, converting, and writing to the output. It also solves the problem of obtaining maximal speed based on checking if the destination is filled or if the input is exhausted: <code class="highlight"><c- n>unbounded_view</c-></code> works well since its comparison sentinel always returns the literal "false" bool on comparison, meaning that any compiler beyond the typical <code class="highlight"><c- o>-</c-><c- n>O0</c-></code> / <code class="highlight"><c- o>/</c-><c- n>Od</c-></code> / etc. levels of optimization will cull any <code class="highlight"><c- n>it</c-> <c- o>==</c-> <c- n>last</c-></code> comparison branches out of code.</p>
   <p>The decoding result and encoding result types both return the input and output range passed to encoding and decoding functions in the structure itself. This represents the changed ranges. In the event where the range cannot be successfully reconstructed from itself using the iterator and sentinel, a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>subrange</c-><c- o>&lt;</c-><c- n>Iterator</c-><c- p>,</c-> <c- n>Sentinel</c-><c- o>></c-></code> will be returned instead.</p>
   <h5 class="heading settled" data-level="3.2.2.2" id="design-low-level-results-error_handler"><span class="secno">3.2.2.2. </span><span class="content">Error Handling: Allow All The Options</span><a class="self-link" href="#design-low-level-results-error_handler"></a></h5>
   <p>This is a low-level interface. As such, accommodating different error handling strategies is necessary. There are several ways to report errors used in both the C and C++ standard libraries, from throwing errors, to <code class="highlight"><c- n>error_code</c-></code> out parameters, to integral return values and even complex return structures. Choosing a scheme here is difficult given the large breadth and depth of error handling history in C++, and while the standard library shows a clear bias towards throwing exceptions it would not be prudent to throw all the time. Requiring exceptions may exclude hard and soft real-time programming environments wherein these encoding structures will be needed. Exceptions also have an intrinsic problem in this domain, as described a little bit below in this section.</p>
   <p>To accommodate the wide breadth of C++ programming environments and ecosystems, error reporting will be done through an error handler, which can be any type of callable that matches the desired interface. The standard will provide 4 of these error handlers:</p>
<pre class="language-cpp highlight"><c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>class</c-> <c- nc>replacement_handler</c-><c- p>;</c->
	<c- k>class</c-> <c- nc>throw_handler</c-><c- p>;</c->
	<c- k>class</c-> <c- nc>assume_valid_handler</c-><c- p>;</c->
	<c- k>class</c-> <c- nc>default_handler</c-><c- p>;</c->

<c- p>}}</c->
</pre>
   <p>The interface for an error handler looks like the below example error handler:</p>
<pre class="language-cpp highlight"><c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>class</c-> <c- nc>example_error_handler</c-> <c- p>{</c->
		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>InputRange</c-><c- p>,</c->
			<c- k>typename</c-> <c- n>OutputRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>State</c-><c- p>,</c->
			<c- n>contiguous_range_of</c-><c- o>&lt;</c-><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>>></c-> <c- n>Progress</c-><c- o>></c->
		<c- k>constexpr</c-> <c- k>auto</c-> <c- k>operator</c-><c- p>()(</c-><c- k>const</c-> <c- n>Encoding</c-><c- o>&amp;</c-> <c- n>encoding</c-><c- p>,</c->
			<c- n>encode_result</c-><c- o>&lt;</c-><c- n>InputRange</c-><c- p>,</c-> <c- n>OutputRange</c-><c- p>,</c-> <c- n>State</c-><c- o>></c-> <c- n>result</c-><c- p>,</c->
			<c- k>const</c-> <c- n>Progress</c-><c- o>&amp;</c-> <c- n>progress</c-><c- p>)</c-> <c- k>const</c-> <c- p>{</c->
			<c- d>/* morph result, log, throw error, etc. ... */</c->
			<c- k>return</c-> <c- n>result</c-><c- p>;</c->
		<c- p>}</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>InputRange</c-><c- p>,</c->
			<c- k>typename</c-> <c- n>OutputRange</c-><c- p>,</c-> <c- k>typename</c-> <c- n>State</c-><c- p>,</c->
			<c- n>contiguous_range_of</c-><c- o>&lt;</c-><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>>></c-> <c- n>Progress</c-><c- o>></c->
		<c- k>constexpr</c-> <c- k>auto</c-> <c- k>operator</c-><c- p>()(</c-><c- k>const</c-> <c- n>Encoding</c-><c- o>&amp;</c-> <c- n>encoding</c-><c- p>,</c->
			<c- n>decode_result</c-><c- o>&lt;</c-><c- n>InputRange</c-><c- p>,</c-> <c- n>OutputRange</c-><c- p>,</c-> <c- n>State</c-><c- o>></c-> <c- n>result</c-><c- p>,</c->
			<c- k>const</c-> <c- n>Progress</c-><c- o>&amp;</c-> <c- n>progress</c-><c- p>)</c-> <c- k>const</c-> <c- p>{</c->
			<c- d>/* morph result, log, throw error, etc. ... */</c->
			<c- k>return</c-> <c- n>result</c-><c- p>;</c->
		<c- p>}</c->
	<c- p>};</c->

<c- p>}}</c->
</pre>
   <p>The specification here is a value-based one. <code class="highlight"><c- n>encoding</c-></code> is a reference to the encoding which threw the error. <code class="highlight"><c- n>result</c-></code> is passed to the error handler and it represents an <code class="highlight"><c- n>encode</c-></code> or <code class="highlight"><c- n>decode</c-></code> function’s current progress. The <code class="highlight"><c- n>result</c-></code> types provide the current input range, the current output range, a reference to the current state, and the type of error encountered according to the <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-></code>. Finally, the <code class="highlight"><c- n>progress</c-></code> object is a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>contiguous_range</c-></code> passed from the encoder with the code points or code units already read from the input range. (This is important for e.g. reading from one-way iterators like <code class="highlight"><c- n>istream_iterator</c-></code>, where it is impossible to go back and recover information consumed by the algorithm.) The error handler is then responsible for performing any modifications it wants to the result type, before returning the modified result to be propagated back by the encoding interface.</p>
   <p>There are a few things that can be done in the commented code shown above. First and foremost is that someone could look at <code class="highlight"><c- n>result</c-><c- p>.</c-><c- n>error</c-><c- p>()</c-></code> and simply throw a hand-tailored exception. This would bubble out of the function and let the caller decide what to do.</p>
   <p class="note" role="note"><span>Note:</span> Throwing is <strong>explicitly not recommended by default</strong> by prominent vendors and implementers (Mozilla, Apple, the Unicode Consortium, WHATWG, etc.). Ill-formed text is common. Text from misbehaving programs -- 40 years of them -- is a frequent kind of user and machine input. It is extremely easy to provoke a Denial of Service Attack (DoS Attack) if an application throws an error on malformed input that the application author did not consider.</p>
   <p>The default error handler will be the <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_handler</c-></code>, as hinted by the name. The <code class="highlight"><c- n>default_handler</c-></code> is a "strong typedef" over the <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>replacement_handler</c-></code>, done for the purposes of <a href="#design-high-level-safety">safety in the higher-level API</a>.</p>
   <p>The <code class="highlight"><c- n>replacement_handler</c-></code> will look inside <code class="highlight"><c- n>Encoding</c-></code> to see if the expression <code class="highlight"><c- n>encoding</c-><c- p>.</c-><c- n>replacement_code_points</c-><c- p>()</c-></code> or <code class="highlight"><c- n>encoding</c-><c- p>.</c-><c- n>replacement_code_units</c-><c- p>()</c-></code> is well-formed. If so, it will take the range returned from that function and will attempt to insert it into the <code class="highlight"><c- n>output</c-></code> range. Specifically:</p>
   <ul>
    <li data-md>
     <p>On a failure in <code class="highlight"><c- n>decode_one</c-></code>:</p>
     <ul>
      <li data-md>
       <p>If the output is at its end, return the result as-is.</p>
      <li data-md>
       <p>If the expression <code class="highlight"><c- k>decltype</c-><c- p>(</c-><c- k>auto</c-><c- p>)</c-> <c- n>replacement_points</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>replacement_code_points</c-><c- p>();</c-></code> is well-formed, then <code class="highlight"><c- n>replacement_points</c-></code> is iterated over and code points are inserted into the output range in linear ascending order, if there is space. If there is not enough space, return the result as-is. Note that this may write partial data to the range if <code class="highlight"><c- n>replacement_points</c-></code> contains more than one code point.</p>
      <li data-md>
       <p>Otherwise, if the <code class="highlight"><c- n>code_point</c-></code> type is a Unicode Code Point type (<code class="highlight"><c- b>char32_t</c-></code>, <code class="highlight"><c- n>unicode_code_point</c-></code>, <code class="highlight"><c- n>unicode_scalar_value</c-></code>), an array of <code class="highlight"><c- p>{</c-> <c- n>U</c->'\<c- n>uFFFD</c->' <c- p>}</c-></code> is assumed to be the replacement characters for the standard error handlers.</p>
      <li data-md>
       <p>Otherwise, if the expression <code class="highlight"><c- k>decltype</c-><c- p>(</c-><c- k>auto</c-><c- p>)</c-> <c- n>replacement_units</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>replacement_code_units</c-><c- p>();</c-></code> is well-formed, then <code class="highlight"><c- n>replacement_units</c-></code> is passed to a call to <code class="highlight"><c- k>auto</c-> <c- n>intermediate_result</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>decode_one</c-><c- p>(</c-><c- n>replacement_units</c-><c- p>,</c-> <c- n>result</c-><c- p>.</c-><c- n>output</c-><c- p>,</c-> <c- d>/* implementation-defined pass-through handler */</c-><c- p>,</c-> <c- n>result</c-><c- p>.</c-><c- n>state</c-><c- p>);</c-></code>. If <code class="highlight"><c- n>intermediate_result</c-><c- p>.</c-><c- n>error_code</c-></code> is not equal to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code>, then return the original result. Note that this may write partial data to the range if the decode operation needs to write more than one code point to the <code class="highlight"><c- n>output</c-></code>.</p>
      <li data-md>
       <p>Otherwise, the program is ill-formed.</p>
     </ul>
    <li data-md>
     <p>On a failure in <code class="highlight"><c- n>encode_one</c-></code>:</p>
     <ul>
      <li data-md>
       <p>If the output is at its end, return the <code class="highlight"><c- n>result</c-></code> as-is.</p>
      <li data-md>
       <p>If the expression <code class="highlight"><c- k>decltype</c-><c- p>(</c-><c- k>auto</c-><c- p>)</c-> <c- n>replacement_units</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>replacement_code_units</c-><c- p>();</c-></code> is well-formed, then <code class="highlight"><c- n>replacement_units</c-></code> is iterated over and code units are inserted into the output range in linear ascending order, if there is space. If there is not enough space, return the <code class="highlight"><c- n>result</c-></code> as-is. Note that this may write partial data to the <code class="highlight"><c- n>result</c-><c- p>.</c-><c- n>output</c-></code> range if <code class="highlight"><c- n>replacement_units</c-></code> contains more than one unit but the output reaches its limit.</p>
      <li data-md>
       <p>Otherwise, if the <code class="highlight"><c- n>code_point</c-></code> type is a Unicode Code Point type (<code class="highlight"><c- b>char32_t</c-></code>, <code class="highlight"><c- n>unicode_code_point</c-></code>, <code class="highlight"><c- n>unicode_scalar_value</c-></code>), an array of <code class="highlight"><c- p>{</c-> <c- n>U</c->'\<c- n>uFFFD</c->' <c- p>}</c-></code> is assumed to be the replacement characters for the standard error handlers.</p>
      <li data-md>
       <p>Otherwise, if the expression <code class="highlight"><c- k>decltype</c-><c- p>(</c-><c- k>auto</c-><c- p>)</c-> <c- n>replacement_points</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>replacement_code_points</c-><c- p>();</c-></code> is well-formed, then <code class="highlight"><c- n>replacement_points</c-></code> is passed to a call to <code class="highlight"><c- k>auto</c-> <c- n>intermediate_result</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>encode_one</c-><c- p>(</c-><c- n>replacement_points</c-><c- p>,</c-> <c- n>result</c-><c- p>.</c-><c- n>output</c-><c- p>,</c-> <c- d>/* implementation-defined pass-through handler */</c-><c- p>,</c-> <c- n>result</c-><c- p>.</c-><c- n>state</c-><c- p>);</c-></code>. If <code class="highlight"><c- n>intermediate_result</c-><c- p>.</c-><c- n>error_code</c-></code> is not equal to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code>, then return the original result. Note that this may write partial data to the range if the encode operation needs to write more than one code unit to the <code class="highlight"><c- n>result</c-><c- p>.</c-><c- n>output</c-></code>.</p>
      <li data-md>
       <p>Otherwise, the program is ill-formed.</p>
     </ul>
   </ul>
   <p>If successful, the error code on the result will be corrected to say "everything is fine" (<code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code>) and then returned from the function. This allows algorithms to continue looping over input with the replacement characters inserted. If there is no room in the output, then the error is returned untouched.</p>
   <p>For performance reasons and flexibility, the error callable must have a way to ensure that the user and implementation can agree on whether or not Undefined Behavior is invoked by assuming that the text is valid. <a data-link-type="biblio" href="#biblio-libogonek">[libogonek]</a> made an object of type <code class="highlight"><c- n>assume_valid_t</c-></code>. This paper provides the same here: an error handler of <code class="highlight"><c- n>assume_valid_handler</c-></code> means that the implementation will eliminate all of its checks and subsequent calls to the error handling interface. A user must provide the <code class="highlight"><c- n>assume_valid_handler</c-></code> to achieve this behavior: it will never be the default behavior because it is error-prone and dangerous and only to be performed with explicit user consent.</p>
   <p>This is notably important: Rust attempted to force that every string constructed ever was valid UTF-8 and rigorously checked this pre- and post-condition. Doing this check was so obscenely expensive that they needed to introduce a new function to <code class="highlight"><c- n>escape</c-><c- p>(...)</c-></code> some UTF-8 text so it would not be checked if the user knew the text was in the proper encoding.</p>
   <h4 class="heading settled" data-level="3.2.3" id="design-low-level-encodings"><span class="secno">3.2.3. </span><span class="content">The Encoding Object</span><a class="self-link" href="#design-low-level-encodings"></a></h4>
   <p>It is no great surprise that there are not enough library implementers prepared to standardize the entirety of what the WHATWG specifies in its encoding specification, let alone enough to handle every rogue request for a new encoding object type in the C++ Standard. A system must be developed that provides flexibility for the end-user that does not require them writing a paper and getting into a 1-2 year long process of herding a proposal through the notoriously slow Committee, just to have support for X encoding or Y feature. There is also less and less (read: almost no) tolerance for adding wacky extensions to libraries like libstdc++ or libc++, and MSVC’s standard library has only recently been open-sourced (with no appetite for shoveling more semi-abandonware legacy library extensions into their codebase at the time of writing).</p>
   <p>Encoding objects provide flexibility that enables us to consume the entire encoding space without needing to tax the Standard Library. It enables other people to plug into the system, provides the flexibility they need, and lets us standardize only when <strong>interoperability</strong> and <strong>redundant implementation</strong> become a burden to the greater C++ ecosystem. This frees up Billy O’Neal, Jonathan Wakely, Louis Dionne, their successors, and the dozens of other standard library contributors and implementers to focus on producing high quality code, rather than scrambling to implement four or five dozen encodings because one company, somewhere, made an at-the-time-it-seemed-okay choice in 2005 about how to store their text.</p>
   <p>Given our result types and error handlers, the interface for the encoding object itself can be defined. Here is the example encoding illustrating the interface:</p>
<pre class="language-cpp highlight"><c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- c1>// NOTE: exemplary encoding</c->
	<c- c1>// for expository purposes</c->
	<c- c1>// containing all the types</c->
	<c- k>class</c-> <c- nc>example_locale_encoding</c-> <c- p>{</c->
		<c- k>class</c-> <c- nc>example_state</c-> <c- p>{</c->
			<c- n>std</c-><c- o>::</c-><c- b>mbstate_t</c-> <c- n>multibyte_state</c-><c- p>;</c->
		<c- p>};</c->

		<c- c1>// REQUIRED: member types and variables</c->
		<c- k>using</c-> <c- n>code_point</c-> <c- o>=</c-> <c- b>char32_t</c-><c- p>;</c->
		<c- k>using</c-> <c- n>code_unit</c-> <c- o>=</c-> <c- b>char</c-><c- p>;</c->

		<c- k>using</c-> <c- n>state</c-> <c- o>=</c-> <c- n>example_state</c-><c- p>;</c->
		
		<c- k>static</c-> <c- k>constexpr</c-> <c- b>size_t</c-> <c- n>max_code_unit_sequence</c-> <c- o>=</c-> <c- n>MB_LEN_MAX</c-><c- p>;</c->
		<c- k>static</c-> <c- k>constexpr</c-> <c- b>size_t</c-> <c- n>max_code_point_sequence</c-> <c- o>=</c-> <c- mi>1</c-><c- p>;</c->

		<c- c1>// OPTIONAL: member types and variables</c->
		<c- k>using</c-> <c- n>is_encoding_injective</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>false_type</c-><c- p>;</c->
		<c- k>using</c-> <c- n>is_decoding_injective</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>true_type</c-><c- p>;</c->

		<c- c1>// REQUIRED: functions</c->
		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>In</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Out</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Handler</c-><c- o>></c->
		<c- n>decode_result</c-><c- o>&lt;</c-><c- n>In</c-><c- p>,</c-> <c- n>Out</c-><c- p>,</c-> <c- n>state</c-><c- o>></c-> <c- n>decode</c-><c- p>(</c->
			<c- n>In</c-><c- o>&amp;&amp;</c-> <c- n>in_range</c-><c- p>,</c-> 
			<c- n>Out</c-><c- o>&amp;&amp;</c-> <c- n>out_range</c-><c- p>,</c->
			<c- n>Handler</c-><c- o>&amp;&amp;</c-> <c- n>handler</c-><c- p>,</c->
			<c- n>state</c-><c- o>&amp;</c-> <c- n>current_state</c->
		<c- p>);</c->

		<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>In</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Out</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Handler</c-><c- o>></c->
		<c- n>encode_result</c-><c- o>&lt;</c-><c- n>In</c-><c- p>,</c-> <c- n>Out</c-><c- p>,</c-> <c- n>state</c-><c- o>></c-> <c- n>encode</c-><c- p>(</c->
			<c- n>In</c-><c- o>&amp;&amp;</c-> <c- n>in_range</c-><c- p>,</c-> 
			<c- n>Out</c-><c- o>&amp;&amp;</c-> <c- n>out_range</c-><c- p>,</c->
			<c- n>Handler</c-><c- o>&amp;&amp;</c-> <c- n>handler</c-><c- p>,</c->
			<c- n>state</c-><c- o>&amp;</c-> <c- n>current_state</c->
		<c- p>);</c->

		<c- c1>// OPTIONAL: functions</c->
		<c- k>constexpr</c-> <c- k>const</c-> <c- n>range_of</c-><c- o>&lt;</c-><c- n>code_point</c-><c- o>></c-> <c- k>auto</c-><c- o>&amp;</c->
		<c- n>replacement_code_points</c-> <c- p>()</c-> <c- k>const</c-> <c- k>noexcept</c-><c- p>;</c->

		<c- k>constexpr</c-> <c- k>const</c-> <c- n>range_of</c-><c- o>&lt;</c-><c- n>code_unit</c-><c- o>></c-> <c- k>auto</c-><c- o>&amp;</c->
		<c- n>replacement_code_units</c-> <c- p>()</c-> <c- k>const</c-> <c- k>noexcept</c-><c- p>;</c->
	<c- p>};</c->
<c- p>}}</c->
</pre>
   <p>There are many pieces of this encoding object. Some of them fit the purposes explained above. As an overview, given an <code class="highlight"><c- n>Encoding</c-></code> type such as <code class="highlight"><c- n>example_locale_encoding</c-></code>, the following type definitions, static member variables, and functions are required:</p>
   <ul>
    <li data-md>
     <p><code class="highlight"><c- n>code_unit</c-></code> and <code class="highlight"><c- n>code_point</c-></code> type definitions let us know what an Encoding’s inputs and outputs will be from its functions. It also helps us tell if 2 encodings can be transcoded from one another by having at least the <code class="highlight"><c- n>code_point</c-></code> in common.</p>
    <li data-md>
     <p><code class="highlight"><c- n>state</c-></code> allows a user to instantiate the type and control any parameters for manipulating stateful or shift-state encodings.</p>
     <ul>
      <li data-md>
       <p>If <code class="highlight"><c- n>is_encoding_self_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> is false (the encoding does not name itself as its state type), <code class="highlight"><c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> must be default-constructible and default construction results in the initial state for the encoding.</p>
      <li data-md>
       <p>If <code class="highlight"><c- n>is_encoding_self_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> is true (the encoding names itself as its state type), then the encoding may not be default-constructible.</p>
     </ul>
    <li data-md>
     <p><code class="highlight"><c- n>max_code_unit_sequence</c-></code> and <code class="highlight"><c- n>max_code_point_sequence</c-></code> represent integral values which inform users of the encoding the necessary size of a buffer to handle at least one full, encoded sequence of code units and one full, decoded sequence of code points. In most cases, <code class="highlight"><c- n>max_code_point_sequence</c-></code> will be <code class="highlight"><c- mi>1</c-></code>, but there are cases where this is not the case (e.g., the Tamil Standard Code for Information Interchange (TASCII)).</p>
    <li data-md>
     <p><code class="highlight"><c- n>decode</c-></code> and <code class="highlight"><c- n>encode</c-></code> are fundamental functions which convert one full unit of complete, indivisible information from one representation to the other. Specifically, <code class="highlight"><c- n>decode</c-></code> converts from <code class="highlight"><c- n>code_unit</c-></code>s to <code class="highlight"><c- n>code_point</c-></code>s, and <code class="highlight"><c- n>encode</c-></code> converts from <code class="highlight"><c- n>code_point</c-></code>s to <code class="highlight"><c- n>code_unit</c-></code>s. <code class="highlight"><c- n>In</c-></code> is an input range, <code class="highlight"><c- n>Out</c-></code> is an output range, and <code class="highlight"><c- n>handler</c-></code> is an error handler as defined in <a href="#design-low-level-results-error_handler">§ 3.2.2.2 Error Handling: Allow All The Options</a>.</p>
   </ul>
   <p>Optionally, some additional type definitions and functions help with safety, error handling (for replacement), and more:</p>
   <ul>
    <li data-md>
     <p><code class="highlight"><c- n>is_encoding_injective</c-></code> and <code class="highlight"><c- n>is_decoding_injective</c-></code> indicate whether or not the encode or decode operations provide a lossless map from the code_point to code_unit or vice-versa, respectively. This is important when using high-level conversion facilities: <a href="#design-high-level-safety">compile-time diagnostics</a> can be issued for conversions that are lossy. This ensures that users who do lossy conversions must specify an <code class="highlight"><c- n>error_handler</c-></code> from the standard or one of their own making and know what they are getting into with bad encodings.</p>
    <li data-md>
     <p><code class="highlight"><c- n>replacement_code_points</c-></code> is a function that returns a range to be entered into the output if an error occurs during a <code class="highlight"><c- n>decode</c-></code> call and the error handler used is the <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_handler</c-></code> or <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>replacement_handler</c-></code>. This provides encodings a simple way to plug in replacement code points that differ from the default replacement character, which is <code class="highlight">\<c- n>uFFFD</c-></code> (�). This can be defined to be an empty range (not recommended but possible).</p>
    <li data-md>
     <p><code class="highlight"><c- n>replacement_code_units</c-></code> is a function that returns a range to be entered into the output if an error occurs during an <code class="highlight"><c- n>encode</c-></code> call and the error handler used is the <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_handler</c-></code> or <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>replacement_handler</c-></code>. Note that not all encodings can handle the entirety of the Unicode Code Point space, let alone <code class="highlight">\<c- n>uFFFD</c-></code> (�). This can be defined to return an empty range (not recommended, but possible).</p>
   </ul>
   <h5 class="heading settled" data-level="3.2.3.1" id="design-low-level-encodings-standard"><span class="secno">3.2.3.1. </span><span class="content">Encodings Provided by the Standard</span><a class="self-link" href="#design-low-level-encodings-standard"></a></h5>
   <p>The primary reason for the standard to provide an encoding is to ensure that it produces a way for applications to communicate with one another. As a baseline, the standard should support all the encodings it ships with its string literal types. On top of that, there is an important base-level optimization when working with strictly ASCII text that can be implemented with UTF8, which most library implementers are interested in shipping. This means that the following encodings will be shipped by the standard library:</p>
<pre class="language-cpp highlight"><c- c1>// header: &lt;encoding></c->

<c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>using</c-> <c- n>unicode_code_point</c-> <c- o>=</c-> <c- b>char32_t</c-><c- p>;</c->
	<c- k>class</c-> <c- nc>unicode_scalar_value</c-><c- p>;</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>CharT</c-><c- o>></c->
	<c- k>class</c-> <c- nc>basic_utf8</c-><c- p>;</c->
	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>CharT</c-><c- o>></c->
	<c- k>class</c-> <c- nc>basic_utf16</c-><c- p>;</c->
	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>CharT</c-><c- o>></c->
	<c- k>class</c-> <c- nc>basic_utf32</c-><c- p>;</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- n>std</c-><c- o>::</c-><c- n>endian</c-> <c- n>endianness</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>endian</c-><c- o>::</c-><c- n>native</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>Byte</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>byte</c-><c- o>></c->
	<c- k>class</c-> <c- nc>encoding_scheme</c-><c- p>;</c->

	<c- k>class</c-> <c- nc>ascii</c-><c- p>;</c->
	<c- k>using</c-> <c- n>utf8</c-> <c- o>=</c-> <c- n>basic_utf8</c-><c- o>&lt;</c-><c- n>char8_t</c-><c- o>></c-><c- p>;</c->
	<c- k>using</c-> <c- n>utf16</c-> <c- o>=</c-> <c- n>basic_utf16</c-><c- o>&lt;</c-><c- b>char16_t</c-><c- o>></c-><c- p>;</c->
	<c- k>using</c-> <c- n>utf32</c-> <c- o>=</c-> <c- n>basic_utf32</c-><c- o>&lt;</c-><c- b>char32_t</c-><c- o>></c-><c- p>;</c->
	<c- k>class</c-> <c- nc>narrow_literal</c-><c- p>;</c->
	<c- k>class</c-> <c- nc>wide_literal</c-><c- p>;</c->
	<c- k>class</c-> <c- nc>narrow_execution</c-><c- p>;</c->
	<c- k>class</c-> <c- nc>wide_execution</c-><c- p>;</c->

<c- p>}}</c->
</pre>
   <p>All of <code class="highlight"><c- n>ascii</c-></code>, <code class="highlight"><c- n>utf8</c-></code>, <code class="highlight"><c- n>utf16</c-></code>, <code class="highlight"><c- n>utf32</c-></code>, <code class="highlight"><c- n>narrow_literal</c-></code>, and <code class="highlight"><c- n>wide_literal</c-></code> correspond directly and obviously to what they name. These six encodings are also <code class="highlight"><c- k>constexpr</c-></code>-capable encodings in that they can be called at compile-time and used inside of contexts with other <code class="highlight"><c- k>constexpr</c-></code> functions, such as within <code class="highlight"><c- k>static_assert</c-></code>s.</p>
   <p>Both <code class="highlight"><c- n>narrow_execution</c-></code> and <code class="highlight"><c- n>wide_execution</c-></code> represent the dynamic locale-based encoding that is used as the default encoding for C library functions. They are key encodings for interoperating with locale-dependent narrow execution encoding data as well as locale-dependent wide execution encoding data. It is imperative the standard ships these because only the implementation knows the runtime narrow or wide execution encoding. <code class="highlight"><c- n>encoding_scheme</c-></code>'s supremely helpful utility <a href="#design-low-level-encodings-encoding_scheme">is described below</a>.</p>
   <p>These represent the core 9 encodings that must be shipped with the standard, no matter what.</p>
   <p><code class="highlight"><c- n>ascii</c-></code> holds a special place here because it is a direct subset of <code class="highlight"><c- n>utf8</c-></code>. If an individual knows their text is in purely ASCII ahead of time and they work in UTF8, this information can be used to bit-blast (<code class="highlight"><c- n>memcpy</c-></code>) the data from UTF8 to ASCII. It is best the standard is given this ability and does not require hundreds of users to remake this very basic functionality in <a href="#design-speed-customization">customization points</a>.</p>
   <h5 class="heading settled" data-level="3.2.3.2" id="design-low-level-encodings-variant"><span class="secno">3.2.3.2. </span><span class="content">UTF Encodings: variants?</span><a class="self-link" href="#design-low-level-encodings-variant"></a></h5>
   <p>There are many variants of encodings like UTF8 and UTF16. These include <a data-link-type="biblio" href="#biblio-wtf8">[wtf8]</a> or <a data-link-type="biblio" href="#biblio-cesu8">[cesu8]</a> and are useful for internal processing and interoperability with certain systems, like direct interfacing with Java or communication with an Oracle database. However, almost none of these are publicly recommended as interchange formats: both CESU-8 and WTF-8 are documented and used internally for legacy reasons. In some cases, they also represent security vulnerabilities if they are used in interchange for the internet. This makes them less and less desirable to provide via the standard. However, it is worth acknowledging that supporting WTF-8 and CESU-8 as encodings will ease the burden on individuals who need to roll such encodings for their applications.</p>
   <p>More pressingly, there is a wide body of code that operates with <code class="highlight"><c- b>char</c-></code> as the code unit for their UTF8 encodings. This is also subtly wrong, because on a handful of systems <code class="highlight"><c- b>char</c-></code> is not unsigned, but signed. Math and bit characteristics for these types are wrong for the typical operations performed in UTF8 encoders and decoders (and many people -- including Markus Scherer that spends a lot of time with ICU -- just wish <code class="highlight"><c- b>char</c-></code> was unsigned since it would have saved a lot of time from bugs). On one hand, providing variants that allow someone to pick something like the code unit for UTF16 or UTF8 would make it easier to have text types which play nice with the Windows APIs or existing code bases. The interface would look something like this...</p>
<pre class="language-cpp highlight"><c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>CharT</c-><c- p>,</c-> <c- b>bool</c-> <c- n>encode_null</c-><c- p>,</c-> <c- b>bool</c-> <c- n>encode_lone_surrogates</c-><c- o>></c->
	<c- k>class</c-> <c- nc>basic_utf8</c-><c- p>;</c->

	<c- k>using</c-> <c- n>utf8</c-> <c- o>=</c-> <c- n>basic_utf8</c-><c- o>&lt;</c-><c- n>char8_t</c-><c- p>,</c-> false<c- p>,</c-> false<c- o>></c-><c- p>;</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>CharT</c-><c- p>,</c-> <c- b>bool</c-> <c- n>allow_lone_surrogates</c-><c- o>></c->
	<c- k>class</c-> <c- nc>basic_utf16</c-><c- p>;</c->

	<c- k>using</c-> <c- n>utf16</c-> <c- o>=</c-> <c- n>basic_utf16</c-><c- o>&lt;</c-><c- b>char16_t</c-><c- p>,</c-> false<c- o>></c-><c- p>;</c->

<c- p>}}</c->
</pre>
   <p>And externally, libraries and applications could add their own using statements and type definitions for the purposes of internal interoperation:</p>
<pre class="language-cpp highlight"><c- k>namespace</c-> <c- n>my_app</c-> <c- p>{</c->

	<c- k>using</c-> <c- n>compat_utf8</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>basic_utf8</c-><c- o>&lt;</c-><c- b>char</c-><c- p>,</c-> false<c- p>,</c-> false<c- o>></c-><c- p>;</c->
	<c- k>using</c-> <c- n>mutf8</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>basic_utf8</c-><c- o>&lt;</c-><c- n>char8_t</c-><c- p>,</c-> true<c- p>,</c-> false<c- o>></c-><c- p>;</c->
	<c- k>using</c-> <c- n>filesystem16</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>basic_utf16</c-><c- o>&lt;</c-><c- b>wchar_t</c-><c- p>,</c-> true<c- o>></c-><c- p>;</c->

<c- p>}</c->
</pre>
   <p>There is clear utility that can be had here. But, this is not going to be looked into too deeply for the first iterations of this proposal. If there is a need, users are strongly encouraged to chime in (speak up) quickly so that this feature can be added to the proposal before later progression stages.</p>
   <p>Finally, there is a plan that for early C++26, the full gamut of WHATWG encodings will be added to the standard, since this covers the minimal viable set of encodings that is required for communicating across the internet and through messaging mediums such as e-mail successfully.</p>
   <h5 class="heading settled" data-level="3.2.3.3" id="design-low-level-encodings-encoding_scheme"><span class="secno">3.2.3.3. </span><span class="content">Encoding Schemes: Byte-Based</span><a class="self-link" href="#design-low-level-encodings-encoding_scheme"></a></h5>
   <p>Unicode specifies what are called Encoding Schemes for the encodings whose code unit size exceeds a single byte. This is essentially UTF16 and UTF32, of which there is UTF16 Little Endian (UTF16-LE), UTF16 Big Endian (UTF16-BE), UTF32 Little Endian (UTF32-LE), and UTF32 Big Endian (UTF32-BE). Encoding schemes can be generically handled without creating extremely specific encodings by creating an <code class="highlight"><c- n>encoding_scheme</c-><c- o>&lt;</c-><c- p>...</c-><c- o>></c-></code> template. It will look much like so:</p>
<pre class="language-cpp highlight"><c- c1>// header: &lt;encoding></c->

<c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- n>std</c-><c- o>::</c-><c- n>endian</c-> <c- n>endianness</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>endian</c-><c- o>::</c-><c- n>native</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>Byte</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>byte</c-><c- o>></c->
	<c- k>class</c-> <c- nc>encoding_scheme</c-><c- p>;</c->

<c- p>}}</c->
</pre>
   <p>This is a transformative encoding type that takes the source endianness and translates it to the native endianness. It has an identical interface to the <code class="highlight"><c- n>Encoding</c-></code> type passed in, with the caveat that the <code class="highlight"><c- n>code_unit</c-></code> member type is the same as <code class="highlight"><c- n>Byte</c-></code>. The <code class="highlight"><c- n>Byte</c-></code> type being configurable is important because there are many interfaces which interoperate using <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>byte</c-></code>, <code class="highlight"><c- b>unsigned</c-> <c- b>char</c-></code>, and <code class="highlight"><c- b>char</c-></code> in the ecosystem. Furthermore, others have realized they can get better performance from their code by avoiding aliasing types altogether and using <code class="highlight"><c- k>enum</c-> <c- nl>octet</c-> <c- p>:</c-> <c- b>unsigned</c-> <c- b>char</c-> <c- p>{};</c-></code> with the necessary definitions to make it usable.</p>
   <p>All <code class="highlight"><c- n>encoding_scheme</c-></code> does is call the same <code class="highlight"><c- n>encode</c-></code> or <code class="highlight"><c- n>decode</c-></code> function with small wrappers around the passed-in ranges that takes bytes and composes them into the internal <code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> type, or when writing out takes an <code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> type and writes it out into its byte-based form.</p>
   <p>A few SG16 members have frequently advocated that the base input and outputs for all types matching the <code class="highlight"><c- n>Encoding</c-></code> concept should be byte-based. This paper disagrees with that supposition and instead goes the route of providing this wrapping encoding scheme. The benefit here is flexibility and independence from byte ordering at the <code class="highlight"><c- n>Encoding</c-></code> level: the <code class="highlight"><c- n>encoding_scheme</c-></code> becomes the layer at which such a concern is both concentrated and isolated. Now, <em>no</em> encoding needs to duplicate its interface at all, while still retaining strong and separately named types that one can perform additional optimization on.</p>
   <p><strong class="advisement"> Writing mostly-duplicate encoding object types for <code class="highlight"><c- n>utf16_be</c-></code>, <code class="highlight"><c- n>utf16_le</c-></code>, and other such shenanigans is a thorough and fundamental waste of everyone’s time.</strong></p>
   <p>This direction is far less boilerplate, and  has also already seen implementation experience in <a data-link-type="biblio" href="#biblio-libogonek">[libogonek]</a>'s <a data-link-type="biblio" href="#biblio-libogonek-encoding_scheme">[libogonek-encoding_scheme]</a> type. Users have not complained. It has also proved to be implementable by simply decomposing the original input/output ranges into their iterators, and wrapping said iterators with a <code class="highlight"><c- n>__detail</c-><c- o>::</c-><c- n>byte_iterator</c-><c- o>&lt;</c-><c- n>OriginalIterator</c-><c- o>></c-></code>. It has worked well.</p>
   <h5 class="heading settled" data-level="3.2.3.4" id="design-low-level-encodings-default"><span class="secno">3.2.3.4. </span><span class="content">Default Encodings</span><a class="self-link" href="#design-low-level-encodings-default"></a></h5>
   <p>For interactions with encodings, there are times when a default encoding may be inferred from input and output types in <a href="#design-high-level">§ 3.3 High Level</a>'s functions. Thusly, 2 traits provide defaults that can be overridden by the program:</p>
<pre class="language-cpp highlight"><c- c1>// header: &lt;encoding></c->

<c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->
	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>T</c-><c- o>></c->
	<c- k>using</c-> <c- n>default_code_unit_encoding_t</c-> <c- o>=</c-> <c- d>/* ... */</c-><c- p>;</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>T</c-><c- o>></c->
	<c- k>using</c-> <c- n>default_code_point_encoding_t</c-> <c- o>=</c-> <c- d>/* ... */</c-><c- p>;</c->
<c- p>}}</c->
</pre>
   <p>The implementation for the standard will attempt to select one of the following, or fail, for <code class="highlight"><c- n>default_code_unit_encoding_t</c-></code>:</p>
   <ul>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>narrow_execution</c-></code> if <code class="highlight"><c- n>T</c-></code> is (possibly cv-qualified) <code class="highlight"><c- b>char</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>wide_execution</c-></code> if <code class="highlight"><c- n>T</c-></code> is (possibly cv-qualified) <code class="highlight"><c- b>wchar_t</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-></code> if <code class="highlight"><c- n>T</c-></code> is (possibly cv-qualified) <code class="highlight"><c- n>char8_t</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf16</c-></code> if <code class="highlight"><c- n>T</c-></code> is (possibly cv-qualified) <code class="highlight"><c- b>char16_t</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf32</c-></code> if <code class="highlight"><c- n>T</c-></code> is (possibly cv-qualified) <code class="highlight"><c- b>char32_t</c-></code>, <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>unicode_code_point</c-></code>, or <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>unicode_scalar_value</c-></code>.</p>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_scheme</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-><c- o>></c-></code> if <code class="highlight"><c- n>T</c-></code> is (possibly cv-qualified) <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>byte</c-></code>.</p>
    <li data-md>
     <p>Otherwise, the program is ill-formed.</p>
   </ul>
   <p>For <code class="highlight"><c- n>default_code_point_encoding_t</c-></code>:</p>
   <ul>
    <li data-md>
     <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-></code> if <code class="highlight"><c- n>T</c-></code> is one of (possibly cv-qualified) <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>unicode_code_point</c-></code>, <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>unicode_scalar_value</c-></code> or <code class="highlight"><c- b>char32_t</c-></code>.</p>
    <li data-md>
     <p>Otherwise, the program is ill-formed.</p>
   </ul>
   <h4 class="heading settled" data-level="3.2.4" id="design-low-level-stateful"><span class="secno">3.2.4. </span><span class="content">Stateful Objects, or Stateful Parameters?</span><a class="self-link" href="#design-low-level-stateful"></a></h4>
   <p>Stateful objects are good for encapsulation, reuse and transportation. They have been proven in many APIs both C and C++ to provide a good, reentrant API with all relevant details captured on the (sometimes opaque) object itself. After careful evaluation, stateful parameters rather than wholly stateful objects for the function calls in encoding and decoding types are a better choice for this low-level interface. The main and important benefits for having the state be passed to the encoding / decoding function calls as a parameter are that it:</p>
   <ul>
    <li data-md>
     <p>maintains that encoding objects can be cheap to construct, copy and move;</p>
    <li data-md>
     <p>improves the general reusability of encoding objects by allowing state to be massaged into certain configurations by users;</p>
    <li data-md>
     <p>and, allows users to set the state in a public way without having to prescribe a specific API for all encoders to do that.</p>
   </ul>
   <p>The reason for keeping encoding types cheap is that they will be constructed, copied, and moved a lot, especially in the face of the ranges that SG16 is going to be putting a lot of work into (<code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>text_view</c-><c- o>&lt;</c-><c- n>View</c-><c- p>,</c-> <c- n>Encoding</c-><c- p>,</c-> <c- p>...</c-><c- o>></c-></code> in a future paper, <code class="highlight"><c- n>normalization_view</c-><c- o>&lt;</c-><c- n>View</c-><c- p>,</c-> <c- n>NormalizationForm</c-><c- o>></c-></code> in a future paper, <code class="highlight"><c- n>decode_view</c-><c- o>&lt;</c-><c- p>...</c-><c- o>>/</c-><c- n>encode_view</c-><c- o>&lt;</c-><c- p>...</c-><c- o>>/</c-><c- n>transcode_view</c-><c- o>&lt;</c-><c- p>...</c-><c- o>></c-></code> in <a href="#design-high-level-ranges">this paper</a>). Ranges require that they can be constructed in (amortized) constant time; this change allows shifting the construction for what may be potentially expensive state to other places by un-bundling them from <code class="highlight"><c- n>Encoding</c-></code> object construction.</p>
   <p>Consider the case of execution encoding character sets today, which often defer to the current locale. Locale is inherently expensive to construct and use: if the standard has to have an encoding that grabs or creates a <code class="highlight"><c- n>codecvt</c-></code> or <code class="highlight"><c- n>locale</c-></code> member, there will be an immediate loss of a large portion of users over the performance drag during construction of higher-level abstractions that rely on the encoding. It is also notable that this is the same <a data-link-type="biblio" href="#biblio-sol2-wstring_convert">mistake std::wstring_convert shipped with</a> and is one of the largest contributing reasons to its lack of use and subsequent deprecation (on top of its poor implementation in almost every standard library, from the VC++ standard library to libc++).</p>
   <p>In contrast, consider having an explicit parameter. At the cost of making a low-level interface take one more argument, the state can be paid for once and reused in many separate places, allowing a user to front-load the state’s expenses up-front. It also allows the users to set or get the locale ahead of time and reuse it consistently. It also allows for encoding or decoding operations to be reused or restarted in the cases of interruptible or incomplete streams, such as network reading or I/O buffering. These are potent use cases wherein such a design decision becomes very helpful.</p>
   <p>Finally, this paradigm makes it far more obvious to the end user when the state is inseparable from the encoding object itself. This is the case with a theoretical <code class="highlight"><c- n>any_encoding</c-></code> and <code class="highlight"><c- n>variant_encoding</c-><c- o>&lt;</c-><c- n>Encoding0</c-><c- p>,</c-> <c- n>Encoding1</c-><c- p>,</c-> <c- p>...,</c-> <c- n>EncodingN</c-><c- o>></c-></code>. The necessary state cannot be separated from the encoding object itself: that information is secret in the encoding. A full video exploration of the space can be <a href="https://youtu.be/FQHofyOgQtM?t=1599">found here</a>. In short: there must be a way to ensure that a user can create an encoding that has state that is erased within the current compile-time framework. This is how we afford those encodings the ability to work without imposing undue burden on the entire system. It is easy to check if the <code class="highlight"><c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> type is the same as the <code class="highlight"><c- n>Encoding</c-></code> type, and if that is the case make slight adjustments.</p>
   <h3 class="heading settled" data-level="3.3" id="design-high-level"><span class="secno">3.3. </span><span class="content">High Level</span><a class="self-link" href="#design-high-level"></a></h3>
   <p>Working with the lower level facilities for text processing is not a pretty sight. Consider the usage of the <a href="#design-low-level">low-level facilities</a> described above:</p>
<pre class="language-cpp highlight"><c- cp>#include</c-> &lt;cassert>
<c- cp>#include</c-> &lt;encoding>
<c- cp>#include</c-> &lt;iterator>
<c- cp>#include</c-> &lt;span>
<c- cp>#include</c-> &lt;string_view>

<c- b>int</c-> <c- nf>main</c-> <c- p>()</c-> <c- p>{</c->
	<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>unicode_code_point</c-> <c- n>array_output</c-><c- p>[</c-><c- mi>41</c-><c- p>]{};</c->
	<c- n>std</c-><c- o>::</c-><c- n>u8string_view</c-> <c- n>input</c-> <c- o>=</c-> u8<c- s>"𐌼𐌰𐌲 𐌲𐌻𐌴𐍃 𐌹̈𐍄𐌰𐌽, 𐌽𐌹 𐌼𐌹𐍃 𐍅𐌿 𐌽𐌳𐌰𐌽 𐌱𐍂𐌹𐌲𐌲𐌹𐌸."</c-><c- p>;</c->

	<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-> <c- n>encoding</c-><c- p>{};</c->

	<c- n>std</c-><c- o>::</c-><c- n>u8string_view</c-> <c- n>working_input</c-> <c- o>=</c-> <c- n>input</c-><c- p>;</c->
	<c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>unicode_code_point</c-><c- o>></c-> <c- n>working_output</c-><c- p>(</c-><c- n>array_output</c-><c- p>);</c->
	<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_handler</c-> <c- n>handler</c-><c- p>{};</c->
	<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-><c- o>::</c-><c- n>state</c-> <c- n>encoding_state</c-><c- p>{};</c->

	<c- k>for</c-> <c- p>(;;)</c-> <c- p>{</c->
		<c- k>auto</c-> <c- n>result</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>decode</c-><c- p>(</c-><c- n>working_input</c-><c- p>,</c-> <c- n>working_output</c-><c- p>,</c->
			<c- n>handler</c-><c- p>,</c-> <c- n>encoding_state</c-><c- p>);</c->
		<c- k>if</c-> <c- p>(</c-><c- n>result</c-><c- p>.</c-><c- n>error_code</c-> <c- o>!=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-><c- p>)</c-> <c- p>{</c->
			<c- c1>// not what we wanted.</c->
			<c- k>return</c-> <c- o>-</c-><c- mi>1</c-><c- p>;</c->
		<c- p>}</c->
		<c- k>if</c-> <c- p>(</c-><c- n>std</c-><c- o>::</c-><c- n>empty</c-><c- p>(</c-><c- n>result</c-><c- p>.</c-><c- n>input</c-><c- p>))</c-> <c- p>{</c->
			<c- k>break</c-><c- p>;</c->
		<c- p>}</c->
		<c- n>working_input</c->  <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>move</c-><c- p>(</c-><c- n>result</c-><c- p>.</c-><c- n>input</c-><c- p>);</c->
		<c- n>working_output</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>move</c-><c- p>(</c-><c- n>result</c-><c- p>.</c-><c- n>output</c-><c- p>);</c->
	<c- p>}</c->

	<c- n>assert</c-><c- p>(</c-><c- n>std</c-><c- o>::</c-><c- n>u32string_view</c-><c- p>(</c-><c- n>array_output</c-><c- p>)</c-> <c- o>==</c-> U<c- s>"𐌼𐌰𐌲 𐌲𐌻𐌴𐍃 𐌹̈𐍄𐌰𐌽, 𐌽𐌹 𐌼𐌹𐍃 𐍅𐌿 𐌽𐌳𐌰𐌽 𐌱𐍂𐌹𐌲𐌲𐌹𐌸."</c-><c- p>);</c->

	<c- k>return</c-> <c- mi>0</c-><c- p>;</c->
<c- p>}</c->
</pre>
   <p>These low-level facilities -- while powerful and customizable -- do not represent what the average user will -- or should -- be wrangling with. Therefore, the higher-level facilities become incredibly pressing to make these interfaces palatable and sustainable for developers in both the short and long term. Consider the same encoding functionality, boiled down to something far easier to use:</p>
<pre class="language-cpp highlight"><c- n>std</c-><c- o>::</c-><c- n>u32string</c-> <c- n>output</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>decode</c-><c- p>(</c->u8<c- s>"𐌼𐌰𐌲 𐌲𐌻𐌴𐍃 𐌹̈𐍄𐌰𐌽, 𐌽𐌹 𐌼𐌹𐍃 𐍅𐌿 𐌽𐌳𐌰𐌽 𐌱𐍂𐌹𐌲𐌲𐌹𐌸."</c-><c- p>);</c->
<c- n>assert</c-><c- p>(</c-><c- n>output</c-> <c- o>==</c-> U<c- s>"𐌼𐌰𐌲 𐌲𐌻𐌴𐍃 𐌹̈𐍄𐌰𐌽, 𐌽𐌹 𐌼𐌹𐍃 𐍅𐌿 𐌽𐌳𐌰𐌽 𐌱𐍂𐌹𐌲𐌲𐌹𐌸."</c-><c- p>);</c->
</pre>
   <p>This is much simpler and does exactly the same as the above, without all the setup and boilerplate. Of course, taking only the input and giving the output is too <strong>much</strong> of a simplification, so there are a few overloads and variants that will be offered. Particularly, there needs to be 3 sets of free functions: <code class="highlight"><c- n>decode</c-></code>/<code class="highlight"><c- n>decode_into</c-></code>, <code class="highlight"><c- n>encode</c-></code>/<code class="highlight"><c- n>encode_into</c-></code>, and <code class="highlight"><c- n>transcode</c-></code>/<code class="highlight"><c- n>transcode_into</c-></code>. These are high-level functions that perform essentially what is shown above, but with numerous overloads that default a few parameters in the case where they can be figured out.</p>
   <p>Note that, at the core of all these functions, the loop as shown above captures the core of the work. All of these abstractions are built on the 7 basis operations specified in <a href="#design-low-level-encodings">§ 3.2.3 The Encoding Object</a>. Actually getting additional optimizations is, of course, left to the readers and implementers.</p>
   <h4 class="heading settled" data-level="3.3.1" id="design-high-level-free"><span class="secno">3.3.1. </span><span class="content">Eager Free Functions</span><a class="self-link" href="#design-high-level-free"></a></h4>
   <p>The free functions are written in a way to eagerly consume input and output space, unless given an explicit output container which limits its behavior or an error occurs. This is beneficial because many text processing algorithms receive the bulk of their gains by being able to work on multiple code units / code points. Therefore, this layer of the high level API is provided to satisfy the need where input and output space are of little concern.</p>
   <h5 class="heading settled" data-level="3.3.1.1" id="design-high-level-free-decode"><span class="secno">3.3.1.1. </span><span class="content">Free Function <code class="highlight"><c- n>decode</c-></code></span><a class="self-link" href="#design-high-level-free-decode"></a></h5>
   <p>The <code class="highlight"><c- n>decode</c-></code> free function provides a High Level API for decoding text. It allows performance with some degree of flexibility and customization through its parameters, as well as additional improvements with the use of some ADL customization points. The core loop behaves as follows:</p>
   <ol start="0">
    <li data-md>
     <p>Performing an <code class="highlight"><c- k>auto</c-> <c- n>result</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>decode_one</c-><c- p>(...)</c-></code> call using the current target input and output views.</p>
    <li data-md>
     <p>Checking if the return value’s error code is <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code>, and returning the result early if it is not.</p>
    <li data-md>
     <p>Checking <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>empty</c-><c- p>(</c-><c- n>result</c-><c- p>.</c-><c- n>input</c-><c- p>)</c-></code>, and returning with a result that has <code class="highlight"><c- n>error_code</c-></code> set to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code> if it is empty.</p>
    <li data-md>
     <p>Otherwise, go to 0 and use the <code class="highlight"><c- n>result</c-><c- p>.</c-><c- n>input</c-></code> and <code class="highlight"><c- n>result</c-><c- p>.</c-><c- n>output</c-></code> views.</p>
   </ol>
   <p>The surface of the <code class="highlight"><c- n>decode</c-></code> API is as follows:</p>
<pre class="language-cpp highlight"><c- c1>// header: &lt;encoding></c->

<c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>State</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>decode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c-> 
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>,</c-> <c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>decode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>decode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>decode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>State</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>decode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>,</c-> <c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>decode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>decode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>decode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>);</c->

<c- p>}}</c->
</pre>
   <p>The order of arguments is chosen based on what users are likely to specify first. In many cases, all that is needed is the input: the encoding can be chosen automatically for the user based on such. For <code class="highlight"><c- n>decode</c-></code>, the <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_code_unit_encoding_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>Input</c-><c- o>>></c-></code> encoding type is picked (see <a href="#design-low-level-encodings-default">§ 3.2.3.4 Default Encodings</a>). Otherwise, the user must specify the <code class="highlight"><c- n>encoding</c-></code> object to use themselves. The third parameter is the error handler, which is defaulted to a parameter of type <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_handler</c-></code>. The fourth parameter is the state that is used to do the conversion. Given a type <code class="highlight"><c- n>UEncoding</c-></code> which is <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code>, by default, the following is passed:</p>
   <ul>
    <li data-md>
     <p>If <code class="highlight"><c- n>is_encoding_self_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> is true, then <code class="highlight"><c- n>encoding</c-><c- p>.</c-><c- n>reset_state</c-><c- p>();</c-></code> is called and <code class="highlight"><c- n>encoding</c-></code> is passed as the <code class="highlight"><c- n>State</c-><c- o>&amp;</c-></code> parameter to the appropriate overload.</p>
    <li data-md>
     <p>Otherwise, <code class="highlight"><c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-><c- p>{}</c-></code> is used as the parameter to the appropriate overload.</p>
   </ul>
   <p>The <code class="highlight"><c- n>decode</c-></code> family of functions returns a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>basic_string</c-><c- o>&lt;</c-><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>>></c-></code> after calling <code class="highlight"><c- n>decode_into</c-></code> with a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>unbounded_view</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>back_inserter</c-><c- o>&lt;</c-><c- p>...</c-><c- o>>></c-></code> that fills in the <code class="highlight"><c- n>basic_string</c-></code>. <code class="highlight"><c- n>decode_into</c-></code> returns a <code class="highlight"><c- n>decode_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>Output</c-><c- p>,</c-> <c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>>></c-></code>.</p>
   <p class="note" role="note"><span>Note:</span> in the current running implementation, there are also separate overloads for <code class="highlight"><c- n>decode</c-></code> that take an extra template parameter at the beginning called <code class="highlight"><c- n>Result</c-></code>, which allows the user to write e.g. <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>decode</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>vector</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- b>uint32_t</c-><c- o>>></c-><c- p>(...)</c-></code> and similar. It is not included in this proposal right now but will be added later, for the purposes of allowing different output types with the simpler calls.</p>
   <h5 class="heading settled" data-level="3.3.1.2" id="design-high-level-free-encode"><span class="secno">3.3.1.2. </span><span class="content">Free Function <code class="highlight"><c- n>encode</c-></code></span><a class="self-link" href="#design-high-level-free-encode"></a></h5>
   <p>The <code class="highlight"><c- n>encode</c-></code> free function provides a High Level API for encoding text. It allows performance with some degree of flexibility and customization through its parameters, as well as additional improvements with the use of some ADL customization points. The core loop behaves as follows:</p>
   <ol start="0">
    <li data-md>
     <p>Performing an <code class="highlight"><c- k>auto</c-> <c- n>result</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>encode_one</c-><c- p>(...)</c-></code> call using the current target input and output views.</p>
    <li data-md>
     <p>Checking if the return value’s error code is <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code>, and returning the result early if it is not.</p>
    <li data-md>
     <p>Checking <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>empty</c-><c- p>(</c-><c- n>result</c-><c- p>.</c-><c- n>input</c-><c- p>)</c-></code>, and returning with a result that has <code class="highlight"><c- n>error_code</c-></code> set to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code> if it is empty.</p>
    <li data-md>
     <p>Otherwise, go to 0 and use the <code class="highlight"><c- n>result</c-><c- p>.</c-><c- n>input</c-></code> and <code class="highlight"><c- n>result</c-><c- p>.</c-><c- n>output</c-></code> views.</p>
   </ol>
   <p>The surface of the <code class="highlight"><c- n>encode</c-></code> API is as follows:</p>
<pre class="language-cpp highlight"><c- c1>// header: &lt;encoding></c->

<c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>State</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>encode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>,</c-> <c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>encode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>encode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>encode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>State</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>encode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>,</c-> <c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>encode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>encode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>encode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>);</c->

<c- p>}}</c->
</pre>
   <p>For <code class="highlight"><c- n>encode</c-></code>, a default encoding of <code class="highlight"><c- n>default_code_point_encoding_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>Input</c-><c- o>>></c-></code> (<a href="#design-low-level-encodings-default">§ 3.2.3.4 Default Encodings</a>) is picked when no <code class="highlight"><c- n>Encoding</c-></code> object is provided. For <code class="highlight"><c- n>encode_into</c-></code> -- which takes an output range to write code units into -- the following is done:</p>
   <ul>
    <li data-md>
     <p>If <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>is_same_v</c-><c- o>&lt;</c-><c- k>typename</c-> <c- n>std</c-><c- o>::</c-><c- n>iterator_traits</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_iterator_t</c-><c- o>&lt;</c-><c- n>Output</c-><c- o>>>::</c-><c- n>iterator_category</c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>output_iterator_tag</c-><c- o>></c-></code> is false, <code class="highlight"><c- n>default_code_unit_encoding_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>Output</c-><c- o>>></c-><c- p>{}</c-></code> is used.</p>
    <li data-md>
     <p>Otherwise, if the iterator category of the iterators of the output range are <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>output_iterator_tag</c-></code>s, <code class="highlight"><c- n>default_code_point_encoding_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>Input</c-><c- o>>></c-><c- p>{}</c-></code> is used.</p>
   </ul>
   <p>Otherwise, the user must specify the <code class="highlight"><c- n>encoding</c-></code> object to use themselves. The third parameter is the error handler, which is defaulted to a parameter of type <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_handler</c-></code>. The fourth parameter is the state to be used. If it is not provided, then the following is used:</p>
   <ul>
    <li data-md>
     <p>If <code class="highlight"><c- n>is_encoding_self_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> is true, then <code class="highlight"><c- n>encoding</c-><c- p>.</c-><c- n>reset_state</c-><c- p>();</c-></code> is called and <code class="highlight"><c- n>encoding</c-></code> is passed as the <code class="highlight"><c- n>State</c-><c- o>&amp;</c-></code> parameter to the appropriate overload.</p>
    <li data-md>
     <p>Otherwise, <code class="highlight"><c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-><c- p>{}</c-></code> is used as the parameter to the appropriate overload.</p>
   </ul>
   <p>The <code class="highlight"><c- n>encode</c-></code> family of functions returns a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>basic_string</c-><c- o>&lt;</c-><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>>></c-></code> after calling <code class="highlight"><c- n>encode_into</c-></code> with a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>unbounded_view</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>back_inserter</c-><c- o>&lt;</c-><c- p>...</c-><c- o>>></c-></code> that fills in the <code class="highlight"><c- n>basic_string</c-></code>. <code class="highlight"><c- n>encode_into</c-></code> returns an <code class="highlight"><c- n>encode_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>Output</c-><c- p>,</c-> <c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>>></c-></code>.</p>
   <p class="note" role="note"><span>Note:</span> in the current running implementation, there are also separate overloads for <code class="highlight"><c- n>encode</c-></code> that take an extra template parameter at the beginning called <code class="highlight"><c- n>Output</c-></code>, which allows the user to write e.g. <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>vector</c-><c- o>&lt;</c-><c- b>uint8_t</c-><c- o>>></c-><c- p>(...)</c-></code> and similar. It is not included in this proposal right now but will be added later, for the purposes of allowing different output types with the simpler calls.</p>
   <h5 class="heading settled" data-level="3.3.1.3" id="design-high-level-free-transcode"><span class="secno">3.3.1.3. </span><span class="content">Free Function <code class="highlight"><c- n>transcode</c-></code></span><a class="self-link" href="#design-high-level-free-transcode"></a></h5>
   <p>The <code class="highlight"><c- n>transcode</c-></code> free function provides a High Level API for transforming text from one encoding to another. It allows performance with some degree of flexibility and customization through its parameters, as well as additional improvements with the use of some ADL customization points. The core loop behaves as follows:</p>
   <ol start="0">
    <li data-md>
     <p>Performing an <code class="highlight"><c- k>auto</c-> <c- n>d_result</c-> <c- o>=</c-> <c- n>from_encoding</c-><c- p>.</c-><c- n>decode_one</c-><c- p>(...)</c-></code> call using the current input view and an intermediate temporary output of <code class="highlight"><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-> <c- n>intermediate</c-><c- p>[</c-><c- n>FromEncoding</c-><c- o>::</c-><c- n>max_code_points</c-><c- p>];</c-></code>.</p>
    <li data-md>
     <p>Checking if the return value’s error code is <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code>, and returning the result early if it is not.</p>
    <li data-md>
     <p>Performing an <code class="highlight"><c- k>auto</c-> <c- n>e_result</c-> <c- o>=</c-> <c- n>to_encoding</c-><c- p>.</c-><c- n>encode_one</c-><c- p>(...)</c-></code> call using the previous temporary <code class="highlight"><c- n>intermediate</c-></code> output wrapped in a view as the input and the target output view.</p>
    <li data-md>
     <p>Checking if the return value’s error code is <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code>, and returning the result early if it is not.</p>
    <li data-md>
     <p>Checking <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>empty</c-><c- p>(</c-><c- n>d_result</c-><c- p>.</c-><c- n>input</c-><c- p>)</c-></code>, and returning with a result that has <code class="highlight"><c- n>error_code</c-></code> set to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>ok</c-></code> if it is empty.</p>
    <li data-md>
     <p>Otherwise, go to 0 and use the <code class="highlight"><c- n>d_result</c-><c- p>.</c-><c- n>input</c-></code> and <code class="highlight"><c- n>e_result</c-><c- p>.</c-><c- n>output</c-></code> views.</p>
   </ol>
   <p>The surface of the <code class="highlight"><c- n>transcode</c-></code> API is as follows:</p>
<pre class="language-cpp highlight"><c- c1>// header: &lt;encoding></c->

<c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ToErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromState</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToState</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c->
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> <c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>,</c->
		<c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c-> <c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>,</c->
		<c- n>FromState</c-><c- o>&amp;</c-> <c- n>from_state</c-><c- p>,</c-> <c- n>ToState</c-><c- o>&amp;</c-> <c- n>to_state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ToErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromState</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c->
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> <c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>,</c->
		<c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c->
		<c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>,</c-> <c- n>FromState</c-><c- o>&amp;</c-> <c- n>from_state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c->
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> <c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>,</c->
		<c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c-> <c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c->
		<c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> <c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>,</c->
		<c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>FromEncoding</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> 
		<c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c-> <c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode_into</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Output</c-><c- o>&amp;&amp;</c-> <c- n>output</c-><c- p>,</c-> <c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ToErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromState</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToState</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c->
		<c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>,</c-> <c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c->
		<c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>,</c-> <c- n>FromState</c-><c- o>&amp;</c-> <c- n>from_state</c-><c- p>,</c->
		<c- n>ToState</c-><c- o>&amp;</c-> <c- n>to_state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ToErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromState</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c->
		<c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>,</c-> <c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c->
		<c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>,</c-> <c- n>FromState</c-><c- o>&amp;</c-> <c- n>from_state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ToErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c->
		<c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>,</c-> <c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c->
		<c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromErrorHandler</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c->
		<c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>,</c-> <c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from_encoding</c-><c- p>,</c-> <c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to_encoding</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>transcode</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>);</c->

<c- p>}}</c->
</pre>
   <p>For <code class="highlight"><c- n>transcode</c-></code>, a default encoding of <code class="highlight"><c- n>default_code_point_encoding_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>Input</c-><c- o>>></c-></code> (<a href="#design-low-level-encodings-default">§ 3.2.3.4 Default Encodings</a>) is picked when no <code class="highlight"><c- n>FromEncoding</c-></code> object is provided. For <code class="highlight"><c- n>transcode_into</c-></code> -- which takes an output range to write code units into -- the following is done:</p>
   <ul>
    <li data-md>
     <p>If <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>is_same_v</c-><c- o>&lt;</c-><c- k>typename</c-> <c- n>std</c-><c- o>::</c-><c- n>iterator_traits</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_iterator_t</c-><c- o>&lt;</c-><c- n>Output</c-><c- o>>>::</c-><c- n>iterator_category</c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>output_iterator_tag</c-><c- o>></c-></code> is false, <code class="highlight"><c- n>default_code_point_encoding_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>Output</c-><c- o>>></c-><c- p>{}</c-></code> is used.</p>
    <li data-md>
     <p>Otherwise, if the iterator category of the iterators of the output range are <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>output_iterator_tag</c-></code>s, <code class="highlight"><c- n>default_code_point_encoding_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>Input</c-><c- o>>></c-><c- p>{}</c-></code> is used.</p>
   </ul>
   <p>Otherwise, the user must specify the <code class="highlight"><c- n>encoding</c-></code> object to use themselves. The third parameter is the error handler, which is defaulted to a parameter of type <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_handler</c-></code>. The fourth parameter is the state to be used. If it is not provided, given a type <code class="highlight"><c- n>UEncoding</c-></code> which is <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>remove_cvref_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> then the following is used:</p>
   <ul>
    <li data-md>
     <p>If <code class="highlight"><c- n>is_encoding_self_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> is true, then <code class="highlight"><c- n>encoding</c-><c- p>.</c-><c- n>reset_state</c-><c- p>();</c-></code> is called and <code class="highlight"><c- n>encoding</c-></code> is passed as the <code class="highlight"><c- n>State</c-><c- o>&amp;</c-></code> parameter to the appropriate overload.</p>
    <li data-md>
     <p>Otherwise, <code class="highlight"><c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-><c- p>{}</c-></code> is used as the parameter to the appropriate overload.</p>
   </ul>
   <p>The <code class="highlight"><c- n>transcode</c-></code> family of functions returns a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>basic_string</c-><c- o>&lt;</c-><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>ToEncoding</c-><c- o>>></c-></code> after calling <code class="highlight"><c- n>transcode_into</c-></code> with a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>unbounded_view</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>back_inserter</c-><c- o>&lt;</c-><c- p>...</c-><c- o>>></c-></code> that fills in the <code class="highlight"><c- n>basic_string</c-></code>.</p>
   <p class="note" role="note"><span>Note:</span> in the current running implementation, there are also separate overloads for <code class="highlight"><c- n>transcode</c-></code> that take an extra template parameter at the beginning called <code class="highlight"><c- n>Output</c-></code>, which allows the user to write e.g. <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>transcode</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>vector</c-><c- o>&lt;</c-><c- b>uint16_t</c-><c- o>>></c-><c- p>(...)</c-></code> and similar. It is not included in this proposal right now but will be added later, for the purposes of allowing different output types with the simpler calls.</p>
   <h5 class="heading settled" data-level="3.3.1.4" id="design-high-level-free-validate"><span class="secno">3.3.1.4. </span><span class="content">Free Function <code class="highlight"><c- n>validate</c-></code></span><a class="self-link" href="#design-high-level-free-validate"></a></h5>
   <p>The <code class="highlight"><c- n>validate</c-></code> free function provides a High Level API for checking that a range of text is properly in the encoding provided by the user. Its default core implementation works by:</p>
   <ol start="0">
    <li data-md>
     <p>Performing an <code class="highlight"><c- k>auto</c-> <c- n>result</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>decode_one</c-><c- p>(...)</c-></code> call on the input into an intermediate buffer.</p>
    <li data-md>
     <p>Checking if an error occurred, and returning failure if so.</p>
    <li data-md>
     <p>Performing an <code class="highlight"><c- k>auto</c-> <c- n>intermediate_result</c-> <c- o>=</c-> <c- n>encoding</c-><c- p>.</c-><c- n>encode_one</c-><c- p>(...)</c-></code> call on a view wrapping the intermediate buffer to the output.</p>
    <li data-md>
     <p>Checking if an error occurred, and returning failure if so.</p>
    <li data-md>
     <p>Performing a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>equal</c-></code> call on the final result, comparing it to the original input consumed.</p>
    <li data-md>
     <p>If it is not equal, return failure.</p>
    <li data-md>
     <p>If <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>empty</c-><c- p>(</c-><c- n>result</c-><c- p>.</c-><c- n>input</c-><c- p>);</c-></code>, return true.</p>
    <li data-md>
     <p>Go to 0.</p>
   </ol>
   <p>The function signature for <code class="highlight"><c- n>validate</c-></code> is a little different than the above functions that actually do the transcoding. Specifically, this function needs 2 states, one for the <code class="highlight"><c- n>decode_one</c-></code> call and one for the <code class="highlight"><c- n>encode_one</c-></code> call. This is problematic for <a href="#design-low-level-stateful">potential stateful encodings</a>, but for most other encodings this is fine.</p>
<pre class="language-cpp highlight"><c- c1>// header: &lt;encoding></c->

<c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>DecodeState</c-><c- p>,</c-> <c- k>typename</c-> <c- n>EncodeState</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>validate</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c->
		<c- n>DecodeState</c-><c- o>&amp;</c-> <c- n>decode_state</c-><c- p>,</c-> <c- n>EncodeState</c-><c- o>&amp;</c-> <c- n>encode_state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>DecodeState</c-><c- o>></c->
	<c- k>constexpr</c-> <c- k>auto</c-> <c- n>validate</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>,</c-> <c- n>DecodeState</c-><c- o>&amp;</c-> <c- n>decode_state</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- o>></c->
	<c- k>constexpr</c-> <c- b>bool</c-> <c- n>validate</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>,</c-> <c- n>Encoding</c-><c- o>&amp;&amp;</c-> <c- n>encoding</c-><c- p>);</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- o>></c->
	<c- k>constexpr</c-> <c- b>bool</c-> <c- n>validate</c-><c- p>(</c-><c- n>Input</c-><c- o>&amp;&amp;</c-> <c- n>input</c-><c- p>);</c->

<c- p>}}</c->
</pre>
   <p>The order of arguments is chosen based on what users are likely to specify first. In many cases, all that is needed is the input: the encoding can be chosen automatically for the user based on such. For <code class="highlight"><c- n>validate</c-></code>, the <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_code_unit_encoding_t</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>range_value_t</c-><c- o>&lt;</c-><c- n>Input</c-><c- o>>></c-></code> encoding type is picked (see <a href="#design-low-level-encodings-default">§ 3.2.3.4 Default Encodings</a>). Otherwise, the user must specify the <code class="highlight"><c- n>encoding</c-></code> object to use themselves. The third parameter is the state, which is passed as follows:</p>
   <ul>
    <li data-md>
     <p>If <code class="highlight"><c- n>is_encoding_self_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> is true, then <code class="highlight"><c- n>encoding</c-><c- p>.</c-><c- n>reset_state</c-><c- p>();</c-></code> is called and <code class="highlight"><c- n>encoding</c-></code> is passed as the <code class="highlight"><c- n>State</c-><c- o>&amp;</c-></code> parameter to the appropriate overload.</p>
    <li data-md>
     <p>Otherwise, <code class="highlight"><c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-><c- p>{}</c-></code> is used as the parameter to the appropriate overload.</p>
   </ul>
   <p>Interestingly, we come to a conundrum here with "self-referential" encodings. We cannot use the <code class="highlight"><c- n>encoding</c-></code> a second time and call <code class="highlight"><c- p>.</c-><c- n>reset_state</c-><c- p>()</c-></code> on it again, nor can we create one from thin air. This means that for <code class="highlight"><c- n>any_encoding</c-></code>/<code class="highlight"><c- n>variant_encoding</c-></code>-style encodings which contain their own state / are stateful, this function will <code class="highlight"><c- k>static_assert</c-><c- p>(...)</c-></code> if it cannot work out. There are also hooks as detailed in <a href="#design-speed-customization-free_validation_count">§ 3.4.1.3 Customizability: Validating and Counting Free Functions</a>.</p>
   <h5 class="heading settled" data-level="3.3.1.5" id="design-high-level-free-count"><span class="secno">3.3.1.5. </span><span class="content">Free Functions <code class="highlight"><c- n>decode_count</c-></code> and <code class="highlight"><c- n>encode_count</c-></code></span><a class="self-link" href="#design-high-level-free-count"></a></h5>
   <p>This proposal will not spoon feed the reader everything: the <code class="highlight"><c- n>decode_count</c-></code> and <code class="highlight"><c- n>encode_count</c-></code> functions will be left as an exercise to the reader. (Hint: it’s not much different from how the actual encode or decode core default is implemented.)</p>
   <h4 class="heading settled" data-level="3.3.2" id="design-high-level-safety"><span class="secno">3.3.2. </span><span class="content">Safety with the Free Functions</span><a class="self-link" href="#design-high-level-safety"></a></h4>
   <p>The second problem is the ability to <em>lose</em> data due to not using lossless encodings. For example, most legacy encodings are lossy when it comes to code points and graphemes outside of their traditional reservoir (e.g., trying to handle Chinese scripts with a latin-1 encoding). Trying to properly encode between this myriad of encodings leaves room for losing information. Even for Wide Character Locale-based (<code class="highlight"><c- b>wchar_t</c-></code>) data, the only standard transformation to get to UTF32 text requires translating through the normal Character Locale-based (<code class="highlight"><c- b>char</c-></code>) functions first, leading to loss of information and mojibake (see <a data-link-type="biblio" href="#biblio-n2440">A C paper for additional transcoding utilities</a>).</p>
   <p>Therefore, an error at compile-time is wanted if a user uses the above high-level free functions, but does not explicitly specify an error handler in the case where a conversion is lossy. Taking an example <a href="https://youtu.be/BdUipluIf1E?t=2425">from this presentation</a>, this puppy emoji cannot fit in ASCII. In general, most Unicode Code Points cannot fit in an ASCII string: this is a dangerous conversion! So, unless you use a non-default error handler, the library will <code class="highlight"><c- k>static_assert</c-></code> or perform other shenanigans to loudly complain at compile-time:</p>
<pre class="language-cpp highlight"><c- b>int</c-> <c- nf>main</c-> <c- p>(</c-><c- b>int</c-><c- p>,</c-> <c- b>char</c-><c- o>*</c-><c- p>[])</c-> <c- p>{</c->
	<c- c1>// Compiler Error: lossy encoding, specify non-default error handler</c->
	<c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>ascii_emoji0</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode</c-><c- p>(</c-><c- n>U</c->"🐶"<c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>ascii</c-><c- p>{});</c->

	<c- c1>// Compiler Error: lossy encoding, specify non-default error handler</c->
	<c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>ascii_emoji1</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode</c-><c- p>(</c-><c- n>U</c->"🐶"<c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>ascii</c-><c- p>{},</c->
		<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>default_handler</c-><c- p>{});</c->

	<c- c1>// Okay: you asked for it!</c->
	<c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>ascii_emoji2</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode</c-><c- p>(</c-><c- n>U</c->"🐶"<c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>ascii</c-><c- p>{},</c->
		<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>replacement_handler</c-><c- p>{});</c->
	<c- c1>// ascii_emoji2 contains '?'</c->

	<c- c1>// Okay: undefined behavior, but you asked for it.</c->
	<c- n>std</c-><c- o>::</c-><c- n>string</c-> <c- n>ascii_emoji3</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode</c-><c- p>(</c-><c- n>U</c->"🐶"<c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>ascii</c-><c- p>{},</c->
		<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>assume_valid_handler</c-><c- p>{});</c->
	<c- c1>// ascii_emoji3 has no guarantees</c->
	<c- c1>// at this point: undefined behavior was invoked!</c->
<c- p>}</c->
</pre>
   <h4 class="heading settled" data-level="3.3.3" id="design-high-level-ranges"><span class="secno">3.3.3. </span><span class="content">Improving Usability for Low-Memory Environments: Ranges</span><a class="self-link" href="#design-high-level-ranges"></a></h4>
   <p>One of the biggest problems with <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode</c-><c- p>(</c-><c- n>_into</c-><c- p>)</c-></code>, <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>decode</c-><c- p>(</c-><c- n>_into</c-><c- p>)</c-></code>, and <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>transcode</c-><c- p>(</c-><c- n>_into</c-><c- p>)</c-></code> is exactly their eager consumption. The defaults for these APIs will create owning containers of <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>basic_string</c-><c- o>&lt;</c-><c- n>code_unit</c-><c- o>></c-></code>/<code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>basic_string</c-><c- o>&lt;</c-><c- n>code_point</c-><c- o>></c-></code> and fill them up as much as they possibly can. This makes these High Level free functions untenable for users in memory-constrained environments. The C++ standard is meant to serve everyone, both high-performance <em>and</em> memory-constrained environments. Therefore, lazy ranges are required to provide low-footprint encode, decode, and transcode operations to everyone.</p>
   <p>Most importantly, wrappers around other ranges are employed here. This is important: nobody has time to rewrite all of this functionality just because the API strongly mixed <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>basic_string_view</c-></code> concerns with encoding concerns. There are spans, string views, and other things outside of the standard that are perfectly suitable for iterating over code units: excluding them by not having this be a wrapper type is a non-starter for getting these abstractions wide adoption in the ecosystem.</p>
   <h5 class="heading settled" data-level="3.3.3.1" id="design-high-level-ranges-decode"><span class="secno">3.3.3.1. </span><span class="content"><code class="highlight"><c- n>decode_view</c-></code> and <code class="highlight"><c- n>decode_iterator</c-></code></span><a class="self-link" href="#design-high-level-ranges-decode"></a></h5>
   <p><code class="highlight"><c- n>decode_view</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- p>,</c-> <c- n>Range</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- p>,</c-> <c- n>State</c-><c- o>></c-></code> is a templated type that takes the for loop found in <a href="#design-high-level">§ 3.3 High Level</a> and turns it into a one-by-one, iterative process that produces iterators as powerful as the iterator category/concept of the <code class="highlight"><c- n>Range</c-></code> type it is supplied with. It is also meant to work with <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>reference_wrapper</c-></code>s of <code class="highlight"><c- n>Encoding</c-></code>, <code class="highlight"><c- n>Range</c-></code>, <code class="highlight"><c- n>ErrorHandler</c-></code> and <code class="highlight"><c- n>State</c-></code> types (to allow views to be instantiated over pre-existing Encodings and Ranges and used to make algorithms work). <code class="highlight"><c- n>decode_iterator</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- p>,</c-> <c- n>Range</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- p>,</c-> <c- n>State</c-><c- o>></c-></code> is also specified as well:</p>
<pre class="language-cpp highlight"><c- c1>// header: &lt;encoding></c->

<c- k>namespace</c-> <c- n>std</c-> <c- p>{</c-> <c- k>namespace</c-> <c- n>text</c-> <c- p>{</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>_Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>Range</c-> <c- o>=</c-> <c- n>basic_string_view</c-><c- o>&lt;</c-><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>_Encoding</c-><c- o>>></c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ErrorHandler</c-> <c- o>=</c-> <c- n>default_handler</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>State</c-> <c- o>=</c-> <c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>_Encoding</c-><c- o>>></c->
	<c- k>class</c-> <c- nc>decode_iterator</c-><c- p>;</c->

	<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>Range</c-> <c- o>=</c-> <c- n>basic_string_view</c-><c- o>&lt;</c-><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>>></c-><c- p>,</c->
		<c- k>typename</c-> <c- n>ErrorHandler</c-> <c- o>=</c-> <c- n>default_handler</c-><c- p>,</c->
		<c- k>typename</c-> <c- n>State</c-> <c- o>=</c-> <c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>>></c->
	<c- k>class</c-> <c- nc>decode_view</c-> <c- p>{</c->
	<c- k>public</c-><c- o>:</c->
		<c- k>using</c-> <c- n>iterator</c->            <c- o>=</c-> <c- n>decode_iterator</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- p>,</c-> <c- n>Range</c-><c- p>,</c-> 
		                                            <c- n>ErrorHandler</c-><c- p>,</c-> <c- n>State</c-><c- o>></c-><c- p>;</c->
		<c- k>using</c-> <c- n>sentinel</c->            <c- o>=</c-> <c- n>decode_sentinel</c-><c- p>;</c->
		<c- k>using</c-> <c- n>range_type</c->          <c- o>=</c-> <c- n>Range</c-><c- p>;</c->
		<c- k>using</c-> <c- n>encoding_type</c->       <c- o>=</c-> <c- n>Encoding</c-><c- p>;</c->
		<c- k>using</c-> <c- n>error_handler_type</c->  <c- o>=</c-> <c- n>ErrorHandler</c-><c- p>;</c->
		<c- k>using</c-> <c- n>encoding_state_type</c-> <c- o>=</c-> <c- n>encoding_state_t</c-><c- o>&lt;</c-><c- n>encoding_type</c-><c- o>></c-><c- p>;</c->

		<c- k>constexpr</c-> <c- nf>decode_view</c-><c- p>(</c-><c- n>range_type</c-> <c- n>range</c-><c- p>)</c-> <c- k>noexcept</c-><c- p>;</c->

		<c- k>constexpr</c-> <c- nf>decode_view</c-><c- p>(</c-><c- n>range_type</c-> <c- n>range</c-><c- p>,</c-> <c- n>encoding_type</c-> <c- n>encoding</c-><c- p>)</c-> <c- k>noexcept</c-><c- p>;</c->

		<c- k>constexpr</c-> <c- nf>decode_view</c-><c- p>(</c-><c- n>range_type</c-> <c- n>range</c-><c- p>,</c-> <c- n>encoding_type</c-> <c- n>encoding</c-><c- p>,</c->
			<c- n>error_handler_type</c-> <c- n>error_handler</c-><c- p>)</c-> <c- k>noexcept</c-><c- p>;</c->

		<c- k>constexpr</c-> <c- nf>decode_view</c-><c- p>(</c-><c- n>range_type</c-> <c- n>range</c-><c- p>,</c-> <c- n>encoding_type</c-> <c- n>encoding</c-><c- p>,</c->
			<c- n>error_handler_type</c-> <c- n>error_handler</c-><c- p>,</c-> <c- n>encoding_state_type</c-> <c- n>state</c-><c- p>)</c-> <c- k>noexcept</c-><c- p>;</c->

		<c- k>constexpr</c-> <c- nf>decode_view</c-><c- p>(</c-><c- n>iterator</c-> <c- n>it</c-><c- p>)</c-> <c- k>noexcept</c-><c- p>;</c->

		<c- k>constexpr</c-> <c- n>iterator</c-> <c- nf>begin</c-><c- p>()</c-> <c- k>const</c-><c- o>&amp;</c-> <c- k>noexcept</c-><c- p>;</c->
		<c- k>constexpr</c-> <c- n>iterator</c-> <c- nf>begin</c-><c- p>()</c-> <c- o>&amp;&amp;</c-> <c- k>noexcept</c-><c- p>;</c->

		<c- k>constexpr</c-> <c- n>sentinel</c-> <c- nf>end</c-><c- p>()</c-> <c- k>const</c-> <c- k>noexcept</c-><c- p>;</c->

		<c- k>friend</c-> <c- k>constexpr</c-> <c- n>decode_view</c-> <c- nf>reconstruct</c-><c- p>(</c-><c- o>::</c-><c- n>std</c-><c- o>::</c-><c- n>in_place_type_t</c-><c- o>&lt;</c-><c- n>decode_view</c-><c- o>></c-><c- p>,</c->
			<c- n>iterator</c-> <c- n>it</c-><c- p>,</c-> <c- n>sentinel</c-><c- p>)</c-> <c- k>noexcept</c-><c- p>;</c->
	<c- p>};</c->
<c- p>}}</c->
</pre>
   <p>The <code class="highlight"><c- n>decode_iterator</c-></code> produces a <code class="highlight"><c- n>value_type</c-></code> of <code class="highlight"><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code>. It keeps track of how many code points are generated by a call to <code class="highlight"><c- n>encoding</c-><c- p>.</c-><c- n>decode_one</c-></code>, and iterates through however many are present, before calling <code class="highlight"><c- n>encoding</c-><c- p>.</c-><c- n>decode_one</c-></code> again to obtain the next values.</p>
   <p>In the case of errors, the standard has a number of well-defined behaviors that prevent the need to add a <code class="highlight"><c- p>.</c-><c- n>is_valid</c-><c- p>()</c-></code> check to the view type, or to provide an <code class="highlight"><c- n>expected</c-></code>-like wrapper for the <code class="highlight"><c- n>value_type</c-></code>:</p>
   <ul>
    <li data-md>
     <p><code class="highlight"><c- n>default_handler</c-></code>/<code class="highlight"><c- n>replacement_handler</c-></code>: provides replacement characters, which will be inserted into the iteration stream. Errors do not escape and are shown as replacement characters. This works fine.</p>
    <li data-md>
     <p><code class="highlight"><c- n>throw_handler</c-></code>: throws on an error, exceptions escape the <code class="highlight"><c- o>++</c-><c- n>it</c-></code> and <code class="highlight"><c- o>*</c-><c- n>it</c-></code> calls. This works fine.</p>
    <li data-md>
     <p><code class="highlight"><c- n>assume_valid_handler</c-></code>: user was already invoking UB if errors were hit. This works "fine" (the user asked for it).</p>
   </ul>
   <p>Therefore, the only error case wherein <code class="highlight"><c- n>decode_view</c-></code> and <code class="highlight"><c- n>decode_iterator</c-></code> perform badly is when the error handler is one which passes through the error without doing anything with the error information with the expectation that the user handles it. The user would be unable to handle it in this case with the custom error handler. There are a few ways to deal with this situation: the first would be to restrict the allowed error handlers into the range and iterator types to Standard Sanctioned™ types. The other would be to just throw hands up when the user passes in an error handler that does not properly throw, massage, or handle errors in an appropriate fashion. This proposal currently advocates the latter: passing an error handler to the 4th template parameter is an extreme amount of buy in. If users have gone this far, they must want a very specific custom behavior. Implementations will be encouraged to add asserts to trap users who have poor behavior, but otherwise leave it undefined behavior if errors are not handled for iterator and range types.</p>
   <p class="note" role="note"><span>Note:</span> This differs from how Tom Honermann’s <code class="highlight"><c- n>text_view</c-></code> and similar behaved. That library returned Boost.Outcome/<code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>expected</c-></code>/<code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>optional</c-></code>-like result types that one had to further dereference to get to the code points. This represented an ergonomics and a composability problem, because a further transformation step to dereference was always required.</p>
   <p>A third option is returning a special type which holds the <code class="highlight"><c- n>decode_one</c-></code> result and has an implicit conversion to the <code class="highlight"><c- n>code_point</c-></code> type. It could throw on a conversion where there is an error. This design choice has some serious limitations because it makes <code class="highlight"><c- k>auto</c-></code> dangerous to use for casual users due to the nature of "magical proxy types". It also forces a throwing of the error on end users, which forces a choice that invalidates the need of environments where exceptions do not exist or are prohibitively expensive.</p>
   <p class="note" role="note"><span>Note:</span> It is recognized that the Standard does not bless such implementations. This proposal does not care: the needs of C++'s users greatly outweigh the theoretical purity of the C++ abstract machine where the cost of all things is equal and does not matter. The standard’s preferred error handling method has a non-zero cost (particularly in binary size) to simply exist that has not been fully optimized into a "do not pay for what you do not use" state. Furthermore, it is still extremely dubious to throw-by-default on any ill-formed text for reasons mentioned above. Therefore, directions wherein the default is equivalent to throwing are not preferred at this time.</p>
   <h5 class="heading settled" data-level="3.3.3.2" id="design-high-level-ranges-encode"><span class="secno">3.3.3.2. </span><span class="content"><code class="highlight"><c- n>encode_view</c-></code> and <code class="highlight"><c- n>encode_iterator</c-></code></span><a class="self-link" href="#design-high-level-ranges-encode"></a></h5>
   <p>This is identical to <a href="#design-high-level-ranges-decode">§ 3.3.3.1 decode_view and decode_iterator</a>, except the names of the view and iterator are <code class="highlight"><c- n>encode_view</c-></code> and <code class="highlight"><c- n>encode_iterator</c-></code>, respectively, along with a few other minor changes.</p>
   <ul>
    <li data-md>
     <p>The <code class="highlight"><c- n>Range</c-></code> template parameter is defaulted to <code class="highlight"><c- n>basic_string_view</c-><c- o>&lt;</c-><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>_Encoding</c-><c- o>>></c-></code>.</p>
    <li data-md>
     <p>The <code class="highlight"><c- n>encode_view</c-></code> view itself produces code units (e.g., <code class="highlight"><c- n>value_type</c-></code> is <code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> rather than code points), one at a time, of the <code class="highlight"><c- n>Encoding</c-></code> by using <code class="highlight"><c- n>encoding</c-><c- p>.</c-><c- n>encode_one</c-></code>.</p>
   </ul>
   <p>Everything else is identical in nature to <code class="highlight"><c- n>decode_view</c-></code>.</p>
   <h5 class="heading settled" data-level="3.3.3.3" id="design-high-level-ranges-transcode"><span class="secno">3.3.3.3. </span><span class="content"><code class="highlight"><c- n>transcode_view</c-></code> and <code class="highlight"><c- n>transcode_iterator</c-></code></span><a class="self-link" href="#design-high-level-ranges-transcode"></a></h5>
   <p>This is mostly identical to <a href="#design-high-level-ranges-decode">§ 3.3.3.1 decode_view and decode_iterator</a>, though there are more apparent changes here.</p>
   <ul>
    <li data-md>
     <p>The name of the view and iterator types are <code class="highlight"><c- n>transcode_view</c-></code> and <code class="highlight"><c- n>transcode_iterator</c-></code>, respectively.</p>
    <li data-md>
     <p>The template parameters are modified to take a <code class="highlight"><c- n>ToEncoding</c-></code> and a <code class="highlight"><c- n>FromEncoding</c-></code>, a <code class="highlight"><c- n>ToErrorHandler</c-></code> and a <code class="highlight"><c- n>FromErrorHandler</c-></code>, and finally a <code class="highlight"><c- n>ToState</c-></code> and <code class="highlight"><c- n>FromState</c-></code>.</p>
    <li data-md>
     <p>The <code class="highlight"><c- n>Range</c-></code> template parameter is defaulted to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>basic_string_view</c-><c- o>&lt;</c-><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>>></c-></code>, since the stored input range holds the code units that are decoded with the <code class="highlight"><c- n>FromEncoding</c-></code>.</p>
    <li data-md>
     <p>The <code class="highlight"><c- n>value_type</c-></code> is <code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>ToEncoding</c-><c- o>></c-></code> and produces code units, one at a time, of the <code class="highlight"><c- n>ToEncoding</c-></code>.</p>
   </ul>
   <p>Additionally, another important change here is an optimization opportunity. The default implementation of performing a single "<code class="highlight"><c- n>transcode_one</c-></code>" operation is to:</p>
   <ul>
    <li data-md>
     <p>Take the input range stored in the class, call <code class="highlight"><c- n>from_encoding</c-><c- p>.</c-><c- n>decode_one</c-></code> with it.</p>
    <li data-md>
     <p>Take the intermediate output range for the previous <code class="highlight"><c- n>decode_one</c-></code> call, and feed it into <code class="highlight"><c- n>to_encoding</c-><c- p>.</c-><c- n>encode_one</c-></code>.</p>
    <li data-md>
     <p>Present the output to the user in a suitable manner.</p>
   </ul>
   <p>This is fine, as long as the <code class="highlight"><c- n>code_point</c-></code> types agree when going from the code units of the <code class="highlight"><c- n>FromEncoding</c-></code> to the code units of the <code class="highlight"><c- n>ToEncoding</c-></code>. The problem here is that for many conversions, going from <code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-></code> ➝ shared <code class="highlight"><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-></code> ➝ <code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>ToEncoding</c-><c- o>></c-></code> is an unnecessarily long step. The same way <a href="#design-speed-customization-free_transcoding">ADL customization points</a> are provided for the <a href="#design-high-level-free">free functions</a>, there must be provisions for turning that through-code-points roundtrip into something a little bit faster.</p>
   <p>For example, <code class="highlight"><c- n>ascii</c-></code> and <code class="highlight"><c- n>utf8</c-></code> are bitwise compatible. It is extremely foolish to roundtrip that -- for each and every code point/code unit -- through an intermediary <code class="highlight"><c- n>code_point</c-></code> as is done in the generic core implementation. Therefore, extensibility for this case is provided as described in <a href="#design-speed-customization-transcode_one">§ 3.4.1.1 One-by-one Transcoding Shortcuts</a>.</p>
   <h3 class="heading settled" data-level="3.4" id="design-speed"><span class="secno">3.4. </span><span class="content">The Need for Speed</span><a class="self-link" href="#design-speed"></a></h3>
   <p>Performance is correctness. If these methods and the resulting interface are not fast enough to meet the needs of the programmers, there will be little to no adoption over current solutions. Thanks to work by Bob Steagall and Zach Laine, it is a fact that it is incredibly hard to make a range-based or iterator-based interface which will achieve the text processing speeds that will satisfy users with trivial (<code class="highlight"><c- n>span</c-></code>-based, pointer-based) needs. There are shortcuts when transcoding between certain encoding pairs that should be taken, even if the <code class="highlight"><c- n>code_point</c-></code>-by-<code class="highlight"><c- n>code_point</c-></code> transcoding works in the general case.</p>
   <p>An explicit goal of this library is that there shall be no room for a lower level abstraction or language here, and the first steps to doing that are recognizing the benefits of eager encoding, decoding and transcoding interfaces, as well as pluggable and overridable behavior for the variety of functionality as it relates to higher-level abstractions.</p>
   <p>Research and implementation experience with <a data-link-type="biblio" href="#biblio-boosttext">[boost.text]</a>, <a data-link-type="biblio" href="#biblio-text_view">[text_view]</a> and others has made it plainly clear that while iterators and ranges can produce an extremely efficient binary, it is still not the fastest code that can be written to compete with hand-written/vectorized bulk text processing routines made specifically for each encoding. Therefore, it is imperative that lazy ranges cannot be the only solution. The C++ Standard must steadily and nicely supplant the codebase-specific or ad-hoc solutions individuals keep rolling for encoding and decoding operations.</p>
   <h4 class="heading settled" data-level="3.4.1" id="design-speed-customization"><span class="secno">3.4.1. </span><span class="content">Speed and Flexibility for Everyone: Customization Points</span><a class="self-link" href="#design-speed-customization"></a></h4>
   <p>An important part of that is the ability to provide performance for both lazy, range-based iteration as described in <a href="#design-high-level-ranges">§ 3.3.3 Improving Usability for Low-Memory Environments: Ranges</a> and fast free functions as described in <a href="#design-high-level-free">§ 3.3.1 Eager Free Functions</a>. To this end, an ADL free function scheme similar to the Range Access Customization Points (e.g. <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>begin</c-></code> and friends) has been developed to facilitate the customization for speed that users will require for their code.</p>
   <p>Considering this is going to be one of the most fundamental text layers that sits between typical text and a lot of the new I/O routines, it is imperative that these conversions are not only as fast as possible, but customizable. The user can already customize the encoding by creating their own conforming encoding object, but encodings still do their transformations on a code point-by-code point basis. Therefore, a means of extensibility needs to be chosen for the <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode</c-></code>, <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>decode</c-></code> and <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>transcode</c-></code> (<a href="#design-high-level-free">§ 3.3.1 Eager Free Functions</a>) functions. As this paper is targeting C++23, there exists hope that Matt Calabrese’s <a data-link-type="biblio" href="#biblio-p1292">[p1292]</a> receives favor in the Evolution Design Groups so that the extension mechanisms are simple functions that call simple extension points as laid out below. Failing that, a design similar to <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-></code>'s customization points -- as laid out in <a data-link-type="biblio" href="#biblio-n4381">[n4381]</a> -- would be preferred.</p>
   <p>What is not negotiable is that it must be extensible. Users should be able to write fast transcoding functions that the standard picks up for their own encoding types. From GB18030 to other ISO and WHATWG encodings, there will always be a need to extend the fast bulk processing of the standard. Current standard library implementers do not have the time to support every single legacy encoding on the planet, and companies do not have the time to petition each and every standard library to add support for their internal encoding. Similarly, government records kept in legacy encodings for political or organizational reasons cannot be locked out of this world either.</p>
   <p>Thusly, the following extension points are provided.</p>
   <h5 class="heading settled" data-level="3.4.1.1" id="design-speed-customization-transcode_one"><span class="secno">3.4.1.1. </span><span class="content">One-by-one Transcoding Shortcuts</span><a class="self-link" href="#design-speed-customization-transcode_one"></a></h5>
   <p>Using the example of <code class="highlight"><c- n>ascii</c-></code> and <code class="highlight"><c- n>utf8</c-></code> previously made in this paper, there is room for performing faster one-by-one transcoding. Normally, given a <code class="highlight"><c- n>FromEncoding</c-></code> and <code class="highlight"><c- n>ToEncoding</c-></code> such as <code class="highlight"><c- n>ascii</c-></code> and <code class="highlight"><c- n>utf8</c-></code>, the process involves a round trip through an intermediary code point, as follows:</p>
   <ol start="0">
    <li data-md>
     <p>Convert input <code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-></code> ➝ intermediary shared <code class="highlight"><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-></code></p>
    <li data-md>
     <p>Convert shared <code class="highlight"><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-></code> ➝ <code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>ToEncoding</c-><c- o>></c-></code>.</p>
   </ol>
   <p>This is accomplished by first calling <code class="highlight"><c- p>.</c-><c- n>decode_one</c-></code> on the incoming <code class="highlight"><c- n>input</c-></code> with an intermediary output, typically an array of <code class="highlight"><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-> <c- n>intermediate_code_points</c-><c- p>[</c-><c- n>FromEncoding</c-><c- o>::</c-><c- n>max_code_points</c-><c- p>];</c-></code> wrapped up in a view. This intermediary is then put into an <code class="highlight"><c- p>.</c-><c- n>encode_one</c-></code> call and the resulting output used for whatever purpose is necessary.</p>
   <p>To speed this process up, the free function <code class="highlight"><c- n>text_transcode_one</c-></code> can be defined by the user to skip the round trip:</p>
<pre class="language-cpp highlight"><c- c1>// in any related namespace in which ADL can find it</c->

<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToErrorHandler</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>FromState</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToState</c-><c- o>></c->
<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>transcode_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>Output</c-><c- p>,</c-> <c- n>FromState</c-><c- p>,</c-> <c- n>ToState</c-><c- o>></c->
<c- n>text_transcode_one</c-><c- p>(</c-><c- n>Input</c-> <c- n>input</c-><c- p>,</c-> <c- n>FromEncoding</c-><c- o>&amp;&amp;</c-> <c- n>from</c-><c- p>,</c->
	<c- n>Output</c-> <c- n>output</c-><c- p>,</c-> <c- n>ToEncoding</c-><c- o>&amp;&amp;</c-> <c- n>to</c-><c- p>,</c->
	<c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c->
	<c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>,</c->
	<c- n>FromState</c-><c- o>&amp;</c-> <c- n>from_state</c-><c- p>,</c-> <c- n>ToState</c-><c- o>&amp;</c-> <c- n>to_state</c-><c- p>);</c->
</pre>
   <p>The following is a complete example of this customization point.</p>
<pre class="language-cpp highlight"><c- k>using</c-> <c- n>ascii_to_utf8_result</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>transcode_result</c-><c- o>&lt;</c->
	<c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- b>char</c-><c- o>></c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- n>char8_t</c-><c- o>></c-><c- p>,</c->
	<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>ascii</c-><c- o>::</c-><c- n>state</c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-><c- o>::</c-><c- n>state</c-><c- o>></c-><c- p>;</c->

<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToErrorHandler</c-><c- o>></c->
<c- n>ascii_to_utf8_result</c-> <c- n>text_transcode_one</c-><c- p>(</c-><c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- b>char</c-><c- o>></c-> <c- n>input</c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>ascii</c-><c- o>&amp;</c-> <c- n>from</c-><c- p>,</c->
	<c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- n>char8_t</c-><c- o>></c-> <c- n>output</c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-><c- o>&amp;</c-> <c- n>to</c-><c- p>,</c->
	<c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c-> <c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>,</c->
	<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>ascii</c-><c- o>::</c-><c- n>state</c-><c- o>&amp;</c-> <c- n>from_state</c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-><c- o>::</c-><c- n>state</c-><c- o>&amp;</c-> <c- n>to_state</c-><c- p>)</c-> <c- p>{</c->
	
	<c- k>if</c-> <c- p>(</c-><c- n>input</c-><c- p>.</c-><c- n>empty</c-><c- p>())</c-> <c- p>{</c->
		<c- c1>// no input: that’s fine</c->
		<c- k>return</c-> <c- n>ascii_to_utf8_result</c-><c- p>(</c-><c- n>input</c-><c- p>,</c-> <c- n>output</c-><c- p>,</c-> <c- n>from_state</c-><c- p>,</c-> <c- n>to_state</c-><c- p>);</c->
	<c- p>}</c->
	<c- k>if</c-> <c- p>(</c-><c- n>output</c-><c- p>.</c-><c- n>empty</c-><c- p>())</c-> <c- p>{</c->
		<c- c1>// error: no room!</c->
		<c- k>return</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>propagate_transcode_one_error</c-><c- p>(</c-><c- n>from</c-><c- p>,</c-> <c- n>input</c-><c- p>,</c->
			<c- n>to</c-><c- p>,</c-> <c- n>output</c-><c- p>,</c->
			<c- n>from_error_handler</c-><c- p>,</c-> <c- n>to_error_handler</c-><c- p>,</c->
			<c- n>from_state</c-><c- p>,</c-> <c- n>to_state</c-><c- p>,</c->
			<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>insufficient_output_space</c-><c- p>,</c->
			<c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- b>char</c-><c- p>,</c-> <c- mi>0</c-><c- o>></c-><c- p>{});</c->
	<c- p>}</c->
	<c- k>if</c-> <c- p>((</c-><c- n>input</c-><c- p>[</c-><c- mi>0</c-><c- p>]</c-> <c- o>&amp;</c-> <c- sc>'\x7f'</c-><c- p>)</c-> <c- o>!=</c-> <c- mi>0</c-><c- p>)</c-> <c- p>{</c->
		<c- c1>// error: high bit set in ASCII</c->
		<c- k>return</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>propagate_transcode_one_error</c-><c- p>(</c-><c- n>from</c-><c- p>,</c-> <c- n>input</c-><c- p>.</c-><c- n>subspan</c-><c- o>&lt;</c-><c- mi>1</c-><c- o>></c-><c- p>(),</c->
			<c- n>to</c-><c- p>,</c-> <c- n>output</c-><c- p>,</c->
			<c- n>from_error_handler</c-><c- p>,</c-> <c- n>to_error_handler</c-><c- p>,</c->
			<c- n>from_state</c-><c- p>,</c-> <c- n>to_state</c-><c- p>,</c->
			<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>invalid_sequence</c-><c- p>,</c->
			<c- n>input</c-><c- p>.</c-><c- n>subspan</c-><c- o>&lt;</c-><c- mi>1</c-><c- p>,</c-> <c- mi>1</c-><c- o>></c-><c- p>());</c->
	<c- p>}</c->
	<c- c1>// bitwise compatible</c->
	<c- n>output</c-><c- p>[</c-><c- mi>0</c-><c- p>]</c-> <c- o>=</c-> <c- k>static_cast</c-><c- o>&lt;</c-><c- n>char8_t</c-><c- o>></c-><c- p>(</c-><c- n>input</c-><c- p>[</c-><c- mi>0</c-><c- p>]);</c->
	<c- c1>// return result</c->
	<c- k>return</c-> <c- nf>ascii_to_utf8_result</c-><c- p>(</c-><c- n>input</c-><c- p>.</c-><c- n>subspan</c-><c- o>&lt;</c-><c- mi>1</c-><c- o>></c-><c- p>(),</c-> <c- n>output</c-><c- p>.</c-><c- n>subspan</c-><c- o>&lt;</c-><c- mi>1</c-><c- o>></c-><c- p>(),</c->
		<c- n>from_state</c-><c- p>,</c-> <c- n>to_state</c-><c- p>);</c->
<c- p>}</c->
</pre>
   <p>This is faster than the round trip through <code class="highlight"><c- n>unicode_code_point</c-></code> and requires much less checking and work. When <code class="highlight"><c- n>transcode_view</c-></code> is, internally, doing the conversion from one code point to another, it will check if an unqualified call to <code class="highlight"><c- n>text_transcode_one</c-><c- p>(...)</c-></code> is valid, and if so call it with its input, output, to/from encoding, and current states.</p>
   <p class="note" role="note"><span>Note:</span> The function <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>propagate_transcode_one_error</c-></code> takes care of calling the <code class="highlight"><c- n>from_error_handler</c-></code> and, if appropriate, the <code class="highlight"><c- n>to_error_handler</c-></code> as well. It does this by constructing a temporary <code class="highlight"><c- n>decode_result</c-></code> with the current results and a temporary output buffer, milling it through the <code class="highlight"><c- n>from_error_handler</c-></code>, checking if the temporary output buffer was written into by <code class="highlight"><c- n>from_error_handler</c-></code>, and passing that intermediary to <code class="highlight"><c- n>to_error_handler</c-></code> to properly simulate the scheme by which an error would normally be handled in the transcode cycle. This is primarily to facilitate the case when a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>replacement_handler</c-></code> or similar would communicate a replacement character to the intermediate storage buffer in the default "<code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-></code> ➝ shared <code class="highlight"><c- n>encoding_code_point_t</c-><c- o>&lt;</c-><c- n>FromEncoding</c-><c- o>></c-></code> ➝ <code class="highlight"><c- n>encoding_code_unit_t</c-><c- o>&lt;</c-><c- n>ToEncoding</c-><c- o>></c-></code>" chain; and, that change needs to be placed in the final output rather than in an intermediate buffer which is going to disappear.</p>
   <p class="note" role="note"><span>Note:</span> This may be an indication that there should be a third kind of error handler for <code class="highlight"><c- n>transcode</c-></code>, but that threatens to leak the detail that a <code class="highlight"><c- n>transcode_one</c-></code> is an optimization of <code class="highlight"><c- n>encode_one</c-></code> + <code class="highlight"><c- n>decode_one</c-></code> and make the user sensitive to such an internal optimization.</p>
   <p>It is important to note that the above example customization point only works for <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>ranges</c-><c- o>::</c-><c- n>contiguous_range</c-></code>s; or, anything that can be consumed by the respective <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>span</c-></code> arguments. This means that a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>subrange</c-></code> templated on a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>list</c-><c- o>&lt;</c-><c- b>char</c-><c- o>>::</c-><c- n>iterator</c-></code> would not qualify here, as it is not a contiguous range. This is intentional: there are cases where the kind of range being captured matters for the purposes of optimization. For example, a contiguous range might have its functionality replaced by function calls to the C standard library. Only a contiguous range works in that case, because the C standard library deals exclusively in pointers.</p>
   <h5 class="heading settled" data-level="3.4.1.2" id="design-speed-customization-free_transcoding"><span class="secno">3.4.1.2. </span><span class="content">Customizability: Transcoding Free Functions</span><a class="self-link" href="#design-speed-customization-free_transcoding"></a></h5>
   <p>The free functions are the chance for the user to optimize bulk encoding. This is an area that becomes very important to users all over the world. Many people have already written optimized routines to convert from one encoding to another: it would be a shame if all of this work could not interoperate with the standard as it is. That is why there are 3 ADL-found free functions that are checked for well-formedness, and if so are called by the implementation in <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>decode_into</c-></code>, <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode_into</c-></code>, and <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>transcode_into</c-></code>. They are as follows:</p>
<pre class="language-cpp highlight"><c- c1>// in any related namespace in which ADL can find it</c->

<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>State</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
<c- n>decode_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>Output</c-><c- p>,</c-> <c- n>State</c-><c- o>></c-> <c- n>text_decode</c-><c- p>(</c-><c- n>Input</c-> <c- n>input</c-><c- p>,</c-> <c- k>const</c-> <c- n>Encoding</c-><c- o>&amp;</c-> <c- n>encoding</c-><c- p>,</c->
	<c- n>Output</c-> <c- n>output</c-><c- p>,</c-> <c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>);</c->

<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>State</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
<c- n>encode_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>Output</c-><c- p>,</c-> <c- n>State</c-><c- o>></c-> <c- n>text_encode</c-><c- p>(</c-><c- n>Input</c-> <c- n>input</c-><c- p>,</c-> <c- k>const</c-> <c- n>Encoding</c-><c- o>&amp;</c-> <c- n>encoding</c-><c- p>,</c->
	<c- n>Output</c-> <c- n>output</c-><c- p>,</c-> <c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>);</c->

<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>FromEncoding</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>Output</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToEncoding</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>FromState</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToState</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToErrorHandler</c-><c- o>></c->
<c- n>transcode_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>Output</c-><c- p>,</c-> <c- n>FromState</c-><c- p>,</c-> <c- n>ToState</c-><c- o>></c-> <c- n>text_transcode</c-><c- p>(</c-><c- n>Input</c-> <c- n>input</c-><c- p>,</c->
	<c- k>const</c-> <c- n>FromEncoding</c-><c- o>&amp;</c-> <c- n>from_encoding</c-><c- p>,</c-> <c- n>Output</c-> <c- n>output</c-><c- p>,</c-> <c- k>const</c-> <c- n>ToEncoding</c-><c- o>&amp;</c-> <c- n>to_encoding</c-><c- p>,</c->
	<c- n>FromState</c-><c- o>&amp;</c-> <c- n>from_state</c-><c- p>,</c-> <c- n>ToState</c-><c- o>&amp;</c-> <c- n>to_state</c-><c- p>,</c->
	<c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c-> <c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>);</c->
</pre>
   <p>Each of these is the customization hook that a user can write in a namespace to enable a proper conversion from one encoding to another. Nominally, users would use concrete types in place of templated types like <code class="highlight"><c- n>Encoding</c-></code>, <code class="highlight"><c- n>FromEncoding</c-></code>, and <code class="highlight"><c- n>ToEncoding</c-></code>. Because each encoding object is essentially its own "strong object", tags are not required here as the encoding itself acts as an overload-separating, anchoring, strongly-identifying tag that can keep overloads separate and non-clashing. This is different from Boost.Text, where the library must employ <a href="https://github.com/tzlaine/text/blob/master/include/boost/text/detail/unpack.hpp#L9">encoding tags</a> on its ranges to gain additional framework-internal optimizations based on <a href="https://github.com/tzlaine/text/blob/master/include/boost/text/transcode_view.hpp#L211">smart tag and type-based dispatching</a>. With strong encoding objects, it is not necessary to craft such things internally and, externally, users can rely on it for their ADL extension points:</p>
<pre class="language-cpp highlight"><c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>FromErrorHandler</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ToErrorHandler</c-><c- o>></c->
<c- n>transcode_result</c-><c- o>&lt;</c-><c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- b>char</c-><c- o>></c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- b>char16_t</c-><c- o>></c-><c- p>,</c->
	<c- n>win_wrap</c-><c- o>::</c-><c- n>windows_1252</c-><c- o>::</c-><c- n>state</c-><c- p>,</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-><c- o>::</c-><c- n>state</c-><c- o>></c->
<c- n>text_transcode</c-><c- p>(</c->
	<c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- b>char</c-><c- o>></c-> <c- n>input</c-><c- p>,</c-> <c- k>const</c-> <c- n>win_wrap</c-><c- o>::</c-><c- n>windows_1252</c-><c- o>&amp;</c-> <c- n>encoding</c-><c- p>,</c->
	<c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- n>char8_t</c-><c- o>></c-> <c- n>output</c-><c- p>,</c-> <c- k>const</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf16</c-><c- o>&amp;</c-> <c- n>to_encoding</c-><c- p>,</c->
	<c- n>win_wrap</c-><c- o>::</c-><c- n>windows_1252</c-><c- o>::</c-><c- n>state</c-><c- o>&amp;</c-> <c- n>from_state</c-><c- p>,</c->
	<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf16</c-><c- o>::</c-><c- n>state</c-><c- o>&amp;</c-> <c- n>to_state</c-><c- p>,</c->
	<c- n>FromErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>from_error_handler</c-><c- p>,</c-> <c- n>ToErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>to_error_handler</c-><c- p>)</c-> <c- p>{</c->

	<c- k>if</c-> <c- p>(</c-><c- n>input</c-><c- p>.</c-><c- n>empty</c-><c- p>())</c-> <c- p>{</c->
		<c- c1>// do nothing</c->
		<c- k>return</c-> <c- n>transcode_result</c-><c- o>&lt;</c-><c- d>/*...*/</c-><c- o>></c-><c- p>(</c-><c- d>/* ... */</c-><c- p>);</c->
	<c- p>}</c->

	<c- b>int</c-> <c- n>Needed</c-> <c- o>=</c-> <c- n>MultiByteToWideChar</c-><c- p>(</c-><c- mi>1252</c-><c- p>,</c-> <c- mi>0</c-><c- p>,</c->
		<c- n>input</c-><c- p>.</c-><c- n>data</c-><c- p>(),</c-> <c- k>static_cast</c-><c- o>&lt;</c-><c- b>int</c-><c- o>></c-><c- p>(</c-><c- n>input</c-><c- p>.</c-><c- n>size</c-><c- p>()),</c->
		<c- k>nullptr</c-><c- p>,</c-> <c- mi>0</c-><c- p>);</c->
	<c- k>if</c-> <c- p>(</c-><c- n>Needed</c-> <c- o>==</c-> <c- mi>0</c-> <c- o>||</c-> <c- p>(</c-><c- n>Needed</c-> <c- o>></c-> <c- k>static_cast</c-><c- o>&lt;</c-><c- b>int</c-><c- o>></c-><c- p>(</c-><c- n>output</c-><c- p>.</c-><c- n>size</c-><c- p>())))</c-> <c- p>{</c->
		<c- c1>// handle error ...</c->
		<c- k>return</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>propagate_transcode_error</c-><c- p>(</c-><c- n>input</c-><c- p>,</c-> <c- n>output</c-><c- p>,</c->
			<c- n>from_handler</c-><c- p>,</c-> <c- n>to_handler</c-><c- p>,</c-> <c- n>from_state</c-><c- p>,</c-> <c- n>to_state</c-><c- p>,</c->
			<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>insufficient_output_space</c-><c- p>,</c->
			<c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- b>char</c-><c- p>,</c-> <c- mi>0</c-><c- o>></c-><c- p>{});</c->
	<c- p>}</c->

	<c- b>int</c-> <c- n>Succ</c-> <c- o>=</c-> <c- n>MultiByteToWideChar</c-><c- p>(</c-><c- mi>1252</c-><c- p>,</c-> <c- mi>0</c-><c- p>,</c->
		<c- n>input</c-><c- p>.</c-><c- n>data</c-><c- p>(),</c-> <c- k>static_cast</c-><c- o>&lt;</c-><c- b>int</c-><c- o>></c-><c- p>(</c-><c- n>input</c-><c- p>.</c-><c- n>size</c-><c- p>()),</c->
		<c- k>reinterpret_cast</c-><c- o>&lt;</c-><c- b>wchar_t</c-><c- o>*></c-><c- p>(</c-><c- n>output</c-><c- p>.</c-><c- n>data</c-><c- p>()),</c-> <c- k>static_cast</c-><c- o>&lt;</c-><c- b>int</c-><c- o>></c-><c- p>(</c-><c- n>output</c-><c- p>.</c-><c- n>size</c-><c- p>()));</c->
	<c- k>if</c-> <c- p>(</c-><c- n>Succ</c-> <c- o>==</c-> <c- mi>0</c-><c- p>)</c-> <c- p>{</c->
		<c- c1>// handle error ...</c->
		<c- k>return</c-> <c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>propagate_transcode_error</c-><c- p>(</c-><c- n>input</c-><c- p>,</c-> <c- n>to_encoding</c-><c- p>,</c->
			<c- n>output</c-><c- p>,</c-> <c- n>from_encoding</c-><c- p>,</c->
			<c- n>transcode_result</c-><c- o>&lt;</c-><c- d>/*...*/</c-><c- o>></c-><c- p>(</c-><c- d>/* ... */</c-><c- p>),</c->
			<c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encoding_errc</c-><c- o>::</c-><c- n>invalid_sequence</c-><c- p>,</c->
			<c- n>std</c-><c- o>::</c-><c- n>span</c-><c- o>&lt;</c-><c- b>char</c-><c- p>,</c-> <c- mi>0</c-><c- o>></c-><c- p>{});</c->
	<c- p>}</c->
	<c- k>return</c-> <c- n>transcode_result</c-><c- o>&lt;</c-><c- d>/*...*/</c-><c- o>></c-><c- p>(</c-><c- d>/* ... */</c-><c- p>);</c->
<c- p>}</c->
</pre>
   <p>This does not show all the error handling, but it is a full explanation/demonstration of a custom <code class="highlight"><c- n>windows_1252</c-></code> encoding defined by a user going through the customization point to get to <code class="highlight"><c- n>utf16</c-></code> encoded text. Note that this is a slight simplification, since there are additional checks for what kind of error handler is present and whether or not valid substitution can be performed (e.g., since <code class="highlight"><c- n>MultiByteToWideChar</c-></code> does not accept "unique replacement" characters, but <code class="highlight"><c- n>WideCharToMultiByte</c-></code> does).</p>
   <p class="note" role="note"><span>Note:</span> Like in <a href="#design-speed-customization-transcode_one">§ 3.4.1.1 One-by-one Transcoding Shortcuts</a>, the function <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>propagate_transcode_error</c-></code> takes care of calling the <code class="highlight"><c- n>from_error_handler</c-></code> and, if appropriate, the <code class="highlight"><c- n>to_error_handler</c-></code> as well.</p>
   <p>There does exist some concern for individuals who may want to do specializations for the standard’s encodings. The specification will permit someone to write their own <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf8</c-></code> ⇌ <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>utf16</c-></code> optimization, which will take precedence. This does not let the implementation off the hook for performance: this is only expected to be done for cases where the end-user knows their target architecture better than the standard could (small embedded devices with obscure chipsets and ISAs, and platforms with custom compilers, and similar). Common environments can and absolutely should be optimized by the implementation because there is a bounded set of only 9 possible encodings that the C++ Standard will include at first if this proposal progresses all the way.</p>
   <p><strong class="advisement"> Even if this is possible, it is absolutely expected for implementations to optimize common Unicode encoding pairs with OS or library-internal specific algorithms. If a vendor fails to do this, please file a bug against their implementation.<br>Loudly.</strong></p>
   <h5 class="heading settled" data-level="3.4.1.3" id="design-speed-customization-free_validation_count"><span class="secno">3.4.1.3. </span><span class="content">Customizability: Validating and Counting Free Functions</span><a class="self-link" href="#design-speed-customization-free_validation_count"></a></h5>
   <p>The <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>validate</c-></code> function also needs a customization point, as well as <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>encode_count</c-></code> and <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>decode_count</c-></code>. To start, there are efficient ways to count code units (e.g., in UTF-8) that do not require synthesizing the full code point value. This can be used to save on speed when counting the size of a very large buffer of text. Similarly, <code class="highlight"><c- n>validate</c-></code> can be done cheaply and efficiently when compared to the common loop outlined in <a href="#design-high-level-free-validate">§ 3.3.1.4 Free Function validate</a>. Therefore, there are ADL customization points that are as follows:</p>
<pre class="language-cpp highlight"><c- c1>// in any related namespace in which ADL can find it</c->

<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>State</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
<c- n>count_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>State</c-><c- o>></c-> <c- n>text_decode_count</c-><c- p>(</c-><c- n>Input</c-> <c- n>input</c-><c- p>,</c-> <c- k>const</c-> <c- n>Encoding</c-><c- o>&amp;</c-> <c- n>encoding</c-><c- p>,</c->
	<c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>);</c->

<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Output</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>State</c-><c- p>,</c-> <c- k>typename</c-> <c- n>ErrorHandler</c-><c- o>></c->
<c- n>count_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>State</c-><c- o>></c-> <c- n>text_encode_count</c-><c- p>(</c-><c- n>Input</c-> <c- n>input</c-><c- p>,</c-> <c- k>const</c-> <c- n>Encoding</c-><c- o>&amp;</c-> <c- n>encoding</c-><c- p>,</c->
	<c- n>State</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>,</c-> <c- n>ErrorHandler</c-><c- o>&amp;&amp;</c-> <c- n>error_handler</c-><c- p>);</c->

<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c->
	<c- k>typename</c-> <c- n>DecodeState</c-><c- p>,</c-> <c- k>typename</c-> <c- n>EncodeState</c-><c- o>></c->
<c- n>validate_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>DecodeState</c-><c- o>></c-> <c- n>text_validate</c-><c- p>(</c-><c- n>Input</c-> <c- n>input</c-><c- p>,</c-> <c- k>const</c-> <c- n>Encoding</c-><c- o>&amp;</c-> <c- n>encoding</c-><c- p>,</c->
	<c- n>DecodeState</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>,</c-> <c- n>EncodeState</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>);</c->

<c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- n>Input</c-><c- p>,</c-> <c- k>typename</c-> <c- n>Encoding</c-><c- p>,</c-> <c- k>typename</c-> <c- n>DecodeState</c-><c- o>></c->
<c- n>validate_result</c-><c- o>&lt;</c-><c- n>Input</c-><c- p>,</c-> <c- n>DecodeState</c-><c- o>></c-> <c- n>text_validate</c-><c- p>(</c-><c- n>Input</c-> <c- n>input</c-><c- p>,</c-> <c- k>const</c-> <c- n>Encoding</c-><c- o>&amp;</c-> <c- n>encoding</c-><c- p>,</c->
	<c- n>DecodeState</c-><c- o>&amp;</c-> <c- n>state</c-><c- p>);</c->
</pre>
   <p>Notably, there are two <code class="highlight"><c- n>text_validate</c-></code> functions that can be opted into that take 4 or 3 arguments, respectively. The 3-argument form is for the rare case of an encoding that cannot create a default state, like ones where <code class="highlight"><c- n>is_self_state_encoding_v</c-><c- o>&lt;</c-><c- n>Encoding</c-><c- o>></c-></code> is true (e.g. the <code class="highlight"><c- n>any_encoding</c-></code>/<code class="highlight"><c- n>variant_encoding</c-><c- o>&lt;</c-><c- n>Enc0</c-><c- p>,</c-> <c- n>Enc1</c-><c- p>,</c-> <c- p>...</c-><c- o>></c-></code> described in this proposal).</p>
   <p>In this case, we need a customization point wherein such an encoding, using internal/secret knowledge, can do its validation without needing to rely on the 4-argument <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>validate</c-></code> overload and the core default loop’s specification. This satisfies the ability of self-state encodings to escape the need to pass itself twice to the <code class="highlight"><c- n>validate</c-></code> function.</p>
   <h2 class="heading settled" data-level="4" id="implementation"><span class="secno">4. </span><span class="content">Implementation Experience</span><a class="self-link" href="#implementation"></a></h2>
   <p>There are implementations of this work, taking some of it in part or in full.</p>
   <h3 class="heading settled" data-level="4.1" id="implementation-previous"><span class="secno">4.1. </span><span class="content">Previous Work</span><a class="self-link" href="#implementation-previous"></a></h3>
   <p>While the ideas presented in this paper have been explored in various different forms, the ideas have never been succinctly composed into a single distributable library. Therefore, the author of this paper is working on an implementation that synthesizes all of the learning from <a data-link-type="biblio" href="#biblio-icu">[icu]</a>, <a data-link-type="biblio" href="#biblio-boosttext">[boost.text]</a>, <a data-link-type="biblio" href="#biblio-text_view">[text_view]</a> and <a data-link-type="biblio" href="#biblio-libogonek">[libogonek]</a>. Reportedly, an implementation using a similar system exists in a few Fortune 500 company codebases. <a data-link-type="biblio" href="#biblio-copperspice">[copperspice]</a> also has a somewhat similar implementation, but differs in a few places.</p>
   <h3 class="heading settled" data-level="4.2" id="implementation-visible"><span class="secno">4.2. </span><span class="content">Current Work</span><a class="self-link" href="#implementation-visible"></a></h3>
   <p>This paper’s r2 hopes to contain benchmarks, initial implementation and usage experience. This paper’s r3 hopes to contain more benchmarks, refined implementation and additional field and usage experience after a more valuable and viable minimum product is established. The current implementation is being incubated in a private implementation in <code class="highlight"><c- n>phd</c-><c- p>.</c-><c- n>text</c-></code> (please e-mail the author if you would like to access the implementation).</p>
   <h2 class="heading settled" data-level="5" id="faq"><span class="secno">5. </span><span class="content">FAQ</span><a class="self-link" href="#faq"></a></h2>
   <p>Some commonly asked questions.</p>
   <h3 class="heading settled" data-level="5.1" id="faq-max_code_points"><span class="secno">5.1. </span><span class="content">Question: Why is there a <code class="highlight"><c- n>max_code_points</c-></code> value? Won’t you only ever output a single unicode code point?</span><a class="self-link" href="#faq-max_code_points"></a></h3>
   <p>This is incorrect. There are cases for encodings such as TSCII that output multiple Unicode code points at once. The minimum required space must be dictated by the encoding: C++ made the mistake for <code class="highlight"><c- n>basic_filebuf</c-></code> with the <a href="https://youtu.be/BdUipluIf1E?t=371">infamous "N:1" rule</a>, and that rule is one of the primary reasons file-based streams (which can be any <code class="highlight"><c- p>(</c-><c- n>o</c-><c- o>|</c-><c- n>i</c-><c- p>)</c-><c- n>stream</c-></code> in an inheritance-based design, as well as nearly anything with the wide use of what file descriptors represent in many operating systems) cannot handle Unicode properly in many implementations (chief among them, Microsoft Windows).</p>
   <h3 class="heading settled" data-level="5.2" id="faq-old_unicode"><span class="secno">5.2. </span><span class="content">Question: What about Old Unicode Encodings / Private Use Area Encodings?</span><a class="self-link" href="#faq-old_unicode"></a></h3>
   <p>These are treated like legacy encodings. Someone must convert to "normal" (Unicode vRight-Now) Unicode in order to have higher level algorithms work. If this includes Private Use Area characters, then a person will need the ability to customize the normalization algorithms for use in getting e.g. Medieval Text and Biblical Text to normalize properly. This will be covered in a future paper on a <code class="highlight"><c- n>normalize</c-><c- p>(...)</c-></code> free function, a <code class="highlight"><c- n>normalization_view</c-></code> type, and <code class="highlight"><c- n>nf</c-><c- p>(</c-><c- n>k</c-><c- p>)(</c-><c- n>d</c-><c- o>/</c-><c- n>c</c-><c- p>)</c-></code>/<code class="highlight"><c- n>fcc</c-></code> normalization objects provided by the standard. SG16 at the moment is against trying to create customization points and changes for the Unicode Character Database and give PUA code points different properties. Individuals who use e.g. Unicode v6 w/ Softbank Private Use Area or <a href="https://en.wikipedia.org/wiki/Tamil_All_Character_Encoding">TACE 16</a> Encodings will need to convert any Private Use Area characters to Unicode and normalize, or provide their own normalization form for upcoming papers.</p>
   <h3 class="heading settled" data-level="5.3" id="faq-encode_decode_transcoding"><span class="secno">5.3. </span><span class="content">Question: It can be faster to bulk-decode, then bulk-encode instead of one-by-one transcoding. Why not that design?</span><a class="self-link" href="#faq-encode_decode_transcoding"></a></h3>
   <p>While this is true, as asserted in the <a href="#design-high-level-free-transcode">§ 3.3.1.3 Free Function transcode</a> section, bulk decoding requires that there is intermediary storage to bulk-decode into. This imposes an invisible intermediate in the API, or requires explicitly allowing the user to pass one in. Furthermore, a user may only want to partially decode, partially encode, and then repeat because there is some internal memory limit rather than do a single "complete" bulk conversion.</p>
   <p>A significant amount of thought and experimental implementation went into potentially providing both a <code class="highlight"><c- n>transcode</c-></code> function that behaves as is currently specified, PLUS a <code class="highlight"><c- n>decode_encode</c-></code> function that does a bulk decode and then a bulk encode. The design space was deemed a little too fraught with knobs and potential for exceeding user expectations in unexpected ways. This does not mean a regular user cannot enjoy the benefits of building a similar abstraction. Both the <code class="highlight"><c- n>decode</c-></code> and <code class="highlight"><c- n>encode</c-></code> functions are available for a user to apply the right amount of each to achieve a goal similar to the one behind the <code class="highlight"><c- n>decode_encode</c-></code> abstraction previously envisioned.</p>
   <h3 class="heading settled" data-level="5.4" id="faq-normalization"><span class="secno">5.4. </span><span class="content">Question: Where is the specification for <code class="highlight"><c- n>normalization_view</c-><c- o>&lt;</c-><c- n>nfkc</c-><c- o>></c-></code> and <code class="highlight"><c- n>normalize</c-><c- p>(...)</c-></code>?</span><a class="self-link" href="#faq-normalization"></a></h3>
   <p>Normalization is separable from the low-level transcoding, and even though APIs like <code class="highlight"><c- n>MultiByteToWideChar</c-></code> and similar have additional parameters for doing automatic decomposition or composition upon transcoding, more recently the API has switched to doing these things in 2 separate phases. It is unclear whether there is a performance gain for the two being combined as it is in Windows’s APIs, but without such performance data we prefer correctness and existing practice. Furthermore, normalization overloads can always be added to the transcoding interfaces later, if a combined interface proves to have benefits. There is also an open question about the existence of normalization within the highest level abstraction types like <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>basic_text</c-></code> and whether or not those invariants should be enforced. Currently, Zach Laine’s Boost.Text enforces normalization on creation and insertion of data into its text types.</p>
   <h3 class="heading settled" data-level="5.5" id="faq-text_types"><span class="secno">5.5. </span><span class="content">Question: Where is the specification for <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>basic_text</c-></code> and <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>text</c-><c- o>::</c-><c- n>basic_text_view</c-></code>?</span><a class="self-link" href="#faq-text_types"></a></h3>
   <p>Those types, as currently imagined, require additional functionality, like normalization and potentially segmentation algorithms (e.g., for making Grapheme Clusters). They will be split off into a separate paper, even if we allude to their existence and use in this proposal.</p>
   <h2 class="heading settled" data-level="6" id="acknowledgements"><span class="secno">6. </span><span class="content">Acknowledgements</span><a class="self-link" href="#acknowledgements"></a></h2>
   <p>Thanks to R. Martinho Fernandes, whose insightful Unicode quips got me hooked on the problem space many, many years ago and helped me develop my first in-house solution for an encoding container adaptor several years ago. Thanks to Mark Boyall, Xeo, and Eric Tremblay for bouncing off ideas, fixes, and other thoughts many years ago when struggling to compile libogonek on a disastrous Microsoft Visual Studio November 2012 CTP compiler.</p>
   <p>Thanks to Tom Honermann, who had me present my second SG16 meeting before it was SG16 and help represent and carry his papers which gave me the drive to help fix the C++ standard for text. Many thanks to Zach Laine, whose tireless implementation efforts have given me much insight and understanding into the complexities of Unicode and whose implementation in Boost.Text made clear the tradeoffs and performance issues. Thanks to Mark Zeren who helped keep me in SG16 and working on these problems.</p>
   <p>And thank you to those of you who grew tired of an ASCII-only world and supported this effort.</p>
  </main>
<script>
(function() {
  "use strict";
  /* Label markup for the sidebar toggle link and the "jump to ToC" link.
     The arrow glyphs are marked aria-hidden; the second span carries the text. */
  var collapseSidebarText = '<span aria-hidden="true">←</span> <span>Collapse Sidebar</span>';
  var expandSidebarText = '<span aria-hidden="true">→</span> <span>Pop Out Sidebar</span>';
  var tocJumpText = '<span aria-hidden="true">↑</span> <span>Jump to Table of Contents</span>';

  /* Media query used to decide between sidebar and inline ToC. */
  var sidebarMedia = window.matchMedia('screen and (min-width: 78em)');

  /* Media-query listener: mirror the query's match state onto the sidebar. */
  var autoToggle = function(mqEvent) {
    toggleSidebar(mqEvent.matches);
  };

  if (sidebarMedia.addListener) {
    sidebarMedia.addListener(autoToggle);
  }

  /**
   * Switch the Table of Contents between sidebar and inline presentation by
   * swapping the 'toc-sidebar' / 'toc-inline' classes on <body>, updating the
   * toggle link's label, and compensating the scroll position.
   *
   * @param {boolean} [on] Truthy shows the ToC as a sidebar, falsy shows it
   *   inline. When omitted (undefined or null) the current state is flipped.
   *
   * Does nothing when #toc or #toc-toggle is missing: autoToggle is registered
   * on the media query before the ToC is looked up, so this function can be
   * invoked on pages that have neither element.
   */
  function toggleSidebar(on) {
    var toggle = document.getElementById('toc-toggle');
    var tocNav = document.getElementById('toc');
    if (!toggle || !tocNav) {
      return;
    }

    if (on === undefined || on === null) {
      on = !document.body.classList.contains('toc-sidebar');
    }

    /* Don’t scroll to compensate for the ToC if we’re above it already. */
    var headY = 0;
    var head = document.querySelector('.head');
    if (head) {
      // terrible approx of "top of ToC"
      headY += head.offsetTop + head.offsetHeight;
    }
    var skipScroll = window.scrollY < headY;

    if (on) {
      /* Measure before the class swap changes the ToC's layout. */
      var tocHeight = tocNav.offsetHeight;
      document.body.classList.add('toc-sidebar');
      document.body.classList.remove('toc-inline');
      toggle.innerHTML = collapseSidebarText;
      if (!skipScroll) {
        window.scrollBy(0, 0 - tocHeight);
      }
      tocNav.focus();
      if (sidebarMedia.addListener) {
        sidebarMedia.addListener(autoToggle); // auto-collapse when out of room
      }
    }
    else {
      document.body.classList.add('toc-inline');
      document.body.classList.remove('toc-sidebar');
      toggle.innerHTML = expandSidebarText;
      if (!skipScroll) {
        window.scrollBy(0, tocNav.offsetHeight);
      }
      if (toggle.matches(':hover')) {
        /* Unfocus button when not using keyboard navigation,
           because I don’t know where else to send the focus. */
        toggle.blur();
      }
    }
  }

  /**
   * Build the sidebar toggle link and attach it to the #toc-nav container,
   * creating that container and a "Jump to Table of Contents" link when the
   * document does not already provide them.
   *
   * The controls are created from script so that they do not exist when JS
   * is off.
   */
  function createSidebarToggle() {
    /* Create the sidebar toggle in JS; it shouldn’t exist when JS is off. */
    var toggle = document.createElement('a');
      /* This should probably be a button, but appearance isn’t standards-track.*/
    toggle.id = 'toc-toggle';
    /* The DOM property for the class attribute is `className`; assigning to
       `class` only creates an unrelated property and leaves the element unclassed. */
    toggle.className = 'toc-toggle';
    toggle.href = '#toc';
    toggle.innerHTML = collapseSidebarText;

    if (sidebarMedia.addListener) {
      sidebarMedia.addListener(autoToggle);
    }
    var toggler = function(e) {
      e.preventDefault();
      if (sidebarMedia.removeListener) {
        sidebarMedia.removeListener(autoToggle); // persist explicit off states
      }
      toggleSidebar();
      return false;
    }
    toggle.addEventListener('click', toggler, false);


    /* Get <nav id=toc-nav>, or make it if we don’t have one. */
    var tocNav = document.getElementById('toc-nav');
    if (!tocNav) {
      tocNav = document.createElement('p');
      tocNav.id = 'toc-nav';
      /* Prepend for better keyboard navigation */
      document.body.insertBefore(tocNav, document.body.firstChild);
    }
    /* While we’re at it, make sure we have a Jump to Toc link. */
    var tocJump = document.getElementById('toc-jump');
    if (!tocJump) {
      tocJump = document.createElement('a');
      tocJump.id = 'toc-jump';
      tocJump.href = '#toc';
      tocJump.innerHTML = tocJumpText;
      tocNav.appendChild(tocJump);
    }

    tocNav.appendChild(toggle);
  }

  /* Set up the sidebar controls when the document has a ToC; warn otherwise. */
  var toc = document.getElementById('toc');
  if (toc) {
    createSidebarToggle();
    toggleSidebar(sidebarMedia.matches);

    /* If the sidebar has been manually opened and is currently overlaying the text
       (window too small for the MQ to add the margin to body),
       then auto-close the sidebar once you click on something in there. */
    toc.addEventListener('click', function(e) {
      /* The click may land on an element nested inside a link (e.g. a
         section-number span), so look for an enclosing <a> between the
         click target and the ToC root instead of testing the target alone. */
      var node = e.target;
      var insideLink = false;
      while (node && node !== toc) {
        if (node.tagName && node.tagName.toLowerCase() == "a") {
          insideLink = true;
          break;
        }
        node = node.parentNode;
      }
      if(insideLink && document.body.classList.contains('toc-sidebar') && !sidebarMedia.matches) {
        toggleSidebar(false);
      }
    }, false);
  }
  else {
    console.warn("Can’t find Table of Contents. Please use <nav id='toc'> around the ToC.");
  }

  /* Give every data/index table that is not already inside an .overlarge
     container its own .overlarge wrapper, in case the table overflows. */
  var overflowCandidates = document.querySelectorAll(':not(.overlarge) > table.data, :not(.overlarge) > table.index');
  Array.prototype.forEach.call(overflowCandidates, function(tbl) {
    var box = document.createElement('div');
    box.className = 'overlarge';
    /* Put the wrapper where the table sits, then move the table into it. */
    tbl.parentNode.insertBefore(box, tbl);
    box.appendChild(tbl);
  });

})();
</script>
  <h2 class="no-num no-ref heading settled" id="references"><span class="content">References</span><a class="self-link" href="#references"></a></h2>
  <h3 class="no-num no-ref heading settled" id="informative"><span class="content">Informative References</span><a class="self-link" href="#informative"></a></h3>
  <dl>
   <dt id="biblio-boosttext">[BOOST.TEXT]
   <dd>Zach Laine. <a href="https://github.com/tzlaine/text">Boost.Text</a>. October 20th, 2018. URL: <a href="https://github.com/tzlaine/text">https://github.com/tzlaine/text</a>
   <dt id="biblio-cesu8">[CESU8]
   <dd>Unicode Consortium. <a href="https://www.unicode.org/reports/tr26/">UTR #26, Compatibility Encoding Scheme for UTF-16: 8-Bit (CESU-8)</a>. March 13th, 2019. URL: <a href="https://www.unicode.org/reports/tr26/">https://www.unicode.org/reports/tr26/</a>
   <dt id="biblio-copperspice">[COPPERSPICE]
   <dd>CopperSpice C++ Libraries. <a href="https://github.com/copperspice/cs_string">CsString</a>. March 2nd, 2020. URL: <a href="https://github.com/copperspice/cs_string">https://github.com/copperspice/cs_string</a>
   <dt id="biblio-fast-utf8">[FAST-UTF8]
   <dd>Bob Steagall. <a href="https://www.youtube.com/watch?v=5FQ87-Ecb-A">Fast Conversion From UTF-8 with C++, DFAs, and SSE Intrinsics</a>. September 26th, 2019. URL: <a href="https://www.youtube.com/watch?v=5FQ87-Ecb-A">https://www.youtube.com/watch?v=5FQ87-Ecb-A</a>
   <dt id="biblio-icu">[ICU]
   <dd>Unicode Consortium. <a href="http://site.icu-project.org/">International Components for Unicode</a>. April 17th, 2019. URL: <a href="http://site.icu-project.org/">http://site.icu-project.org/</a>
   <dt id="biblio-libogonek">[LIBOGONEK]
   <dd>R. Martinho Fernandes. <a href="https://github.com/libogonek/ogonek">Ogonek</a>. December 9th, 2013. URL: <a href="https://github.com/libogonek/ogonek">https://github.com/libogonek/ogonek</a>
   <dt id="biblio-libogonek-encoding_scheme">[LIBOGONEK-ENCODING_SCHEME]
   <dd>R. Martinho Fernandes. <a href="https://github.com/libogonek/ogonek/blob/devel/include/ogonek/encoding/encoding_scheme.h%2B%2B#L80">encoding_scheme</a>. December 9th, 2013. URL: <a href="https://github.com/libogonek/ogonek/blob/devel/include/ogonek/encoding/encoding_scheme.h%2B%2B#L80">https://github.com/libogonek/ogonek/blob/devel/include/ogonek/encoding/encoding_scheme.h%2B%2B#L80</a>
   <dt id="biblio-n2440">[N2440]
   <dd>JeanHeyd Meneide. <a href="https://thephd.github.io/vendor/future_cxx/papers/source/n2440">Restartable and Non-Restartable Functions for Efficient Character Conversions</a>. March 2nd, 2020. URL: <a href="https://thephd.github.io/vendor/future_cxx/papers/source/n2440">https://thephd.github.io/vendor/future_cxx/papers/source/n2440</a>
   <dt id="biblio-n3574">[N3574]
   <dd>Mark Boyall. <a href="https://wg21.link/n3574">Binding stateful functions as function pointers</a>. 10 March 2013. URL: <a href="https://wg21.link/n3574">https://wg21.link/n3574</a>
   <dt id="biblio-n4381">[N4381]
   <dd>Eric Niebler. <a href="https://wg21.link/n4381">Suggested Design for Customization Points</a>. 11 March 2015. URL: <a href="https://wg21.link/n4381">https://wg21.link/n4381</a>
   <dt id="biblio-p0244r2">[P0244R2]
   <dd>Tom Honermann. <a href="https://wg21.link/p0244r2">Text_view: A C++ concepts and range based character encoding and code point enumeration library</a>. 13 June 2017. URL: <a href="https://wg21.link/p0244r2">https://wg21.link/p0244r2</a>
   <dt id="biblio-p1292">[P1292]
   <dd>Matt Calabrese. <a href="https://wg21.link/p1292">Customization Point Functions</a>. October 10th, 2018. URL: <a href="https://wg21.link/p1292">https://wg21.link/p1292</a>
   <dt id="biblio-range-v3">[RANGE-V3]
   <dd>Eric Niebler; Casey Carter. <a href="https://github.com/ericniebler/range-v3">range-v3</a>. June 11th, 2019. URL: <a href="https://github.com/ericniebler/range-v3">https://github.com/ericniebler/range-v3</a>
   <dt id="biblio-sol2-wstring_convert">[SOL2-WSTRING_CONVERT]
   <dd>ThePhD. <a href="https://github.com/ThePhD/sol2/issues/571">wstring_convert sucks</a>. January 27th, 2018. URL: <a href="https://github.com/ThePhD/sol2/issues/571">https://github.com/ThePhD/sol2/issues/571</a>
   <dt id="biblio-text_view">[TEXT_VIEW]
   <dd>Tom Honermann. <a href="https://github.com/tahonermann/text_view">text_view</a>. November 10th, 2017. URL: <a href="https://github.com/tahonermann/text_view">https://github.com/tahonermann/text_view</a>
   <dt id="biblio-wtf8">[WTF8]
   <dd>Simon Sapin. <a href="https://simonsapin.github.io/wtf-8/">The WTF-8 encoding</a>. May 12th, 2018. URL: <a href="https://simonsapin.github.io/wtf-8/">https://simonsapin.github.io/wtf-8/</a>
  </dl>