<!doctype html><html lang="en">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
  <meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport">
  <title>P3715R0: Tightening floating-point semantics for C++</title>
<style data-fill-with="stylesheet">/******************************************************************************
 *                   Style sheet for the W3C specifications                   *
 *
 * Special classes handled by this style sheet include:
 *
 * Indices
 *   - .toc for the Table of Contents (<ol class="toc">)
 *     + <span class="secno"> for the section numbers
 *   - #toc for the Table of Contents (<nav id="toc">)
 *   - ul.index for Indices (<a href="#ref">term</a><span>, in § N.M</span>)
 *   - table.index for Index Tables (e.g. for properties or elements)
 *
 * Structural Markup
 *   - table.data for general data tables
 *     -> use 'scope' attribute, <colgroup>, <thead>, and <tbody> for best results !
 *     -> use <table class='complex data'> for extra-complex tables
 *     -> use <td class='long'> for paragraph-length cell content
 *     -> use <td class='pre'> when manual line breaks/indentation would help readability
 *   - dl.switch for switch statements
 *   - ol.algorithm for algorithms (helps to visualize nesting)
 *   - .figure and .caption (HTML4) and figure and figcaption (HTML5)
 *     -> .sidefigure for right-floated figures
 *   - ins/del
 *     -> ins/del.c### for candidate and proposed changes (amendments)
 *
 * Code
 *   - pre and code
 *
 * Special Sections
 *   - .note       for informative notes             (div, p, span, aside, details)
 *   - .example    for informative examples          (div, p, pre, span)
 *   - .issue      for issues                        (div, p, span)
 *   - .advisement for loud normative statements     (div, p, strong)
 *   - .annoying-warning for spec obsoletion notices (div, aside, details)
 *   - .correction for "candidate corrections"       (div, aside, details, section)
 *   - .addition   for "candidate additions"         (div, aside, details, section)
 *   - .correction.proposed for "proposed corrections" (div, aside, details, section)
 *   - .addition.proposed   for "proposed additions"   (div, aside, details, section)
 *
 * Definition Boxes
 *   - pre.def   for WebIDL definitions
 *   - table.def for tables that define other entities (e.g. CSS properties)
 *   - dl.def    for definition lists that define other entities (e.g. HTML elements)
 *
 * Numbering
 *   - .secno for section numbers in .toc and headings (<span class='secno'>3.2</span>)
 *   - .marker for source-inserted example/figure/issue numbers (<span class='marker'>Issue 4</span>)
 *   - ::before styled for CSS-generated issue/example/figure numbers:
 *     -> Documents wishing to use this only need to add
 *        figcaption::before,
 *        .caption::before { content: "Figure "  counter(figure) " ";  }
 *        .example::before { content: "Example " counter(example) " "; }
 *        .issue::before   { content: "Issue "   counter(issue) " ";   }
 *
 * Header Stuff (ignore, just don't conflict with these classes)
 *   - .head for the header
 *   - .copyright for the copyright
 *
 * Outdated warning for old specs
 *
 * Miscellaneous
 *   - .overlarge for things that should be as wide as possible, even if
 *     that overflows the body text area. This can be used on an item or
 *     on its container, depending on the effect desired.
 *     Note that this styling basically doesn't help at all when printing,
 *     since A4 paper isn't much wider than the max-width here.
 *     It's better to design things to fit into a narrower measure if possible.
 *
 *   - js-added ToC jump links (see fixup.js)
 *
 ******************************************************************************/

/* color variables included separately for reliability */

/******************************************************************************/
/*                                    Body                                    */
/******************************************************************************/

	html {
	}

	body {
		counter-reset: example figure issue;

		/* Layout */
		max-width: 50em;			  /* limit line length to 50em for readability   */
		margin: 0 auto;				/* center text within page                    */
		padding: 1.6em 1.5em 2em 50px; /* assume 16px font size for downlevel clients */
		padding: 1.6em 1.5em 2em calc(26px + 1.5em); /* leave space for status flag    */

		/* Typography */
		line-height: 1.5;
		font-family: sans-serif;
		widows: 2;
		orphans: 2;
		word-wrap: break-word;
		overflow-wrap: break-word;
		hyphens: auto;

		color: black;
		color: var(--text);
		background: white top left fixed no-repeat;
		background: var(--bg) top left fixed no-repeat;
		background-size: 25px auto;
	}


/******************************************************************************/
/*                         Front Matter & Navigation                          */
/******************************************************************************/

/** Header ********************************************************************/

	div.head { margin-bottom: 1em; }
	div.head hr { border-style: solid; }

	div.head h1 {
		font-weight: bold;
		margin: 0 0 .1em;
		font-size: 220%;
	}

	div.head h2 { margin-bottom: 1.5em;}

/** W3C Logo ******************************************************************/

	.head .logo {
		float: right;
		margin: 0.4rem 0 0.2rem .4rem;
	}

	.head img[src*="logos/W3C"] {
		display: block;
		border: solid #1a5e9a;
		border: solid var(--logo-bg);
		border-width: .65rem .7rem .6rem;
		border-radius: .4rem;
		background: #1a5e9a;
		background: var(--logo-bg);
		color: white;
		color: var(--logo-text);
		font-weight: bold;
	}

	.head a:hover > img[src*="logos/W3C"],
	.head a:focus > img[src*="logos/W3C"] {
		opacity: .8;
	}

	.head a:active > img[src*="logos/W3C"] {
		background: #c00;
		background: var(--logo-active-bg);
		border-color: #c00;
		border-color: var(--logo-active-bg);
	}

	/* see also additional rules in Link Styling section */

/** Copyright *****************************************************************/

	p.copyright,
	p.copyright small { font-size: small; }

/** Back to Top / ToC Toggle **************************************************/

	@media print {
		#toc-nav {
			display: none;
		}
	}
	@media not print {
		#toc-nav {
			position: fixed;
			z-index: 3;
			bottom: 0; left: 0;
			margin: 0;
			min-width: 1.33em;
			border-top-right-radius: 2rem;
			box-shadow: 0 0 2px;
			font-size: 1.5em;
		}
		#toc-nav > a {
			display: block;
			white-space: nowrap;

			height: 1.33em;
			padding: .1em 0.3em;
			margin: 0;

			box-shadow: 0 0 2px;
			border: none;
			border-top-right-radius: 1.33em;

			color: #707070;
			color: var(--tocnav-normal-text);
			background: white;
			background: var(--tocnav-normal-bg);
		}
		#toc-nav > a:hover,
		#toc-nav > a:focus {
			color: black;
			color: var(--tocnav-hover-text);
			background: #f8f8f8;
			background: var(--tocnav-hover-bg);
		}
		#toc-nav > a:active {
			color: #c00;
			color: var(--tocnav-active-text);
			background: white;
			background: var(--tocnav-active-bg);
		}

		#toc-nav > #toc-jump {
			padding-bottom: 2em;
			margin-bottom: -1.9em;
		}

		/* statusbar gets in the way on keyboard focus; remove once browsers fix */
		#toc-nav > a[href="#toc"]:not(:hover):focus:last-child {
			padding-bottom: 1.5rem;
		}

		#toc-nav:not(:hover) > a:not(:focus) > span + span {
			/* Ideally this uses :focus-within on #toc-nav */
			display: none;
		}
		#toc-nav > a > span + span {
			padding-right: 0.2em;
		}
	}

/** ToC Sidebar ***************************************************************/

	/* Floating sidebar */
	@media screen {
		body.toc-sidebar #toc {
			position: fixed;
			top: 0; bottom: 0;
			left: 0;
			width: 23.5em;
			max-width: 80%;
			max-width: calc(100% - 2em - 26px);
			overflow: auto;
			padding: 0 1em;
			padding-left: 42px;
			padding-left: calc(1em + 26px);
			color: black;
			color: var(--tocsidebar-text);
			background: inherit;
			background-color: #f7f8f9;
			background-color: var(--tocsidebar-bg);
			z-index: 1;
			box-shadow: -.1em 0 .25em rgba(0,0,0,.1) inset;
			box-shadow: -.1em 0 .25em var(--tocsidebar-shadow) inset;
		}
		body.toc-sidebar #toc h2 {
			margin-top: .8rem;
			font-variant: small-caps;
			font-variant: all-small-caps;
			text-transform: lowercase;
			font-weight: bold;
			color: gray;
			color: hsla(203,20%,40%,.7);
			color: var(--tocsidebar-heading-text);
		}
		body.toc-sidebar #toc-jump:not(:focus) {
			width: 0;
			height: 0;
			padding: 0;
			position: absolute;
			overflow: hidden;
		}
	}
	/* Hide main scroller when only the ToC is visible anyway */
	@media screen and (max-width: 28em) {
		body.toc-sidebar {
			overflow: hidden;
		}
	}

	/* Sidebar with its own space */
	@media screen and (min-width: 78em) {
		body:not(.toc-inline) #toc {
			position: fixed;
			top: 0; bottom: 0;
			left: 0;
			width: 23.5em;
			overflow: auto;
			padding: 0 1em;
			padding-left: 42px;
			padding-left: calc(1em + 26px);
			color: black;
			color: var(--tocsidebar-text);
			background: inherit;
			background-color: #f7f8f9;
			background-color: var(--tocsidebar-bg);
			z-index: 1;
			box-shadow: -.1em 0 .25em rgba(0,0,0,.1) inset;
			box-shadow: -.1em 0 .25em var(--tocsidebar-shadow) inset;
		}
		body:not(.toc-inline) #toc h2 {
			margin-top: .8rem;
			font-variant: small-caps;
			font-variant: all-small-caps;
			text-transform: lowercase;
			font-weight: bold;
			color: gray;
			color: hsla(203,20%,40%,.7);
			color: var(--tocsidebar-heading-text);
		}

		body:not(.toc-inline) {
			padding-left: 29em;
		}
		/* See also Overflow section at the bottom */

		body:not(.toc-inline) #toc-jump:not(:focus) {
			width: 0;
			height: 0;
			padding: 0;
			position: absolute;
			overflow: hidden;
		}
	}
	@media screen and (min-width: 90em) {
		body:not(.toc-inline) {
			margin: 0 4em;
		}
	}

/******************************************************************************/
/*                                Sectioning                                  */
/******************************************************************************/

/** Headings ******************************************************************/

	h1, h2, h3, h4, h5, h6, dt {
		page-break-after: avoid;
		page-break-inside: avoid;
		font: 100% sans-serif;   /* Reset all font styling to clear out UA styles */
		font-family: inherit;	/* Inherit the font family. */
		line-height: 1.2;		/* Keep wrapped headings compact */
		hyphens: manual;		/* Hyphenated headings look weird */
	}

	h2, h3, h4, h5, h6 {
		margin-top: 3rem;
	}

	h1, h2, h3 {
		color: #005A9C;
		color: var(--heading-text);
	}

	h1 { font-size: 170%; }
	h2 { font-size: 140%; }
	h3 { font-size: 120%; }
	h4 { font-weight: bold; }
	h5 { font-style: italic; }
	h6 { font-variant: small-caps; }
	dt { font-weight: bold; }

/** Subheadings ***************************************************************/

	h1 + h2,
	#profile-and-date {
		/* #profile-and-date is a subtitle in an H2 under the H1 */
		margin-top: 0;
	}
	h2 + h3,
	h3 + h4,
	h4 + h5,
	h5 + h6 {
		margin-top: 1.2em; /* = 1 x line-height */
	}

/** Section divider ***********************************************************/

	/* Section-divider <hr>: instead of a drawn line, the rule is rendered as
	   centered text. The border and background are made invisible here and the
	   visible glyphs come from the ::before rule below. */
	:not(.head) > :not(.head) + hr {
		font-size: 1.5em;
		text-align: center;
		margin: 1em auto;
		height: auto;
		color: black;
		color: var(--hr-text);
		border: transparent solid 0;
		background: transparent;
	}
	/* U+2727 (white four-pointed star) three times, separated by pairs of
	   U+2003 (em space).
	   NOTE(review): this selector matches any <hr> whose parent is not .head,
	   which is broader than the styling rule above (that one also requires a
	   preceding non-.head sibling) - confirm the difference is intended. */
	:not(.head) > hr::before {
		content: "\2727\2003\2003\2727\2003\2003\2727";
	}

/******************************************************************************/
/*                            Paragraphs and Lists                            */
/******************************************************************************/

	p {
		margin: 1em 0;
	}

	dd > p:first-child,
	li > p:first-child {
		margin-top: 0;
	}

	ul, ol {
		margin-left: 0;
		padding-left: 2em;
	}

	li {
		margin: 0.25em 0 0.5em;
		padding: 0;
	}

	dl dd {
		margin: 0 0 .5em 2em;
	}

	.head dd + dd { /* compact for header */
		margin-top: -.5em;
	}

	/* Style for algorithms */
	ol.algorithm ol:not(.algorithm),
	.algorithm > ol ol:not(.algorithm) {
	border-left: 0.5em solid #DEF;
	border-left: 0.5em solid var(--algo-border);
	}

	/* Put nice boxes around each algorithm. */
	[data-algorithm]:not(.heading) {
	 padding: .5em;
	 border: thin solid #ddd;
	 border: thin solid var(--algo-border);
	 border-radius: .5em;
	 margin: .5em calc(-0.5em - 1px);
	}
	[data-algorithm]:not(.heading) > :first-child {
	 margin-top: 0;
	}
	[data-algorithm]:not(.heading) > :last-child {
	 margin-bottom: 0;
	}

	/* Style for switch/case <dl>s */
	dl.switch > dd > ol.only,
	dl.switch > dd > .only > ol {
	margin-left: 0;
	}
	dl.switch > dd > ol.algorithm,
	dl.switch > dd > .algorithm > ol {
	margin-left: -2em;
	}
	dl.switch {
	padding-left: 2em;
	}
	dl.switch > dt {
	text-indent: -1.5em;
	margin-top: 1em;
	}
	dl.switch > dt + dt {
	margin-top: 0;
	}
	dl.switch > dt::before {
	content: '\21AA';
	padding: 0 0.5em 0 0;
	display: inline-block;
	width: 1em;
	text-align: right;
	line-height: 0.5em;
	}

/** Terminology Markup ********************************************************/


/******************************************************************************/
/*                                 Inline Markup                              */
/******************************************************************************/

/** Terminology Markup ********************************************************/
	dfn   { /* Defining instance */
		font-weight: bolder;
	}
	a > i { /* Instance of term */
		font-style: normal;
	}
	dt dfn code, code.idl {
		font-size: inherit;
	}
	dfn var {
		font-style: normal;
	}

/** Change Marking ************************************************************/

	del {
		color: #aa0000;
		color: var(--del-text);
		background: transparent;
		background: var(--del-bg);
		text-decoration: line-through;
	}
	ins {
		color: #006100;
		color: var(--ins-text);
		background: transparent;
		background: var(--ins-bg);
		text-decoration: underline;
	}

	/* for amendments (candidate/proposed changes) */

	.amendment ins, .correction ins, .addition ins,
	ins[class^=c] {
		text-decoration-style: dotted;
	}
	.amendment del, .correction del, .addition del,
	del[class^=c] {
		text-decoration-style: dotted;
	}
	.amendment.proposed ins, .correction.proposed ins, .addition.proposed ins,
	ins[class^=c].proposed {
		text-decoration-style: double;
	}
	.amendment.proposed del, .correction.proposed del, .addition.proposed del,
	del[class^=c].proposed {
		text-decoration-style: double;
	}

/** Miscellaneous improvements to inline formatting ***************************/

	/* Superscripts: raised above the baseline and set slightly smaller
	   than the surrounding text. */
	sup {
		font-size: 80%;
		vertical-align: super;
	}

/******************************************************************************/
/*                                    Code                                    */
/******************************************************************************/

/** General monospace/pre rules ***********************************************/

	pre, code, samp {
		font-family: Menlo, Consolas, "DejaVu Sans Mono", Monaco, monospace;
		font-size: .9em;
		hyphens: none;
		text-transform: none;
		text-align: left;
		text-align: start;
		font-variant: normal;
		orphans: 3;
		widows: 3;
		page-break-before: avoid;
	}
	pre code,
	code code {
		font-size: 100%;
	}

	pre {
		margin-top: 1em;
		margin-bottom: 1em;
		overflow: auto;
	}

/** Inline Code fragments *****************************************************/

	/* Do something nice. */

/******************************************************************************/
/*                                    Links                                   */
/******************************************************************************/

/** General Hyperlinks ********************************************************/

	/* We hyperlink a lot, so make it less intrusive */
	a[href] {
		color: #034575;
		color: var(--a-normal-text);
		text-decoration: underline #707070;
		text-decoration: underline var(--a-normal-underline);
		text-decoration-skip-ink: none;
	}
	a:visited {
		color: #034575;
		color: var(--a-visited-text);
		text-decoration-color: #bbb;
		text-decoration-color: var(--a-visited-underline);
	}

	/* Indicate interaction with the link */
	a[href]:focus,
	a[href]:hover {
		text-decoration-thickness: 2px;
	}
	a[href]:active {
		color: #c00;
		color: var(--a-active-text);
		text-decoration-color: #c00;
		text-decoration-color: var(--a-active-underline);
	}

	/* Backout above styling for W3C logo */
	.head .logo,
	.head .logo a {
		border: none;
		text-decoration: none;
		background: transparent;
	}

/******************************************************************************/
/*                                    Images                                  */
/******************************************************************************/

	img {
		border-style: none;
	}

	img, svg {
		/* Intentionally not color-scheme aware. */
		background: white;
	}

	/* For autogen numbers, add
	  .caption::before, figcaption::before { content: "Figure " counter(figure) ". "; }
	*/

	figure, .figure, .sidefigure {
		page-break-inside: avoid;
		text-align: center;
		margin: 2.5em 0;
	}
	.figure img,	.sidefigure img,	figure img,
	.figure object, .sidefigure object, figure object {
		max-width: 100%;
		margin: auto;
		height: auto;
	}
	.figure pre, .sidefigure pre, figure pre {
		text-align: left;
		display: table;
		margin: 1em auto;
	}
	.figure table, figure table {
		margin: auto;
	}
	@media screen and (min-width: 20em) {
		.sidefigure {
			float: right;
			width: 50%;
			margin: 0 0 0.5em 0.5em;
		}
	}
	.caption, figcaption, caption {
		font-style: italic;
		font-size: 90%;
	}
	.caption::before, figcaption::before, figcaption > .marker {
		font-weight: bold;
	}
	.caption, figcaption {
		counter-increment: figure;
	}

	/* DL list is indented 2em, but figure inside it is not */
	dd > .figure, dd > figure { margin-left: -2em; }

/******************************************************************************/
/*                             Colored Boxes                                  */
/******************************************************************************/

	/* Shared box model for every callout box. Each box type sets its own
	   border-color, background and text color in its dedicated section below. */
	.issue, .note, .example, .assertion, .advisement, blockquote,
	.amendment, .correction, .addition {
		margin: 1em auto;
		padding: .5em;
		border: .5em; /* shorthand with a width only: the style stays `none` on all sides... */
		border-left-style: solid; /* ...so only the left edge is actually drawn */
		page-break-inside: avoid;
	}
	/* Inline (span) variants get tighter padding and additionally draw the
	   right edge, so the inline box is bracketed on both sides. */
	span.issue, span.note {
		padding: .1em .5em .15em;
		border-right-style: solid;
	}

	blockquote > :first-child,
	.note  > p:first-child,
	.issue > p:first-child,
	.amendment > p:first-child,
	.correction > p:first-child,
	.addition > p:first-child {
		margin-top: 0;
	}
	blockquote > :last-child,
	.note  > p:last-child,
	.issue > p:last-child,
	.amendment > p:last-child,
	.correction > p:last-child,
	.addition > p:last-child {
		margin-bottom: 0;
	}


	/* Shared label styling for the box markers ("ISSUE", "NOTE", ...), whether
	   generated via ::before or marked up in source with class="marker".
	   Collapsible boxes are <details> elements, so their summary markers are
	   matched as details.<class> > summary > .marker, the same form the
	   Amendment Box rules use. The previous `addition.amendment` and
	   `correction.amendment` selectors were element type selectors and
	   matched nothing. */
	.issue::before, .issue > .marker,
	.example::before, .example > .marker,
	.note::before, .note > .marker,
	details.note > summary > .marker,
	.amendment::before, .amendment > .marker,
	details.amendment > summary > .marker,
	.addition::before, .addition > .marker,
	details.addition > summary > .marker,
	.correction::before, .correction > .marker,
	details.correction > summary > .marker
	{
		text-transform: uppercase;
		padding-right: 1em;
	}

	.example::before, .example > .marker {
		display: block;
		padding-right: 0em;
	}

/** Blockquotes ***************************************************************/

	blockquote {
		border-color: silver;
		border-color: var(--blockquote-border);
		background: transparent;
		background: var(--blockquote-bg);
		color: currentcolor;
		color: var(--blockquote-text);
	}

/** Open issue ****************************************************************/

	.issue {
		border-color: #e05252;
		border-color: var(--issue-border);
		background: #fbe9e9;
		background: var(--issue-bg);
		color: black;
		color: var(--issue-text);
		counter-increment: issue;
		overflow: auto;
	}
	.issue::before, .issue > .marker {
		color: #831616;
		color: var(--issueheading-text);
	}
	/* Add .issue::before { content: "Issue " counter(issue) " "; } for autogen numbers,
	  or use class="marker" to mark up the issue number in source. */

/** Example *******************************************************************/

	.example {
		border-color: #e0cb52;
		border-color: var(--example-border);
		background: #fcfaee;
		background: var(--example-bg);
		color: black;
		color: var(--example-text);
		counter-increment: example;
		overflow: auto;
		clear: both;
	}
	.example::before, .example > .marker {
		color: #574b0f;
		color: var(--exampleheading-text);
	}
	/* Add .example::before { content: "Example " counter(example) " "; } for autogen numbers,
	  or use class="marker" to mark up the example number in source. */

/** Non-normative Note ********************************************************/

	.note {
		border-color: #52e052;
		border-color: var(--note-border);
		background: #e9fbe9;
		background: var(--note-bg);
		color: black;
		color: var(--note-text);
		overflow: auto;
	}

	.note::before, .note > .marker,
	details.note > summary {
		color: hsl(120, 70%, 30%);
		color: var(--noteheading-text);
	}
	/* Add .note::before { content: "Note "; } for autogen label,
	  or use class="marker" to mark up the label in source. */

	details.note[open] > summary {
		border-bottom: 1px silver solid;
		border-bottom: 1px var(--notesummary-underline) solid;
	}

/** Assertion Box *************************************************************/
	/*  for assertions in algorithms */

	.assertion {
		border-color: #AAA;
		border-color: var(--assertion-border);
		background: #EEE;
		background: var(--assertion-bg);
		color: black;
		color: var(--assertion-text);
	}

/** Advisement Box ************************************************************/
	/*  for attention-grabbing normative statements */

	.advisement {
		border-color: orange;
		border-color: var(--advisement-border);
		border-style: none solid;
		background: #fec;
		background: var(--advisement-bg);
		color: black;
		color: var(--advisement-text);
	}
	strong.advisement {
		display: block;
		text-align: center;
	}
	.advisement::before, .advisement > .marker {
		color: #b35f00;
		color: var(--advisementheading-text);
	}

/** Amendment Box *************************************************************/

	.amendment, .correction, .addition {
		border-color: #330099;
		border-color: var(--amendment-border);
		background: #F5F0FF;
		background: var(--amendment-bg);
		color: black;
		color: var(--amendment-text);
	}
	.amendment.proposed, .correction.proposed, .addition.proposed {
		border-style: solid;
		border-block-width: 0.25em;
	}
	.amendment::before, .amendment > .marker,
	details.amendment > summary::before, details.amendment > summary > .marker,
	.correction::before, .correction > .marker,
	details.correction > summary::before, details.correction > summary > .marker,
	.addition::before, .addition > .marker,
	details.addition > summary::before, details.addition > summary > .marker {
		color: #220066;
		color: var(--amendmentheading-text);
	}
	.amendment.proposed::before, .amendment.proposed > .marker,
	details.amendment.proposed > summary::before, details.amendment.proposed > summary > .marker,
	.correction.proposed::before, .correction.proposed > .marker,
	details.correction.proposed > summary::before, details.correction.proposed > summary > .marker,
	.addition.proposed::before, .addition.proposed > .marker,
	details.addition.proposed > summary::before, details.addition.proposed > summary > .marker {
		font-weight: bold;
	}

/** Spec Obsoletion Notice ****************************************************/
	/* obnoxious obsoletion notice for older/abandoned specs. */

	details {
		display: block;
	}
	summary {
		font-weight: bolder;
	}

	.annoying-warning:not(details),
	details.annoying-warning:not([open]) > summary,
	details.annoying-warning[open] {
		background: hsla(40,100%,50%,0.95);
		background: var(--warning-bg);
		color: black;
		color: var(--warning-text);
		padding: .75em 1em;
		border: red;
		border: var(--warning-border);
		border-style: solid none;
		box-shadow: 0 2px 8px black;
		text-align: center;
	}
	.annoying-warning :last-child {
		margin-bottom: 0;
	}

@media not print {
	details.annoying-warning[open] {
		position: fixed;
		left: 0;
		right: 0;
		bottom: 2em;
		z-index: 1000;
	}
}

	details.annoying-warning:not([open]) > summary {
		text-align: center;
	}

/** Entity Definition Boxes ***************************************************/

	.def {
		padding: .5em 1em;
		background: #def;
		background: var(--def-bg);
		margin: 1.2em 0;
		border-left: 0.5em solid #8ccbf2;
		border-left: 0.5em solid var(--def-border);
		color: black;
		color: var(--def-text);
	}

/******************************************************************************/
/*                                    Tables                                  */
/******************************************************************************/

	th, td {
		text-align: left;
		text-align: start;
	}

/** Property/Descriptor Definition Tables *************************************/

	table.def {
		/* inherits .def box styling, see above */
		width: 100%;
		border-spacing: 0;
	}

	table.def td,
	table.def th {
		padding: 0.5em;
		vertical-align: baseline;
		border-bottom: 1px solid #bbd7e9;
		border-bottom: 1px solid var(--defrow-border);
	}

	table.def > tbody > tr:last-child th,
	table.def > tbody > tr:last-child td {
		border-bottom: 0;
	}

	table.def th {
		font-style: italic;
		font-weight: normal;
		padding-left: 1em;
		width: 3em;
	}

	/* For when values are extra-complex and need formatting for readability */
	table td.pre {
		white-space: pre-wrap;
	}

	/* A footnote at the bottom of a def table */
	table.def td.footnote {
		padding-top: 0.6em;
	}
	table.def td.footnote::before {
		content: " ";
		display: block;
		height: 0.6em;
		width: 4em;
		border-top: thin solid;
	}

/** Data tables (and properly marked-up index tables) *************************/
	/*
		<table class="data"> highlights structural relationships in a table
		when correct markup is used (e.g. thead/tbody, th vs. td, scope attribute)

		Use class="complex data" for particularly complicated tables --
		(This will draw more lines: busier, but clearer.)

		Use class="long" on table cells with paragraph-like contents
		(This will adjust text alignment accordingly.)
		Alternately use class="longlastcol" on tables, to have the last column assume "long".
	*/

	table {
		word-wrap: normal;
		overflow-wrap: normal;
		hyphens: manual;
	}

	table.data,
	table.index {
		margin: 1em auto;
		border-collapse: collapse;
		border: hidden;
		width: 100%;
	}
	table.data caption,
	table.index caption {
		max-width: 50em;
		margin: 0 auto 1em;
	}

	table.data td,  table.data th,
	table.index td, table.index th {
		padding: 0.5em 1em;
		border-width: 1px;
		border-color: silver;
		border-color: var(--datacell-border);
		border-top-style: solid;
	}

	table.data thead td:empty {
		padding: 0;
		border: 0;
	}

	table.data  thead,
	table.index thead,
	table.data  tbody,
	table.index tbody {
		border-bottom: 2px solid;
	}

	table.data colgroup,
	table.index colgroup {
		border-left: 2px solid;
	}

	table.data  tbody th:first-child,
	table.index tbody th:first-child  {
		border-right: 2px solid;
		border-top: 1px solid silver;
		border-top: 1px solid var(--datacell-border);
		padding-right: 1em;
	}

	table.data th[colspan],
	table.data td[colspan] {
		text-align: center;
	}

	table.complex.data th,
	table.complex.data td {
		border: 1px solid silver;
		border: 1px solid var(--datacell-border);
		text-align: center;
	}

	table.data.longlastcol td:last-child,
	table.data td.long {
		vertical-align: baseline;
		text-align: left;
	}

	table.data img {
		vertical-align: middle;
	}


/*
Alternate table alignment rules

	table.data,
	table.index {
		text-align: center;
	}

	table.data  thead th[scope="row"],
	table.index thead th[scope="row"] {
		text-align: right;
	}

	table.data  tbody th:first-child,
	table.index tbody th:first-child  {
		text-align: right;
	}

Possible extra rowspan handling

	table.data  tbody th[rowspan]:not([rowspan='1']),
	table.index tbody th[rowspan]:not([rowspan='1']),
	table.data  tbody td[rowspan]:not([rowspan='1']),
	table.index tbody td[rowspan]:not([rowspan='1']) {
		border-left: 1px solid silver;
	}

	table.data  tbody th[rowspan]:first-child,
	table.index tbody th[rowspan]:first-child,
	table.data  tbody td[rowspan]:first-child,
	table.index tbody td[rowspan]:first-child{
		border-left: 0;
		border-right: 1px solid silver;
	}
*/

/******************************************************************************/
/*                                  Indices                                   */
/******************************************************************************/


/** Table of Contents *********************************************************/

	.toc a {
		/* More spacing; use padding to make it part of the click target. */
		padding: 0.1rem 1px 0;
		/* Larger, more consistently-sized click target */
		display: block;
		/* Switch to using border-bottom for underlines */
		text-decoration: none;
		border-bottom: 1px solid;
		/* Reverse color scheme */
		color: black;
		color: var(--toclink-text);
		border-color: #3980b5;
		border-color: var(--toclink-underline);
	}
	.toc a:visited {
		color: black;
		color: var(--toclink-visited-text);
		border-color: #054572;
		border-color: var(--toclink-visited-underline);
	}
	.toc a:focus,
	.toc a:hover {
		background: rgba(75%, 75%, 75%, .25);
		background: var(--a-hover-bg);
		border-bottom-width: 3px;
		margin-bottom: -2px;
	}
	.toc a:not(:focus):not(:hover) {
		/* Hide the underline border unless the link is focused or hovered;
		   the border colors set above show through only in those states */
		border-bottom-color: transparent;
	}

	.toc, .toc ol, .toc ul, .toc li {
		list-style: none; /* Numbers must be inlined into source */
		/* because generated content isn't search/selectable and markers can't do multilevel yet */
		margin:  0;
		padding: 0;
	}
	.toc {
		line-height: 1.1em;
	}

	/* ToC not indented until third level, but font style & margins show hierarchy */
	.toc > li			{ font-weight: bold;   }
	.toc > li li		 { font-weight: normal; }
	.toc > li li li	  { font-size:   95%;	}
	.toc > li li li li	{ font-size:   90%;	}
	.toc > li li li li li { font-size:   85%;	}

	/* @supports not (display:grid) { */
		.toc > li			{ margin: 1.5rem 0;	}
		.toc > li li		 { margin: 0.3rem 0;	}
		.toc > li li li	  { margin-left: 2rem;   }

		/* Section numbers in a column of their own */
		.toc .secno {
			float: left;
			width: 4rem;
			white-space: nowrap;
		}
		.toc > li li li li .secno { font-size: 85%; }
		.toc > li li li li li .secno { font-size: 100%; }

		.toc li {
			clear: both;
		}

		:not(li) > .toc			 { margin-left:  5rem; }
		.toc .secno				 { margin-left: -5rem; }
		.toc > li li li .secno	  { margin-left: -7rem; }
		.toc > li li li li .secno	{ margin-left: -9rem; }
		.toc > li li li li li .secno { margin-left: -11rem; }

		/* Tighten up indentation in narrow ToCs */
		@media (max-width: 30em) {
			:not(li) > .toc			 { margin-left:  4rem; }
			.toc .secno				 { margin-left: -4rem; }
			.toc > li li li			 { margin-left:  1rem; }
			.toc > li li li .secno	  { margin-left: -5rem; }
			.toc > li li li li .secno	{ margin-left: -6rem; }
			.toc > li li li li li .secno { margin-left: -7rem; }
		}
		/* Tighten it the same way on wide screens, where the ToC becomes a fixed-width sidebar */
		@media screen and (min-width: 78em) {
			body:not(.toc-inline) :not(li) > .toc			 { margin-left:  4rem; }
			body:not(.toc-inline) .toc .secno				 { margin-left: -4rem; }
			body:not(.toc-inline) .toc > li li li			 { margin-left:  1rem; }
			body:not(.toc-inline) .toc > li li li .secno	  { margin-left: -5rem; }
			body:not(.toc-inline) .toc > li li li li .secno	{ margin-left: -6rem; }
			body:not(.toc-inline) .toc > li li li li li .secno { margin-left: -7rem; }
	}
	/* } */

	@supports (display:grid) and (display:contents) {
		/* Grid-based ToC: numbers in column one, titles in column two.
		   Selectors use #toc rather than .toc so that they override the rules
		   declared outside this @supports block. */
		#toc {
			display: grid;
			grid-template-columns: auto 1fr;
			align-content: start;
			/* Legacy grid-*-gap names first, current names second. */
			grid-column-gap: 1rem;
			column-gap: 1rem;
			grid-row-gap: .6rem;
			row-gap: .6rem;
		}

		/* The heading stretches across both columns. */
		#toc h2 {
			margin-bottom: 0;
			grid-column: 1 / -1;
		}

		/* Lists, items and links generate no box of their own, which leaves the
		   spans inside them as the grid items.
		   (Switch <a> to subgrid when supported.) */
		#toc ol, #toc li, #toc a { display: contents; }

		#toc span { margin: 0; }

		/* Spans of the top-level list, i.e. the first items of each top-level
		   section, get extra room above. */
		#toc > .toc > li > a > span { margin-top: 1.1rem; }

		/* Doubled ID: more specificity is needed to override base.css. */
		#toc#toc .secno {
			grid-column: 1;
			margin-left: 0;
			width: auto;
		}

		/* Titles. The transparent bottom border is compensated by an equal
		   negative margin and only takes a colour on hover/focus. */
		#toc .content {
			grid-column: 2;
			width: auto;
			margin-right: 1rem;
			margin-bottom: -3px;
			border-bottom: 3px solid transparent;
		}
		#toc .content:hover,
		#toc .content:focus {
			border-bottom-color: #054572;
			border-bottom-color: var(--toclink-underline);
			background: rgba(75%, 75%, 75%, .25);
			background: var(--a-hover-bg);
		}

		/* Titles are indented from the third level down. */
		#toc li li li .content    { margin-left: 1rem; }
		#toc li li li li .content { margin-left: 2rem; }
	}


/** Index *********************************************************************/

	/* Index Lists: Layout */
	/* Index lists: multi-column layout with hanging indents. */
	ul.index {
		margin-left: 0;
		columns: 15em;
		text-indent: 1em hanging;
	}
	ul.index li {
		margin-left: 0;
		list-style: none;
		break-inside: avoid;
	}
	ul.index li li { margin-left: 1em; }
	ul.index dl    { margin-top: 0; }
	ul.index dt    { margin: .2em 0 .2em 20px; }
	ul.index dd    { margin: .2em 0 .2em 40px; }

	/* Index lists: nested lists are set smaller. */
	ul.index ul,
	ul.index dl {
		font-size: smaller;
	}

	/* Outside print, the span that follows an index link is transparent until
	   the link is hovered or focused. */
	@media not print {
		ul.index li a + span {
			color: transparent;
			white-space: nowrap;
		}
		ul.index li a:hover + span,
		ul.index li a:focus + span {
			color: #707070;
			color: var(--indexinfo-text);
		}
	}

/** Index Tables *****************************************************/
	/* See also the data table styling section, which this effectively subclasses */

	/* Index tables: small, left-aligned text in a collapsed-border table. */
	table.index {
		margin: 1em 0;
		font-size: small;
		text-align: left;
		border-collapse: collapse;
		border-spacing: 0;
	}

	table.index td,
	table.index th { padding: 0.4em; }

	/* Row hover highlight. Cells with a rowspan attribute are left alone. */
	table.index tr:hover td:not([rowspan]),
	table.index tr:hover th:not([rowspan]) {
		background: #f7f8f9;
		background: var(--indextable-hover-bg);
		color: black;
		color: var(--indextable-hover-text);
	}

	/* The link in the first column of the property table (formerly a TD). */
	table.index th:first-child a { font-weight: bold; }

/** Outdated warning **********************************************************/

/* Colours for anything carrying .outdated-spec. Throughout this section each
   literal value is the fallback for the var() declaration that follows it. */
.outdated-spec {
	background-color: rgba(0,0,0,0.5);
	background-color: var(--outdatedspec-bg);
	color: black;
	color: var(--outdatedspec-text);
}

/* The warning banner: a fixed-position box, half as wide as its containing
   block, centred horizontally, with its bottom edge at mid-height. */
.outdated-warning {
	position: fixed;
	z-index: 2;
	left: 0;
	right: 0;
	bottom: 50%;
	width: 50%;
	margin: 0 auto;
	padding: 2em;
	border-radius: 1em;
	text-align: center;
	color: white;
	color: var(--outdated-text);
	background: maroon;
	background: var(--outdated-bg);
	box-shadow: 0 0 1em red;
	box-shadow: 0 0 1em var(--outdated-shadow);
}

/* Links inside the banner take the banner's text colour. */
.outdated-warning a {
	background: transparent;
	color: currentcolor;
}

/* Variant colours for the "edited recommendation" flavour. */
.edited-rec-warning {
	box-shadow: 0 0 1em;
	background: darkorange;
	background: var(--editedrec-bg);
}

/* Button inside the warning banner, pinned to the banner's top-right corner.
   It is deliberately chrome-free (no border, transparent background) and must
   NOT carry the banner's own border-radius, box-shadow, large padding or
   z-index. A stray copy of those banner declarations under this selector used
   to put a glowing rounded box around the button. The literal `white` is the
   fallback for the var() declaration that follows it. */
.outdated-warning button {
	position: absolute;
	top: 0;
	right:0;
	margin: 0;
	border: 0;
	padding: 0.25em 0.5em;
	background: transparent;
	color: white;
	color: var(--outdated-text);
	font:1em sans-serif;
	text-align:center;
}

/* Each span in the banner is laid out as a block. */
.outdated-warning span { display: block; }

/* Collapsed state: bottom offset 0, full width, square corners, no padding. */
.outdated-collapsed {
	bottom: 0;
	width: 100%;
	padding: 0;
	border-radius: 0;
}

/******************************************************************************/
/*                                    Print                                   */
/******************************************************************************/

	@media print {
		/* Pages have their own margins. */
		html { margin: 0; }

		/* Serif for print. */
		body { font-family: serif; }

		/* The banner is absolutely positioned instead of fixed, and gets a red
		   solid border style. */
		.outdated-warning {
			position: absolute;
			border-style: solid;
			border-color: red;
		}

		/* Hide the banner's interactive control on paper. The screen rules in
		   this stylesheet style that control as a <button>, so it is hidden
		   alongside the <input> that was already listed here. */
		.outdated-warning input,
		.outdated-warning button {
			display: none;
		}
	}
	@page {
		margin: 1.5cm 1.1cm;
	}



/******************************************************************************/
/*                             Overflow Control                               */
/******************************************************************************/

	/* Captions are capped at 50rem and centred, in case the figure itself is
	   overlarge. */
	.figure .caption,
	.sidefigure .caption,
	figcaption {
		margin-left: auto;
		margin-right: auto;
		max-width: 50rem;
	}

	/* Positioning for oversized items.
	   The "content column" is at most 50em wide (less on small screens); any
	   extra space after ToC + content is empty space on the right.

	   1. item < content column      -> item is centred in the column
	   2. content < item < available -> item is left-aligned
	   3. item > available           -> item fills the available width and scrolls
	*/
	.overlarge {
		display: grid;
		grid-template-columns: minmax(0, 50em);
	}

	/* Limit the preferred width of a table placed directly in the wrapper. */
	.overlarge > table {
		margin-left: auto;
		margin-right: auto;
		max-width: 50em;
	}

	/* From 55em up, a negative right margin extends the wrapper to the right. */
	@media (min-width: 55em) {
		.overlarge {
			max-width: none;
			margin-right: calc(13px + 26.5rem - 50vw);
		}
	}

	/* From 78em up, unless the body carries .toc-inline:
	   30.5em body padding, 50em content area. */
	@media screen and (min-width: 78em) {
		body:not(.toc-inline) .overlarge {
			margin-right: calc(40em - 50vw) !important;
		}
	}

	/* From 90em up: 4em html margin, 30.5em body padding, 50em content area. */
	@media screen and (min-width: 90em) {
		body:not(.toc-inline) .overlarge {
			margin-right: calc(84.5em - 100vw) !important;
		}
	}

	@media not print {
		.overlarge {
			/* Scroll horizontally rather than overflow. */
			overflow-x: auto;
			/* Disabled scroll-shadow background, kept for reference. See Lea
			 * Verou's explanation of background-attachment:
			 * http://lea.verou.me/2012/04/background-attachment-local/
			 *
			background: top left  / 4em 100% linear-gradient(to right,  #ffffff, rgba(255, 255, 255, 0)) local,
						top right / 4em 100% linear-gradient(to left, #ffffff, rgba(255, 255, 255, 0)) local,
						top left  / 1em 100% linear-gradient(to right,  #c3c3c5, rgba(195, 195, 197, 0)) scroll,
						top right / 1em 100% linear-gradient(to left, #c3c3c5, rgba(195, 195, 197, 0)) scroll,
						white;
			background-repeat: no-repeat;
			*/
		}
	}
</style>
<style>
    /* Tables and cells: collapsed 1px black borders, content aligned to the top. */
    table, th, td {
      vertical-align: top;
      border-collapse: collapse;
      border: 1px solid black;
    }
    /* Cells drop their left and right borders, leaving the horizontal ones. */
    th, td {
      padding: 0px 10px;
      border-left: none;
      border-right: none;
    }
    th { text-align: center; }

    /* Deletions: struck through on a red tint. Insertions: green tint.
       Both force black text. */
    del {
      color: #000;
      background: #fcc;
      text-decoration: line-through;
    }
    ins {
      color: #000;
      background: #cfc;
    }

    /* Highlighted code inside a blockquote resets its background and margin
       and uses 0.5em of padding. */
    blockquote .highlight:not(.idl) {
      margin: initial;
      padding: 0.5em;
      background: initial;
    }
    blockquote ul { background: inherit; }
    /* <code> elements with the highlight class get their padding reset. */
    blockquote code.highlight:not(.idl) { padding: initial; }
    /* Inside a blockquote, syntax-highlight tokens take the surrounding text
       colour instead of their palette colour. The list must name every token
       class the highlighting palette colours, including ch, cpf, sa, dl, fm
       and vm, which only the dark palette defines; a class left out here
       stays coloured inside blockquotes. Comments give the Pygments token
       family of each group. */
    /* Keyword, Keyword.Declaration, Keyword.Type, Keyword.Constant,
       Keyword.Namespace, Keyword.Pseudo, Keyword.Reserved */
    blockquote c-[k], blockquote c-[a], blockquote c-[b], blockquote c-[kc],
    blockquote c-[kn], blockquote c-[kp], blockquote c-[kr],
    /* Comment, .Multiline, .Hashbang, .Preproc, .PreprocFile, .Single, .Special */
    blockquote c-[c], blockquote c-[d], blockquote c-[ch], blockquote c-[cp],
    blockquote c-[cpf], blockquote c-[c1], blockquote c-[cs],
    /* Name, .Attribute, .Tag, .Variable, .Class, .Constant, .Decorator, .Entity,
       .Exception, .Function, .Function.Magic, .Label, .Namespace, .Property */
    blockquote c-[n], blockquote c-[e], blockquote c-[f], blockquote c-[g],
    blockquote c-[nc], blockquote c-[no], blockquote c-[nd], blockquote c-[ni],
    blockquote c-[ne], blockquote c-[nf], blockquote c-[fm], blockquote c-[nl],
    blockquote c-[nn], blockquote c-[py],
    /* Name.Variable.Class, .Global, .Instance, .Magic */
    blockquote c-[vc], blockquote c-[vg], blockquote c-[vi], blockquote c-[vm],
    /* Operator, Operator.Word, Punctuation */
    blockquote c-[o], blockquote c-[ow], blockquote c-[p],
    /* Literal, Literal.Date, Literal.Number, .Bin, .Float, .Hex, .Integer,
       .Integer.Long, .Oct */
    blockquote c-[l], blockquote c-[ld], blockquote c-[m], blockquote c-[mb],
    blockquote c-[mf], blockquote c-[mh], blockquote c-[mi], blockquote c-[il],
    blockquote c-[mo],
    /* Literal.String, .Single, .Double, .Affix, .Backtick, .Char, .Delimiter,
       .Doc, .Escape, .Heredoc, .Interpol, .Other, .Regex, .Symbol */
    blockquote c-[s], blockquote c-[t], blockquote c-[u], blockquote c-[sa],
    blockquote c-[sb], blockquote c-[sc], blockquote c-[dl], blockquote c-[sd],
    blockquote c-[se], blockquote c-[sh], blockquote c-[si], blockquote c-[sx],
    blockquote c-[sr], blockquote c-[ss] {
      color: inherit;
    }
  </style>
  <meta content="Bikeshed version 82ce88815, updated Thu Sep 7 16:33:55 2023 -0700" name="generator">
  <link href="http://wg21.link/p3715r0" rel="canonical">
  <link href="https://isocpp.org/favicon.ico" rel="icon">
<style>/* Boilerplate: style-autolinks */
/* CSS-ish inline terms take the normal link colour and the surrounding font.
   Each class is doubled to raise the selector's specificity. */
.css.css,
.property.property,
.descriptor.descriptor {
    font-family: inherit;
    font-size: inherit;
    color: var(--a-normal-text);
}

/* ...and are wrapped in typographic single quotes. */
.css::before,
.property::before,
.descriptor::before { content: "‘"; }

.css::after,
.property::after,
.descriptor::after { content: "’"; }

/* Don't wrap property and descriptor names. */
.property,
.descriptor { white-space: nowrap; }

/* CSS value <type> */
.type { font-style: italic; }

/* No quotes around properties inside <pre>. */
pre .property::before,
pre .property::after { content: ""; }

/* The same quoting for autolinks of these link types. */
[data-link-type="property"]::before,
[data-link-type="propdesc"]::before,
[data-link-type="descriptor"]::before,
[data-link-type="value"]::before,
[data-link-type="function"]::before,
[data-link-type="at-rule"]::before,
[data-link-type="selector"]::before,
[data-link-type="maybe"]::before { content: "‘"; }

[data-link-type="property"]::after,
[data-link-type="propdesc"]::after,
[data-link-type="descriptor"]::after,
[data-link-type="value"]::after,
[data-link-type="function"]::after,
[data-link-type="at-rule"]::after,
[data-link-type="selector"]::after,
[data-link-type="maybe"]::after { content: "’"; }

/* Autolinks marked .production, or placed inside .prod, get no quotes. */
[data-link-type].production::before,
[data-link-type].production::after,
.prod [data-link-type]::before,
.prod [data-link-type]::after { content: ""; }

/* Element and element-attribute links are monospaced... */
[data-link-type=element],
[data-link-type=element-attr] {
    font-size: .9em;
    font-family: Menlo, Consolas, "DejaVu Sans Mono", monospace;
}
/* ...and element links are wrapped in angle brackets. */
[data-link-type=element]::before { content: "<" }
[data-link-type=element]::after  { content: ">" }

/* Bibliography links keep their whitespace and never wrap. */
[data-link-type=biblio] { white-space: pre; }

/* Dark-scheme values for the --selflink-* custom properties, which the
   `dfn > a.self-link` rules in the style-selflinks boilerplate read for the
   link's background and text colours.
   NOTE(review): the style-selflinks boilerplate declares the same three
   properties on :root later in the document. Both rules have equal
   specificity, so whichever comes last in source order wins — confirm that
   these dark values actually take effect. */
@media (prefers-color-scheme: dark) {
    :root {
        --selflink-text: black;
        --selflink-bg: silver;
        --selflink-hover-text: white;
    }
}
</style>
<style>/* Boilerplate: style-colors */

/* Any --*-text not paired with a --*-bg is assumed to have a transparent bg */
/* Default palette, exposed as custom properties on :root. Rules elsewhere in
   the document read these through var(), usually right after a literal
   fallback declaration for the same property. */
:root {
    /* Opt in to both colour schemes; the dark values are supplied by the
       prefers-color-scheme block that follows. */
    color-scheme: light dark;

    --text: black;
    --bg: white;

    --unofficial-watermark: url(https://www.w3.org/StyleSheets/TR/2016/logos/UD-watermark);

    --logo-bg: #1a5e9a;
    --logo-active-bg: #c00;
    --logo-text: white;

    --tocnav-normal-text: #707070;
    --tocnav-normal-bg: var(--bg);
    --tocnav-hover-text: var(--tocnav-normal-text);
    --tocnav-hover-bg: #f8f8f8;
    --tocnav-active-text: #c00;
    --tocnav-active-bg: var(--tocnav-normal-bg);

    --tocsidebar-text: var(--text);
    --tocsidebar-bg: #f7f8f9;
    --tocsidebar-shadow: rgba(0,0,0,.1);
    --tocsidebar-heading-text: hsla(203,20%,40%,.7);

    /* --toclink-underline is the border colour of hovered/focused ToC titles. */
    --toclink-text: var(--text);
    --toclink-underline: #3980b5;
    --toclink-visited-text: var(--toclink-text);
    --toclink-visited-underline: #054572;

    --heading-text: #005a9c;

    --hr-text: var(--text);

    --algo-border: #def;

    --del-text: red;
    --del-bg: transparent;
    --ins-text: #080;
    --ins-bg: transparent;

    /* --a-normal-text also colours the .css/.property/.descriptor terms;
       --a-hover-bg is the hover/focus background of ToC titles. */
    --a-normal-text: #034575;
    --a-normal-underline: #bbb;
    --a-visited-text: var(--a-normal-text);
    --a-visited-underline: #707070;
    --a-hover-bg: rgba(75%, 75%, 75%, .25);
    --a-active-text: #c00;
    --a-active-underline: #c00;

    --blockquote-border: silver;
    --blockquote-bg: transparent;
    --blockquote-text: currentcolor;

    /* --issueheading-text also colours the a[href].issue-return links. */
    --issue-border: #e05252;
    --issue-bg: #fbe9e9;
    --issue-text: var(--text);
    --issueheading-text: #831616;

    --example-border: #e0cb52;
    --example-bg: #fcfaee;
    --example-text: var(--text);
    --exampleheading-text: #574b0f;

    --note-border: #52e052;
    --note-bg: #e9fbe9;
    --note-text: var(--text);
    --noteheading-text: hsl(120, 70%, 30%);
    --notesummary-underline: silver;

    --assertion-border: #aaa;
    --assertion-bg: #eee;
    --assertion-text: black;

    --advisement-border: orange;
    --advisement-bg: #fec;
    --advisement-text: var(--text);
    --advisementheading-text: #b35f00;

    --warning-border: red;
    --warning-bg: hsla(40,100%,50%,0.95);
    --warning-text: var(--text);

    --amendment-border: #330099;
    --amendment-bg: #F5F0FF;
    --amendment-text: var(--text);
    --amendmentheading-text: #220066;

    --def-border: #8ccbf2;
    --def-bg: #def;
    --def-text: var(--text);
    --defrow-border: #bbd7e9;

    --datacell-border: silver;

    /* Read by the ul.index hover/focus rule. */
    --indexinfo-text: #707070;

    /* Read by the table.index row-hover rule. */
    --indextable-hover-text: black;
    --indextable-hover-bg: #f7f8f9;

    /* Read by the .outdated-spec and .outdated-warning rules. */
    --outdatedspec-bg: rgba(0, 0, 0, .5);
    --outdatedspec-text: black;
    --outdated-bg: maroon;
    --outdated-text: white;
    --outdated-shadow: red;

    /* Read by the .edited-rec-warning rule. */
    --editedrec-bg: darkorange;
}

/* Dark-scheme palette: re-declares the custom properties on :root when the
   user agent reports a dark colour-scheme preference. */
@media (prefers-color-scheme: dark) {
    :root {
        --text: #ddd;
        --bg: black;

        /* Inline SVG data URL containing the rotated text "UNOFFICIAL" / "DRAFT". */
        --unofficial-watermark: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='400' height='400'%3E%3Cg fill='%23100808' transform='translate(200 200) rotate(-45) translate(-200 -200)' stroke='%23100808' stroke-width='3'%3E%3Ctext x='50%25' y='220' style='font: bold 70px sans-serif; text-anchor: middle; letter-spacing: 6px;'%3EUNOFFICIAL%3C/text%3E%3Ctext x='50%25' y='305' style='font: bold 70px sans-serif; text-anchor: middle; letter-spacing: 6px;'%3EDRAFT%3C/text%3E%3C/g%3E%3C/svg%3E");

        --logo-bg: #1a5e9a;
        --logo-active-bg: #c00;
        --logo-text: white;

        --tocnav-normal-text: #999;
        --tocnav-normal-bg: var(--bg);
        --tocnav-hover-text: var(--tocnav-normal-text);
        --tocnav-hover-bg: #080808;
        --tocnav-active-text: #f44;
        --tocnav-active-bg: var(--tocnav-normal-bg);

        --tocsidebar-text: var(--text);
        --tocsidebar-bg: #080808;
        --tocsidebar-shadow: rgba(255,255,255,.1);
        --tocsidebar-heading-text: hsla(203,20%,40%,.7);

        --toclink-text: var(--text);
        --toclink-underline: #6af;
        --toclink-visited-text: var(--toclink-text);
        --toclink-visited-underline: #054572;

        --heading-text: #8af;

        --hr-text: var(--text);

        --algo-border: #456;

        --del-text: #f44;
        --del-bg: transparent;
        --ins-text: #4a4;
        --ins-bg: transparent;

        --a-normal-text: #6af;
        --a-normal-underline: #555;
        --a-visited-text: var(--a-normal-text);
        --a-visited-underline: var(--a-normal-underline);
        --a-hover-bg: rgba(25%, 25%, 25%, .2);
        --a-active-text: #f44;
        --a-active-underline: var(--a-active-text);

        /* Shared background that the blockquote, issue, example, note and
           assertion backgrounds below all refer to. */
        --borderedblock-bg: rgba(255, 255, 255, .05);

        --blockquote-border: silver;
        --blockquote-bg: var(--borderedblock-bg);
        --blockquote-text: currentcolor;

        --issue-border: #e05252;
        --issue-bg: var(--borderedblock-bg);
        --issue-text: var(--text);
        --issueheading-text: hsl(0deg, 70%, 70%);

        --example-border: hsl(50deg, 90%, 60%);
        --example-bg: var(--borderedblock-bg);
        --example-text: var(--text);
        --exampleheading-text: hsl(50deg, 70%, 70%);

        --note-border: hsl(120deg, 100%, 35%);
        --note-bg: var(--borderedblock-bg);
        --note-text: var(--text);
        --noteheading-text: hsl(120, 70%, 70%);
        --notesummary-underline: silver;

        --assertion-border: #444;
        --assertion-bg: var(--borderedblock-bg);
        --assertion-text: var(--text);

        --advisement-border: orange;
        --advisement-bg: #222218;
        --advisement-text: var(--text);
        --advisementheading-text: #f84;

        --warning-border: red;
        --warning-bg: hsla(40,100%,20%,0.95);
        --warning-text: var(--text);

        --amendment-border: #330099;
        --amendment-bg: #080010;
        --amendment-text: var(--text);
        --amendmentheading-text: #cc00ff;

        --def-border: #8ccbf2;
        --def-bg: #080818;
        --def-text: var(--text);
        --defrow-border: #136;

        --datacell-border: silver;

        --indexinfo-text: #aaa;

        --indextable-hover-text: var(--text);
        --indextable-hover-bg: #181818;

        --outdatedspec-bg: rgba(255, 255, 255, .5);
        --outdatedspec-text: black;
        --outdated-bg: maroon;
        --outdated-text: white;
        --outdated-shadow: red;

        --editedrec-bg: darkorange;
    }
    /* In case a transparent-bg image doesn't expect to be on a dark bg,
       which is quite common in practice... */
    img { background: white; }
}
</style>
<style>/* Boilerplate: style-counters */
/* Counters behind the generated "Issue N" / "Example N" / "Figure N" labels;
   all three are reset on <body>. */
body { counter-reset: example figure issue; }

/* Every .issue advances the counter; the label itself is only generated when
   the block does not carry .no-marker. */
.issue { counter-increment: issue; }
.issue:not(.no-marker)::before { content: "Issue " counter(issue); }

/* Same scheme for examples. */
.example { counter-increment: example; }
.example:not(.no-marker)::before { content: "Example " counter(example); }
/* Invalid/illegal examples use the example counter but get their own label.
   The trailing space inside the string keeps the label and the number apart
   ("Invalid Example 3", not "Invalid Example3"). */
.invalid.example:not(.no-marker)::before,
.illegal.example:not(.no-marker)::before {
    content: "Invalid Example " counter(example);
}

/* Figures are counted on their captions; the label ends in a space so that
   the caption text does not butt up against the number. */
figcaption { counter-increment: figure; }
figcaption:not(.no-marker)::before { content: "Figure " counter(figure) " "; }
</style>
<style>/* Boilerplate: style-issues */
/* Issue-return links: bold, not underlined, in the issue heading colour,
   floated to the end of the line. The physical `right` is declared first and
   the logical `inline-end` second, so the latter wins where it is supported. */
a[href].issue-return {
    float: right;
    float: inline-end;
    font-weight: bold;
    text-decoration: none;
    color: var(--issueheading-text);
}
</style>
<style>/* Boilerplate: style-md-lists */
/* This is a weird hack for me not yet following the commonmark spec
   regarding paragraph and lists. */
/* Direct children of a [data-md] container: the first loses its top margin
   and the last loses its bottom margin. */
[data-md] > :first-child { margin-top: 0; }
[data-md] > :last-child  { margin-bottom: 0; }
</style>
<style>/* Boilerplate: style-selflinks */

/* Colours of the self-link shown on definitions; read by the
   `dfn > a.self-link` rules for the link's background and text. */
:root {
    --selflink-text: white;
    --selflink-bg: gray;
    --selflink-hover-text: black;
}
/* Dark-scheme variant. It has to come after the defaults above: both rules
   target :root with equal specificity, so source order decides, and an
   override declared earlier in the document is replaced by the light values. */
@media (prefers-color-scheme: dark) {
    :root {
        --selflink-text: black;
        --selflink-bg: silver;
        --selflink-hover-text: white;
    }
}
/* Elements that can host a self-link become positioning contexts for it. */
.heading,
.issue,
.note,
.example,
li,
dt { position: relative; }

/* Default self-link: absolutely positioned to the left of its host,
   half-transparent until hovered. */
a.self-link {
    position: absolute;
    top: 0;
    left: calc(-1 * (3.5rem - 26px));
    width: calc(3.5rem - 26px);
    height: 2em;
    border: none;
    text-align: center;
    opacity: .5;
    transition: opacity .2s;
}
a.self-link:hover { opacity: 1; }

.heading > a.self-link { font-size: 83%; }

/* These blocks are overflow:auto, so positioning outside them doesn't work;
   the link is anchored to the right edge inside the block instead. */
.example > a.self-link,
.note > a.self-link,
.issue > a.self-link {
    right: 0;
    left: auto;
}

/* List items: shifted a further 2em to the left. */
li > a.self-link { left: calc(-1 * (3.5rem - 26px) - 2em); }

/* Definitions: a small coloured square at the link's static position,
   invisible until the <dfn> is hovered. */
dfn > a.self-link {
    top: auto;
    left: auto;
    width: 1.5em;
    height: 1.5em;
    font-style: normal;
    color: var(--selflink-text);
    background: var(--selflink-bg);
    opacity: 0;
    transition: opacity .2s, background-color .2s, color .2s;
}
dfn:hover > a.self-link { opacity: 1; }
dfn > a.self-link:hover { color: var(--selflink-hover-text); }

/* Glyph per host type: pilcrow by default, section sign on headings,
   hash on definitions. */
a.self-link::before            { content: "¶"; }
.heading > a.self-link::before { content: "§"; }
dfn > a.self-link::before      { content: "#"; }
</style>
<style>/* Boilerplate: style-syntax-highlighting */

/* Inline highlighted code: slight padding and rounded corners. */
code.highlight {
    padding: .1em;
    border-radius: .3em;
}

/* Highlighted blocks: block display, square corners, scroll on overflow. */
pre.highlight,
pre > code.highlight {
    display: block;
    margin: .5em 0;
    padding: 1em;
    overflow: auto;
    border-radius: 0;
}

/* Faint dark tint behind highlighted code, except .idl. */
.highlight:not(.idl) { background: rgba(0, 0, 0, .03); }
/* Light-scheme token colours. Each rule colours a <c-> element by the
   attribute that names its Pygments token type. The set of token classes
   matches the dark palette's: ch, cpf, sa, dl, fm and vm are included with
   the colour of the family they belong to, so that e.g. the file name in
   `#include <vector>` (cpf) is not left uncoloured next to a coloured
   directive (cp). */
c-[a] { color: #990055 } /* Keyword.Declaration */
c-[b] { color: #990055 } /* Keyword.Type */
c-[c] { color: #708090 } /* Comment */
c-[d] { color: #708090 } /* Comment.Multiline */
c-[e] { color: #0077aa } /* Name.Attribute */
c-[f] { color: #669900 } /* Name.Tag */
c-[g] { color: #222222 } /* Name.Variable */
c-[k] { color: #990055 } /* Keyword */
c-[l] { color: #000000 } /* Literal */
c-[m] { color: #000000 } /* Literal.Number */
c-[n] { color: #0077aa } /* Name */
c-[o] { color: #999999 } /* Operator */
c-[p] { color: #999999 } /* Punctuation */
c-[s] { color: #a67f59 } /* Literal.String */
c-[t] { color: #a67f59 } /* Literal.String.Single */
c-[u] { color: #a67f59 } /* Literal.String.Double */
c-[ch] { color: #708090 } /* Comment.Hashbang */
c-[cp] { color: #708090 } /* Comment.Preproc */
c-[cpf] { color: #708090 } /* Comment.PreprocFile */
c-[c1] { color: #708090 } /* Comment.Single */
c-[cs] { color: #708090 } /* Comment.Special */
c-[kc] { color: #990055 } /* Keyword.Constant */
c-[kn] { color: #990055 } /* Keyword.Namespace */
c-[kp] { color: #990055 } /* Keyword.Pseudo */
c-[kr] { color: #990055 } /* Keyword.Reserved */
c-[ld] { color: #000000 } /* Literal.Date */
c-[nc] { color: #0077aa } /* Name.Class */
c-[no] { color: #0077aa } /* Name.Constant */
c-[nd] { color: #0077aa } /* Name.Decorator */
c-[ni] { color: #0077aa } /* Name.Entity */
c-[ne] { color: #0077aa } /* Name.Exception */
c-[nf] { color: #0077aa } /* Name.Function */
c-[nl] { color: #0077aa } /* Name.Label */
c-[nn] { color: #0077aa } /* Name.Namespace */
c-[py] { color: #0077aa } /* Name.Property */
c-[ow] { color: #999999 } /* Operator.Word */
c-[mb] { color: #000000 } /* Literal.Number.Bin */
c-[mf] { color: #000000 } /* Literal.Number.Float */
c-[mh] { color: #000000 } /* Literal.Number.Hex */
c-[mi] { color: #000000 } /* Literal.Number.Integer */
c-[mo] { color: #000000 } /* Literal.Number.Oct */
c-[sa] { color: #a67f59 } /* Literal.String.Affix */
c-[sb] { color: #a67f59 } /* Literal.String.Backtick */
c-[sc] { color: #a67f59 } /* Literal.String.Char */
c-[dl] { color: #a67f59 } /* Literal.String.Delimiter */
c-[sd] { color: #a67f59 } /* Literal.String.Doc */
c-[se] { color: #a67f59 } /* Literal.String.Escape */
c-[sh] { color: #a67f59 } /* Literal.String.Heredoc */
c-[si] { color: #a67f59 } /* Literal.String.Interpol */
c-[sx] { color: #a67f59 } /* Literal.String.Other */
c-[sr] { color: #a67f59 } /* Literal.String.Regex */
c-[ss] { color: #a67f59 } /* Literal.String.Symbol */
c-[fm] { color: #0077aa } /* Name.Function.Magic */
c-[vc] { color: #0077aa } /* Name.Variable.Class */
c-[vg] { color: #0077aa } /* Name.Variable.Global */
c-[vi] { color: #0077aa } /* Name.Variable.Instance */
c-[vm] { color: #0077aa } /* Name.Variable.Magic */
c-[il] { color: #000000 } /* Literal.Number.Integer.Long */


/* Dark-scheme syntax highlighting, grouped by colour. Each comment lists the
   Pygments token types of the group that follows. */
@media (prefers-color-scheme: dark) {
    /* Faint light tint behind highlighted code, except .idl. */
    .highlight:not(.idl) { background: rgba(255, 255, 255, .05); }

    /* Keyword, Keyword.Declaration, .Type, .Constant, .Namespace, .Pseudo, .Reserved */
    c-[k], c-[a], c-[b], c-[kc], c-[kn], c-[kp], c-[kr] { color: #d33682 }

    /* Comment, .Multiline, .Hashbang, .Preproc, .PreprocFile, .Single, .Special */
    c-[c], c-[d], c-[ch], c-[cp], c-[cpf], c-[c1], c-[cs] { color: #2aa198 }

    /* Name, .Attribute, .Class, .Constant, .Decorator, .Entity, .Exception,
       .Function, .Function.Magic, .Label, .Namespace, .Property */
    c-[n], c-[e], c-[nc], c-[no], c-[nd], c-[ni], c-[ne],
    c-[nf], c-[fm], c-[nl], c-[nn], c-[py] { color: #268bd2 }

    /* Name.Tag */
    c-[f] { color: #b58900 }

    /* Name.Variable, .Class, .Global, .Instance, .Magic */
    c-[g], c-[vc], c-[vg], c-[vi], c-[vm] { color: #cb4b16 }

    /* Literal, Literal.Date, Literal.Number (.Bin, .Float, .Hex, .Integer,
       .Integer.Long, .Oct), Operator, Operator.Word, Punctuation */
    c-[l], c-[ld], c-[m], c-[mb], c-[mf], c-[mh], c-[mi], c-[il], c-[mo],
    c-[o], c-[ow], c-[p] { color: #657b83 }

    /* Literal.String, .Single, .Double, .Affix, .Backtick, .Char, .Delimiter,
       .Doc, .Escape, .Heredoc, .Interpol, .Other, .Regex, .Symbol */
    c-[s], c-[t], c-[u], c-[sa], c-[sb], c-[sc], c-[dl],
    c-[sd], c-[se], c-[sh], c-[si], c-[sx], c-[sr], c-[ss] { color: #6c71c4 }
}
</style>
 <body class="h-entry">
  <div class="head">
   <!-- Document header: logo slot, title, status/date line, metadata list. -->
   <p data-fill-with="logo"></p>
   <h1 class="p-name no-ref" id="title">P3715R0<br>Tightening floating-point semantics for C++</h1>
   <h2 class="no-num no-toc no-ref heading settled" id="profile-and-date"><span class="content">Draft Proposal, <time class="dt-updated" datetime="2025-04-18">2025-04-18</time></span></h2>
   <div data-fill-with="spec-metadata">
    <dl>
     <dt>This version:</dt>
     <dd><a class="u-url" href="http://wg21.link/p3715r0">http://wg21.link/p3715r0</a></dd>
     <dt class="editor">Author:</dt>
     <dd class="editor p-author h-card vcard"><a class="p-name fn u-email email" href="mailto:joshua.cranmer@intel.com">Joshua Cranmer</a></dd>
     <dt>Audience:</dt>
     <dd>SG6, SG22, EWG</dd>
     <dt>Project:</dt>
     <dd>ISO/IEC 14882 Programming Languages — C++, ISO/IEC JTC1/SC22/WG21</dd>
    </dl>
   </div>
   <div data-fill-with="warning"></div>
   <hr title="Separator for header">
  </div>
  <div class="p-summary" data-fill-with="abstract">
   <!-- Abstract: unnumbered heading, kept out of the table of contents. -->
   <h2 class="no-num no-toc no-ref heading settled" id="abstract"><span class="content">Abstract</span></h2>
   <p>
    Floating-point semantics are hard, and C++ is almost entirely silent on the matter at present.
    This is a paper to start the process of fixing that silence, by beginning with a comprehensive overview of the current situation in order to take a holistic approach instead of a piecemeal approach for floating-point semantics.
   </p>
  </div>
  <nav data-fill-with="table-of-contents" id="toc">
   <h2 class="no-num no-toc no-ref" id="contents">Table of Contents</h2>
   <ol class="toc" role="directory">
    <li><a href="#history"><span class="secno">1</span> <span class="content">Revision history</span></a>
    <li><a href="#introduction"><span class="secno">2</span> <span class="content">Introduction</span></a>
    <li>
     <a href="#background"><span class="secno">3</span> <span class="content">Background</span></a>
     <ol class="toc">
      <li>
       <a href="#ieee-754"><span class="secno">3.1</span> <span class="content">IEEE 754</span></a>
       <ol class="toc">
        <li><a href="#not-ieee754"><span class="secno">3.1.1</span> <span class="content">Non-IEEE 754 types</span></a>
       </ol>
      <li><a href="#hardware"><span class="secno">3.2</span> <span class="content">Hardware implementation</span></a>
      <li><a href="#middle-end"><span class="secno">3.3</span> <span class="content">Compiler implementation</span></a>
      <li><a href="#front-end"><span class="secno">3.4</span> <span class="content">Programming language semantics</span></a>
      <li><a href="#ieee-754-lang"><span class="secno">3.5</span> <span class="content">IEEE 754 rules for language specifications</span></a>
     </ol>
    <li><a href="#xlang-fp"><span class="secno">4</span> <span class="content">Comparison of language standards</span></a>
    <li>
     <a href="#motivation"><span class="secno">5</span> <span class="content">Motivation</span></a>
     <ol class="toc">
      <li>
       <a href="#implementer"><span class="secno">5.1</span> <span class="content">Implementers' perspective</span></a>
       <ol class="toc">
        <li><a href="#FLT_EVAL_METHOD"><span class="secno">5.1.1</span> <span class="content">Excess precision</span></a>
        <li><a href="#denormal"><span class="secno">5.1.2</span> <span class="content">Denormal flushing</span></a>
        <li><a href="#reassoc"><span class="secno">5.1.3</span> <span class="content">Associativity and vectorization</span></a>
        <li><a href="#fma"><span class="secno">5.1.4</span> <span class="content">FMA contraction</span></a>
        <li><a href="#fast-math"><span class="secno">5.1.5</span> <span class="content">Fast-math</span></a>
        <li><a href="#constexpr"><span class="secno">5.1.6</span> <span class="content">Constant expressions</span></a>
        <li><a href="#type-traits"><span class="secno">5.1.7</span> <span class="content">Type traits</span></a>
       </ol>
      <li>
       <a href="#user"><span class="secno">5.2</span> <span class="content">Users' perspective</span></a>
       <ol class="toc">
        <li><a href="#reproducible"><span class="secno">5.2.1</span> <span class="content">Reproducible results</span></a>
        <li><a href="#round"><span class="secno">5.2.2</span> <span class="content">Rounding mode</span></a>
        <li><a href="#fenv"><span class="secno">5.2.3</span> <span class="content">Environment access</span></a>
       </ol>
     </ol>
    <li>
     <a href="#solutions"><span class="secno">6</span> <span class="content">Solution space</span></a>
     <ol class="toc">
      <li><a href="#rien"><span class="secno">6.1</span> <span class="content">Do nothing</span></a>
      <li><a href="#ub"><span class="secno">6.2</span> <span class="content">Unspecified behavior</span></a>
      <li><a href="#uber-alles"><span class="secno">6.3</span> <span class="content">Demand strict conformance</span></a>
      <li><a href="#please-pick-me"><span class="secno">6.4</span> <span class="content">Pragmas</span></a>
      <li><a href="#a-bad-idea"><span class="secno">6.5</span> <span class="content">Attributes</span></a>
      <li><a href="#a-worse-idea"><span class="secno">6.6</span> <span class="content">Fundamental types</span></a>
      <li><a href="#please-dont-pick-me"><span class="secno">6.7</span> <span class="content">Type wrappers</span></a>
      <li><a href="#viva-la-resistance"><span class="secno">6.8</span> <span class="content">Free functions</span></a>
      <li><a href="#hand-of-merlin"><span class="secno">6.9</span> <span class="content">Special lambdas</span></a>
     </ol>
    <li>
     <a href="#proposal"><span class="secno">7</span> <span class="content">Proposal</span></a>
     <ol class="toc">
      <li><a href="#floating-point-formats"><span class="secno">7.1</span> <span class="content">Floating-point formats</span></a>
      <li><a href="#excess-precision"><span class="secno">7.2</span> <span class="content">Excess precision</span></a>
      <li><a href="#denormal-flushing"><span class="secno">7.3</span> <span class="content">Denormal flushing</span></a>
      <li><a href="#fast-math①"><span class="secno">7.4</span> <span class="content">Fast-math</span></a>
      <li><a href="#associativity"><span class="secno">7.5</span> <span class="content">Associativity</span></a>
      <li><a href="#fma-contraction"><span class="secno">7.6</span> <span class="content">FMA contraction</span></a>
      <li><a href="#constant-expression-rules"><span class="secno">7.7</span> <span class="content">Constant expression rules</span></a>
      <li><a href="#type-traits①"><span class="secno">7.8</span> <span class="content">Type traits</span></a>
      <li><a href="#rounding-mode"><span class="secno">7.9</span> <span class="content">Rounding mode</span></a>
      <li><a href="#reproducible-results"><span class="secno">7.10</span> <span class="content">Reproducible results</span></a>
      <li><a href="#environment-access"><span class="secno">7.11</span> <span class="content">Environment access</span></a>
     </ol>
    <li><a href="#questions"><span class="secno">8</span> <span class="content">Questions</span></a>
   </ol>
  </nav>
  <main>
   <h2 class="heading settled" data-level="1" id="history"><span class="secno">1. </span><span class="content">Revision history</span><a class="self-link" href="#history"></a></h2>
   <p>R0: first version</p>
   <h2 class="heading settled" data-level="2" id="introduction"><span class="secno">2. </span><span class="content">Introduction</span><a class="self-link" href="#introduction"></a></h2>
   <p>The C++ standard, at present, says extremely little about floating-point
semantics. In recent meetings, however, there have been a few tracks of papers
that are trying to clarify the behavior of floating-point, starting with
P2746, which proposes abandoning the current C functions for using the rounding
mode and replacing them with a new facility for handling rounding. This has been
followed with P3375, P3479, P3488, and P3565 on various aspects of
floating-point.</p>
   <p>In the course of discussing these papers, the committee has signalled an intent
to firm up the specification of floating-point semantics. However, many of the
issues of floating-point are somewhat related to one another, and without an
understanding of all of these issues, the risk is that the committee can advance
a design for one problem which forecloses better solutions for another problem.</p>
   <p>Thus the first goal of this paper is to provide a comprehensive look at
floating-point, to provide sufficient understanding to evaluate current and
future proposals. It covers not just what the dominant specification for
floating-point, IEEE 754, says, but also what it <em>doesn’t</em> say. It also covers
the existing landscape of what hardware and compilers do and don’t do with regards
to floating-point, including the switches all C++ compilers today provide to
let users choose among varying floating-point semantics. Then it covers the
choices made (or not made) by other language specifications, as well as covering
the specific scenarios that need more specification in the current standard.</p>
   <p>At this version of the paper, a full proposal for fixing the semantics is not
yet provided. Instead, there is an exploration of the solution space for various
aspects of floating-point semantics, and the author’s personal preference for
which solutions work best in various scenarios. Individual scenarios can be
progressed in future versions of this paper as an omnibus paper, or split out into
separate papers (some of which are already advancing independently).</p>
   <h2 class="heading settled" data-level="3" id="background"><span class="secno">3. </span><span class="content">Background</span><a class="self-link" href="#background"></a></h2>
   <p>To understand the specific problems with the current floating-point semantics
in C++, one needs to have some background information on what even constitutes
the semantics of floating-point, and in particular, what the differences are
between the different options.</p>
   <h3 class="heading settled" data-level="3.1" id="ieee-754"><span class="secno">3.1. </span><span class="content">IEEE 754</span><a class="self-link" href="#ieee-754"></a></h3>
   <p>For most programmers, their first introduction to floating-point will be via
IEEE 754: if a programming course touches on floating-point, it will generally
introduce it by explaining as if all floating-point were IEEE 754. At the same
time, it does need to be understood that while almost all modern hardware
implements IEEE 754-ish semantics, there is often subtle variance from the exact
IEEE 754 semantics. The <em>chief</em> problem of floating-point semantics is in fact
in the need to pin down what the deviations from "standard" IEEE 754 are.</p>
   <p>IEEE 754, also known as ISO/IEC 60559, is the main base specification of
floating-point. While it defines both binary and decimal floating-point in the
same document, since C++ does not (and is not actively looking to) support
decimal floating-point, only the parts that are relevant to binary
floating-point are discussed here. The main things that are provided:</p>
   <ul>
    <li data-md>
     <p>A general specification of floating-point formats, based on the radix (fixed
    to 2 for binary floating-point), the maximum exponent and the number of
    digits in the significand. Note that the minimum exponent and exponent
    range are inferred from the maximum exponent, as <code class="highlight"><c- n>emin</c-> <c- o>=</c-> <c- mi>1</c-> <c- o>-</c-> <c- n>emax</c-></code> is
    required by the specification.</p>
    <li data-md>
     <p>The set of valid values for each floating-point format. This includes as
    special values <code class="highlight"><c- o>+</c-><c- mf>0.0</c-></code>, <code class="highlight"><c- mf>-0.0</c-></code>, a set of subnormal (sometimes called
    denormal) values where the leading bit is <code class="highlight"><c- mi>0</c-></code> instead of <code class="highlight"><c- mi>1</c-></code>, positive
    and negative infinities, qNaN (quiet not-a-number), sNaN (signaling
    not-a-number). The qNaN and sNaN may have multiple distinct
    representations, as they contain observable payload and sign bits, but
    which of those representations is chosen is generally left unspecified.</p>
    <li data-md>
     <p>The encoding of these floating-point formats into binary. This is generally
    the most well-known part of the specification, and "supports IEEE 754"
    is often colloquially used to mean "values are represented using
    IEEE 754 encoding" instead of "follows IEEE 754 specification rules
    precisely."</p>
    <li data-md>
     <p>Specific formats are given for 16-bit, 32-bit, 64-bit, and 128-bit
    floating-point formats, called binary16, binary32, binary64, and
    binary128 respectively, or "(binary) interchange formats" collectively.</p>
    <li data-md>
     <p>The concept of rounding modes, which are rules on how to convert the
    infinitely-precise result of an operation to the finite set of values
    allowed by a given format.</p>
    <li data-md>
     <p>The concept of exceptions. Unlike C++ exceptions, an IEEE 754 operation that
    raises an exception also returns a value at the same time, and
    exceptions need not interrupt execution. Indeed, the default behavior of
    floating-point exceptions is to set a flag and carry on execution.</p>
    <li data-md>
     <p>The concept of attributes, which are properties of blocks of code that change
    the behavior of floating-point operations statically contained in that
    block. In C, these attributes are mapped to pragmas, for example, <code class="highlight"><c- cp>#pragma STDC FP_CONTRACT</c-></code> controls whether or not an expression <code class="highlight"><c- p>(</c-><c- n>a</c-> <c- o>*</c-> <c- n>b</c-><c- p>)</c-> <c- o>+</c-> <c- n>c</c-></code> may be converted into <code class="highlight"><c- n>fma</c-><c- p>(</c-><c- n>a</c-><c- p>,</c-> <c- n>b</c-><c- p>,</c-> <c- n>c</c-><c- p>)</c-></code>.</p>
    <li data-md>
     <p>A set of core operations and their behavior, especially with regards to
    special cases. The regular non-function operators, including arithmetic
    operators like <code class="highlight"><c- o>+</c-></code> or <code class="highlight"><c- o>-</c-></code> and cast operators both to integers and other
    floating-point types, are included in this set. Also included are <code class="highlight"><c- n>sqrt</c-></code> and <code class="highlight"><c- n>fma</c-></code>.</p>
    <li data-md>
     <p>A partial ordering of floating-point values is provided, and two sets of
    comparison operations are specified, one which raises exceptions on NaN
    inputs and one which does not. A recommendation is made on which version a
    source-level operator like <code class="highlight"><c- o>&lt;=</c-></code> should map to (although none is provided
    for a dedicated partial order operator like <code class="highlight"><c- o>&lt;=></c-></code>).</p>
    <li data-md>
     <p>A set of recommended extra operations, which most of the functions in <code class="highlight"><c- o>&lt;</c-><c- n>cmath</c-><c- o>></c-></code> correspond to. A side note is that IEEE 754 requires these
    functions be correctly rounded, but C does not, and most implementations
    do not correctly round them.</p>
    <li data-md>
     <p>A chapter on how programming languages must/should map source-level
    expressions to the underlying operations, which will be covered in
    detail in a later section of this paper.</p>
    <li data-md>
     <p>A chapter on what users need to do to expect reproducible results across
    diverse implementations.</p>
   </ul>
   <p>In brief summary, IEEE 754 can be seen as providing reasonably well-defined
semantics for the behavior of something like this:</p>
<pre class="language-c++ highlight"><c- k>enum</c-> <c- n>class</c-> <c- n>rnd_mode_t</c-><c- p>;</c-> <c- c1>// The rounding mode to use</c->
<c- k>struct</c-> <c- nc>fp_except_t</c-><c- p>;</c-> <c- c1>// A bitset of individual exceptions</c->

<c- c1>// A pure operation, templated over an IEEE 754 format type.</c->
<c- n>template</c-> <c- o>&lt;</c-><c- n>typename</c-> <c- n>T</c-><c- o>></c->
<c- n>std</c-><c- o>::</c-><c- n>pair</c-><c- o>&lt;</c-><c- n>T</c-><c- p>,</c-> <c- n>fp_except_t</c-><c- o>></c-> <c- n>ieee754_op</c-><c- p>(</c-><c- n>T</c-> <c- n>lhs</c-><c- p>,</c-> <c- n>T</c-> <c- n>rhs</c-><c- p>,</c-> <c- n>rnd_mode_t</c-> <c- n>rm</c-><c- p>);</c->
</pre>
   <p>There is a small amount of nondeterminism in this definition, for example, the
payload of a NaN result is explicitly not constrained by the standard, and it
does vary among hardware implementations. However, this nondeterminism can
generally be ignored in practice, and it is probably not worth worrying about
for a language specification.</p>
   <h4 class="heading settled" data-level="3.1.1" id="not-ieee754"><span class="secno">3.1.1. </span><span class="content">Non-IEEE 754 types</span><a class="self-link" href="#not-ieee754"></a></h4>
   <p>C++ compilers already support floating-point types that are not the IEEE 754
interchange formats, and so the standard does need to worry about such support.
Many of these types are already IEEE 754-ish, and while they do differ from
IEEE 754 semantics in sometimes dramatic ways, it is still generally safe to
view them for the purposes of this paper as having something akin to the <code class="highlight"><c- n>ieee754_op</c-></code> templates mentioned above.</p>
   <p><code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>bfloat16_t</c-></code>, while not an interchange format, is fully specifiable using
the generic binary floating-point type rules for IEEE 754 (at least up until
normal hardware variance), just using a different mix of exponent and
significand bits. Indeed, it is likely that the next revision of IEEE 754 will
incorporate this type in its list of floating-point types.</p>
   <p>A more difficult IEEE 754-ish type is the 80-bit x87 FPU type, here referred
to by its LLVM IR name <code class="highlight"><c- n>x86_fp80</c-></code> to distinguish it from other types used as <code class="highlight"><c- b>long</c-> <c- b>double</c-></code>. Though it contains 80 bits, it can be viewed as a 79-bit IEEE
754 type, with an extra bit whose value is forced by the other 79 bits. If that
bit is set incorrectly, the result is essentially a noncanonical value, a
concept which IEEE 754 provides, but is not relevant for any other type already
mentioned. Noncanonical values are alternative representations of a value which
are never produced by any operation (except as the result of sign bit
operations, which are guaranteed to only touch the sign bit) and should not
arise except from bit-casting of an integer type to a floating-point type.</p>
   <p>Beyond these two types, which support all of the features of the IEEE 754
standard but aren’t directly specified by the standard, there also exist several
types that do not fully adhere to the standard.</p>
   <p>There have been several proposals for 8-bit floating-point types along the IEEE
754 encoding rules, but several of these make deviations to reduce the number of
representations of special values, and may combine or even outright eliminate
the notions of NaN values and infinities.</p>
   <p>Some pre-IEEE 754 architectures are still supported by modern compilers, and
they may have floating-point types which similarly lack the special value
categories of infinity, NaN, or subnormals. Examples of such types include the
IBM hexadecimal floating-point types and the VAX floating-point types.</p>
   <p>The <code class="highlight"><c- b>long</c-> <c- b>double</c-></code> type on Power and PowerPC (<code class="highlight"><c- n>ppc_fp128</c-></code>, to use LLVM’s name for
it), also known as double-double, is a more radically different floating-point
type, consisting of a pair of <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>float64_t</c-></code> values, the second of which is
smaller than the first. Unlike all of the aforementioned types, this type is
hard to describe via a sign-and-exponent-range-and-fixed-number-of-significand
bits model, as the number of significand bits can change dramatically
(consider the pair <code class="highlight"><c- p>{</c-><c- n>DBL_MAX</c-><c- p>,</c-> <c- n>DBL_MIN</c-><c- p>}</c-></code>, which would have a very large number
of implied 0 bits in the middle of its significand).</p>
   <p>Despite the diversity in the formats of non-IEEE 754 types, the concept of a
well-defined, pure function implementing their fundamental operations that is
templated on the type remains sound, and many of them even retain structures
that correspond to the rounding mode and floating-point exception elements in
the function signature. The diversity primarily impacts the definition of type
traits in <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>numeric_limits</c-></code>, and in the behavior of special cases, neither
of which are being changed in this paper, and so we will consider the interface
of <code class="highlight"><c- n>ieee754_op</c-></code> to be sufficient for these cases, even if the implementation is
very different.</p>
   <h3 class="heading settled" data-level="3.2" id="hardware"><span class="secno">3.2. </span><span class="content">Hardware implementation</span><a class="self-link" href="#hardware"></a></h3>
   <p>While most hardware is based on IEEE 754, very rarely is access to the pure <code class="highlight"><c- n>ieee754_op</c-></code> interface described available to a compiler, essentially being
limited to software floating-point implementations. Instead, most hardware
chooses to define their core instructions by means of some sort of
floating-point control/status register (FPCSR):</p>
<pre class="language-c++ highlight"><c- k>struct</c-> <c- nc>fp_env_t</c-> <c- p>{</c->
  <c- c1>// Contains several bits, these are some of the more common ones</c->
  <c- n>rnd_mode_t</c-> <c- n>rm</c-><c- p>;</c->
  <c- n>fp_except_t</c-> <c- n>flags</c-><c- p>;</c->
  <c- n>fp_except_t</c-> <c- n>except_mask</c-><c- p>;</c->
  <c- b>bool</c-> <c- n>flush_denormals</c-><c- p>;</c->
<c- p>};</c->

<c- c1>// Global register</c->
<c- k>thread_local</c-> <c- n>fp_env_t</c-> <c- n>fpcsr</c-><c- p>;</c->

<c- c1>// Parameters/return value correspond to FP registers</c->
<c- c1>// Will call ieee754_op&lt;T>, but will do other things based on fpcsr</c->
<c- n>template</c-><c- o>&lt;</c-><c- n>typename</c-> <c- n>T</c-><c- o>></c-> <c- n>T</c-> <c- n>hardware_op</c-><c- p>(</c-><c- n>T</c-> <c- n>lhs</c-><c- p>,</c-> <c- n>T</c-> <c- n>rhs</c-><c- p>);</c->
</pre>
   <p>Most commonly, there is the dynamic rounding mode in the FPCSR, which gets used
as the rounding mode parameter of <code class="highlight"><c- n>ieee754_op</c-></code>. Generally, exceptions will
either cause a hardware trap, if it is instructed to do so by one of the bits in
the FPCSR, or it will instead set a bit that can be read later (in C/C++, the
appropriate test method is <code class="highlight"><c- n>fetestexcept</c-></code>).</p>
   <p>Additionally, there are usually other bits that control the actual semantics of
the underlying operation. The most common of these is denormal flushing, but the
precise behavior of denormal flushing varies between architectures. Some
architectures even implement <code class="highlight"><c- n>hardware_op</c-></code> in such a way that denormal flushing
happens unconditionally, with no way to get the correct IEEE 754 behavior on
denormals. Sometimes, the bits get more exotic: x87 has bits to control the
effective precision, for example.</p>
   <p>Some architectures provide the ability to specify the rounding mode statically
on the instruction (i.e., pulling it from what is in effect a template
parameter rather than the current value in the FPCSR), but this is by no means
universal.</p>
   <p>Some architectures, especially in the accelerator space, choose to just drop the
concept of an FPCSR entirely, providing no means to maintain a dynamic rounding
mode (or even a static rounding mode at times), or to observe floating-point
exceptions.</p>
   <p>Finally, it is worth noting that some hardware will have multiple FPU
implementations, with the capabilities of those units diverging quite wildly,
and sometimes using entirely different registers as their effective FPCSR. For
example, x86 processors have both an x87 FPU (with relatively unusual semantics)
and an SSE execution unit, which works more like typical FPUs. Since these units
tend not to support the same sets of types (especially when SIMD vector types
are accounted for), that means that the hardware capabilities can be at least
partially type-dependent.</p>
   <h3 class="heading settled" data-level="3.3" id="middle-end"><span class="secno">3.3. </span><span class="content">Compiler implementation</span><a class="self-link" href="#middle-end"></a></h3>
   <p>Because of the need for optimizations on floating-point code, the internal
representation of a compiler contains its own layer of semantics which is fairly
independent of both hardware and language specifications. Indeed, to support the
configurability of semantics via command-line flags, pragmas, or other such
mechanisms, there is actually typically a very large number of variants for the
floating-point semantics.</p>
   <p>At a very high level, the compiler representation of floating-point semantics
tends to fall into three main buckets of floating-point model. The first model
is the one that demands a complete adherence to IEEE 754 or hardware semantics,
including modelling the interaction with the floating-point environment fully
and correctly: this is the <code class="highlight"><c- n>strict</c-></code> model. The second model requires strict
adherence only for the values being produced, and presumes that the
floating-point environment is left untouched in its default state and no one is
going to attempt to read the flags: this is the <code class="highlight"><c- n>precise</c-></code> model. The final
model goes further and allows the results of operations to vary in precision
somewhat, and this is some kind of <code class="highlight"><c- n>fast</c-></code> model.</p>
   <p>Note that these models are buckets of actual semantics; in practice, the knobs
of control within the compiler internally, made accessible via flags and other
user-visible tools, can be tuned much more finely. There’s a full combinatorial
explosion of possibilities here.</p>
   <p>For example, within the LLVM optimizer of the Clang compiler, there are the
following flags that can be attached to a regular floating-point operator that
can all be <em>independently</em> applied:</p>
   <ul>
    <li data-md>
     <p>7 fast-math flags per instruction</p>
     <ul>
      <li data-md>
       <p><code class="highlight"><c- n>nnan</c-></code>, which makes the use of NaN values in the operation undefined behavior</p>
      <li data-md>
       <p><code class="highlight"><c- n>ninf</c-></code>, which makes the use of infinities in the operation undefined behavior</p>
      <li data-md>
       <p><code class="highlight"><c- n>nsz</c-></code>, which allows <code class="highlight"><c- mf>-0.0</c-></code> to be represented as <code class="highlight"><c- mf>0.0</c-></code></p>
      <li data-md>
       <p><code class="highlight"><c- n>arcp</c-></code>, which allows <code class="highlight"><c- n>a</c-> <c- o>/</c-> <c- n>b</c-></code> to be converted to <code class="highlight"><c- n>a</c-> <c- o>*</c-> <c- p>(</c-><c- mf>1.0</c-> <c- o>/</c-> <c- n>b</c-><c- p>)</c-></code></p>
      <li data-md>
       <p><code class="highlight"><c- n>reassoc</c-></code>, which allows (among other things) reassociability to be inferred</p>
      <li data-md>
       <p><code class="highlight"><c- n>contract</c-></code>, which allows FMA contractions to take place</p>
      <li data-md>
       <p><code class="highlight"><c- n>afn</c-></code>, which allows some lower-precision approximations to be used</p>
     </ul>
    <li data-md>
     <p>11 function attributes</p>
     <ul>
      <li data-md>
       <p><code class="highlight"><c- n>strictfp</c-></code>, which prohibits speculation or reordering of floating-point
  operations</p>
      <li data-md>
       <p><code class="highlight"><c- n>denormal</c-><c- o>-</c-><c- n>fp</c-><c- o>-</c-><c- n>math</c-></code>, which controls assumption of denormal flushing behavior</p>
      <li data-md>
       <p><code class="highlight"><c- n>denormal</c-><c- o>-</c-><c- n>fp</c-><c- o>-</c-><c- n>math</c-><c- o>-</c-><c- n>f32</c-></code>, similar to above, but allows a different denormal
  flushing behavior to be chosen only for <code class="highlight"><c- b>float</c-></code> values</p>
      <li data-md>
       <p><code class="highlight"><c- n>reciprocal</c-><c- o>-</c-><c- n>estimates</c-></code>, which controls the number of refinement steps needed
  for an approximation of division or square root computations</p>
      <li data-md>
       <p>A function-level version of each fast-math flag</p>
     </ul>
    <li data-md>
     <p>Optional instruction metadata indicating accuracy of result in ULPs.</p>
    <li data-md>
     <p>Variants of instructions</p>
     <ul>
      <li data-md>
       <p>Constrained intrinsics of regular operations, with one extra parameter
  indicating assumed rounding-mode behavior, and another extra parameter
  indicating whether or not floating-point exceptions may cause traps</p>
     </ul>
     <ul>
      <li data-md>
       <p>Intrinsic versions of math functions, which must not set <code class="highlight"><c- n>errno</c-></code></p>
      <li data-md>
       <p>Special intrinsic that can be expanded either to a single three-argument
      fma instruction or a pair of multiply and add instructions</p>
     </ul>
   </ul>
   <p>Even so, this list is known to be missing variants that are necessary. It is
likely that LLVM and Clang will add yet more fast-math flags in the future. The
existing repertoire is deficient in supporting static rounding mode
instructions, as well as supporting low-precision approximations (which are
especially useful on offloaded code). Furthermore, several features may be
removed entirely: the constrained intrinsics and <code class="highlight"><c- n>strictfp</c-></code> operation are likely
to be replaced in the near future, and the current handling of denormal
flushing is problematic.</p>
   <p>In general, any fixed list of relevant properties for optimization should be
avoided: they are likely to change, both in additions and removals of parameters
that influence optimization.</p>
   <h3 class="heading settled" data-level="3.4" id="front-end"><span class="secno">3.4. </span><span class="content">Programming language semantics</span><a class="self-link" href="#front-end"></a></h3>
   <p>At the end of this list of semantics is the compiler front-end, which needs to
work out which of the variety of slightly-different shades of operations
provided by the middle-end to map a source-level <code class="highlight"><c- k>operator</c-><c- o>+</c-><c- p>(</c-><c- n>T</c-><c- p>,</c-> <c- n>T</c-><c- p>)</c-></code> to. This
choice is dictated both by the rules of the standard and the panoply of
command-line flags or other language extensions (such as pragmas) offered by the
compiler specifically to influence the choice of how to lower a language-level
floating-point operation to internal IR and ultimately the final hardware
instructions.</p>
   <p>It is also important to note that the front-end is a completely different part
of the compiler from the optimizer. If the optimizer has a choice of whether or
not it may make a transformation (which is true for most of the attributes
mentioned in the previous section), the front-end is not generally capable of
knowing if it will or will not make that choice. Most importantly, this means
that the constant expression evaluation done by the front-end is done by an
entirely different process than the constant folding done by optimizations, and
it is not possible in general to guarantee that the two come to the same
decision (in particular for things like contracting expressions into FMAs). C++
today does not guarantee equivalence between constant expression evaluation and
runtime evaluation, and it is unlikely that implementations <em>could</em> make that
guarantee.</p>
   <h3 class="heading settled" data-level="3.5" id="ieee-754-lang"><span class="secno">3.5. </span><span class="content">IEEE 754 rules for language specifications</span><a class="self-link" href="#ieee-754-lang"></a></h3>
   <p>IEEE 754, as mentioned earlier, has a small section laying out how programming
language standards are to map expressions to the underlying operations. It only
directly governs the rules for the behavior of an <em>individual</em> operation; an
expression might comprise multiple operations, and most of that behavior is left
up to the language specification. C++ already fulfills the basic requirements of
defining types for intermediate values in expressions and specifying the order of
operations.</p>
   <p>The core requirements that are actually "shall" requirements relate to
assignments, requiring:</p>
   <blockquote>
    <p>implementations shall never use an assigned-to variable’s wider precursor in
place of the assigned-to variable’s stored value when evaluating subsequent
expressions</p>
   </blockquote>
   <p>Similar language is used for the parameter and return types of a function: in
all of these cases, IEEE 754 is explicitly precluding the use of extended
precision of any kind. These rules are why C specifies <code class="highlight"><c- n>FLT_EVAL_METHOD</c-></code> in the
manner that it does, and even C++ alludes to this requirement in the footnote
attached to [expr.pre]p6.</p>
   <p>Beyond these requirements are a few "should" recommendations. IEEE 754 envisions
that the behavior is governed by "attributes," which are implicit parameters to
the underlying operations. The main recommended attributes are an attribute to
control the preferred format for intermediate values of an expression, and
another attribute to control whether or not "value-changing optimizations" are
allowed. Proposing ways in C++ to define attributes is one of the main goals of
this paper.</p>
   <p>Value-changing optimizations are more commonly known to users as fast-math
optimizations. But even if value-changing optimizations are fully disabled,
bit-exact reproducibility is not guaranteed. Properties like the sign and
payload of NaN values need not be preserved by a value-safe optimization. Nor
is the number of times (so long as it is at least one) or order of
floating-point exceptions. However, things like the sign of 0, the exponent of
a decimal floating-point number, or the distinction between an sNaN and a qNaN
are not allowed to be changed by a value-safe optimization.</p>
   <h2 class="heading settled" data-level="4" id="xlang-fp"><span class="secno">4. </span><span class="content">Comparison of language standards</span><a class="self-link" href="#xlang-fp"></a></h2>
   <p>Floating-point is rarely described in detail by programming language standards,
with most of them largely being silent on the sorts of issues described in this
paper. What follows is a brief summary of the detail provided by other
languages with regards to floating-point.</p>
   <ul>
    <li data-md>
     <p>Ada allows users to define their own floating-point types with minimum
    precision and optional range parameters in lieu of providing standard
    floating-point types as most languages do (although there is a default <code class="highlight"><c- n>Float</c-></code> type, which GNAT appears to map to IEEE 754 single-precision).
    The specification has two modes for floating-point, a strict mode which
    requires strict conformance to the floating-point model of the standard,
    including raising overflow errors if a computation overflows; and a
    relaxed mode which allows some of the requirements to be relaxed (in
    particular, forgoing overflow errors on floating-point overflow). The
    GNAT compiler provides two separate floating-point pragmas here, one
    which turns on the overflow check, and one which is roughly equivalent
    to C’s <code class="highlight"><c- cp>#pragma STDC CX_LIMITED_RANGE ON</c-></code>. Intermediate range checks,
    for floating-point types that declare limited range, are still required
    even in relaxed mode.</p>
    <li data-md>
     <p>C has arguably the single most detailed discussion of floating-point in its
    reference manual. However, a lot of that detail is locked behind the
    implementation-dependent Annex F, and many implementations (even when
    claiming conformance) fail to fully conform. An Annex F-conformant
    compiler is still allowed to engage in excess precision and contract
    expressions, subject to some restrictions that are poorly observed in
    practice. Denormal flushing is by inference prohibited in Annex F, but
    may be allowed outside of it, given the general latitude allowed by the
    standard. Full sNaN support is not required, even in Annex F. Accessing the
    floating-point environment requires use of a pragma, and whether or not an
    expression occurs at runtime or compile-time (for purposes of
    environment influence) is strictly controlled by the standard. A few
    macros are provided by the compiler/runtime to indicate the capabilities
    of the floating-point types, but these macros do not, in practice,
    reflect the behaviors when compiled with fast-math.</p>
    <li data-md>
     <p>C# requires the IEEE 754 formats for its floating-point types, but explicitly
    allows excess precision to be used. Additionally, it explicitly mentions
    that denormal support is optional, neither requiring nor forbidding it.</p>
    <li data-md>
     <p>Forth provides IEEE 754 single-precision and double-precision floating-point
    types, in addition to an implementation-defined floating-point type.
    The general rounding behavior is implementation-defined, and there is no
    further discussion of other situations discussed in this paper.</p>
    <li data-md>
     <p>Fortran behaves like C in many ways with respect to floating-point. The
    language predates IEEE 754, and so IEEE 754 support is optional. An
    intrinsic module is provided that allows querying which types are IEEE
    754-compliant, with compliance here being a set of rather loose
    properties. For example, the base <code class="highlight"><c- n>IEEE_SUPPORT_DATATYPE</c-></code> doesn’t
    require complete conformance with IEEE 754, but only that the format for
    normal numbers matches an IEEE 754 base datatype, that one of the
    rounding modes matches for regular arithmetic, and that a few particular
    IEEE 754 operations are provided. There are also queries for support for
    NaNs, infinities, and subnormal numbers, but experiments show that
    these are not affected by fast-math flags. Additionally, routines are
    provided for querying or modifying the floating-point environment,
    including rounding mode, exceptions, whether exceptions trap, and
    enabling or disabling denormal flushing; the behavior of the
    environment when the intrinsic modules are not used is akin to the way
    C supports <code class="highlight"><c- cp>#pragma STDC FENV_ACCESS OFF</c-></code>. Finally, expressions are
    allowed to be substituted for mathematically-equivalent expressions,
    except across parentheses: this allows some non-value-safe optimizations
    (such as FMA contraction) to occur. There is no direct mention of excess
    precision concerns.</p>
    <li data-md>
     <p>Go explicitly maps its types to the IEEE 754 format types. FMA contraction is
    explicitly permitted, even across statements, with only an explicit
    type conversion providing a contraction barrier. Otherwise, no mention
    is made of floating-point environment, denormal flushing, rounding mode,
    or excess precision.</p>
    <li data-md>
     <p>Java, starting from Java 17, requires strict adherence to the IEEE 754 standard,
    prohibiting any optimizations that would be a value-changing
    optimization. Denormal support is explicitly required. From Java 1.2 to
    Java 16, there was a non-strict mode which allowed excess exponent range
    (but not number of mantissa bits) to be used for floating-point values;
    code could opt out of this mode by using the <code class="highlight"><c- n>strictfp</c-></code> keyword on
    methods. This nonstrict mode is not quite equivalent to the situation
    where <code class="highlight"><c- n>FLT_EVAL_METHOD</c-> <c- o>==</c-> <c- mi>2</c-></code> in C.</p>
    <li data-md>
     <p>JavaScript uses an IEEE 754 double precision type as its main numeric type and
    has an unusually descriptive abstract machine that explicitly references
    the behavior of IEEE 754 default rounding mode in its description of
    core arithmetic operations. Some functions, such as <code class="highlight"><c- n>Math</c-><c- p>.</c-><c- n>cos</c-></code>, have
    explicit license to be approximated.</p>
    <li data-md>
     <p>Julia has support for the 16-bit, 32-bit, and 64-bit IEEE 754 floating-point
    types. These are expected to generally follow IEEE 754 semantics. For
    supporting fast FMAs, it provides an operation <code class="highlight"><c- n>muladd</c-></code> which is defined
    to be the faster of FMA or multiply-and-add. Functions exist to change
    the rounding mode and denormal-flushing modes globally, even changing
    the floating-point environment of other flags. Finally, it provides a
    macro facility that enables LLVM’s fast-math flags on a per-operation
    basis.</p>
    <li data-md>
     <p>Kotlin provides IEEE 754 single-precision and double-precision floating-point
    types, but defers details of their implementation to the implementing
    platform. The main platform used for Kotlin is the JVM, so similar rules
    to Java can be inferred to apply.</p>
    <li data-md>
     <p>Lua has a float type, which can either be mapped to a <code class="highlight"><c- b>float</c-></code> or a <code class="highlight"><c- b>double</c-></code>,
    depending on how the interpreter was compiled. While not explicit in
    saying so, it can be inferred that the semantics follow the semantics of
    the C type on the host machine.</p>
    <li data-md>
     <p>MATLAB provides several different floating-point types, including a full
    arbitrary-precision type, although the default it uses is the
    IEEE 754 double-precision floating-point type. The language reference
    largely does not discuss the topics here, although as an array-based
    language with thread-based, offload, and distributed parallelism modes,
    several array language constructs tend to have explicit footnotes saying
    that their precision isn’t guaranteed. Furthermore, the reference does
    link to the blogs of some MATLAB developers who discuss some of the
    topics in this paper, with the general inference being that MATLAB is
    not going out of its way to provide reproducible results across diverse
    hardware.</p>
    <li data-md>
     <p>Pascal provides a single <code class="highlight"><c- n>real</c-></code> type to represent floating-point types,
    although several implementations do provide extensions that cover the
    major IEEE 754 formats. The results of <code class="highlight"><c- n>real</c-></code> arithmetic operations are,
    per the standard, "approximations to the corresponding mathematical
    results" whose accuracy is implementation-defined.</p>
    <li data-md>
     <p>Perl has a single floating-point type, which the documentation implies is the
    underlying <code class="highlight"><c- b>double</c-></code> type on the host architecture.</p>
    <li data-md>
     <p>PHP has a single floating-point type, which the documentation implies is the
    underlying <code class="highlight"><c- b>double</c-></code> type on the host architecture.</p>
    <li data-md>
     <p>Python uses the underlying representation of <code class="highlight"><c- b>double</c-></code> in its implementation
    (either C or Java) for floating-point types, and says explicitly that
    "you are at the mercy of the underlying machine architecture" for behavior
    here.</p>
    <li data-md>
     <p>R does not explicitly mention anything with regards to IEEE 754 conformance.</p>
    <li data-md>
     <p>Ruby uses the underlying <code class="highlight"><c- b>double</c-></code> type on the host architecture.</p>
    <li data-md>
     <p>Rust does not yet officially document its precise float semantics guarantees,
    but a recently accepted proposal lays out the guarantees it intends
    to support. Rust wants to mandate strict IEEE 754 conformance, without
    support for floating-point environment, but it acknowledges buggy
    implementation on some platforms with regards to excess precision, sNaN
    handling, and denormal flushing. Uniquely among the languages considered
    here, Rust actually provides some guarantees on the behavior of NaN
    payloads, essentially guaranteeing that the compiler will not introduce
    certain payloads if neither the source nor the hardware do so.</p>
    <li data-md>
     <p>Swift explicitly uses IEEE 754 formats for its floating-point types, but does
    not go into any further details on the semantics of floating-point.</p>
   </ul>
   <h2 class="heading settled" data-level="5" id="motivation"><span class="secno">5. </span><span class="content">Motivation</span><a class="self-link" href="#motivation"></a></h2>
   <p>Floating-point semantics in C++ are well-known to be thoroughly underspecified.
In recent years, though, there has been a resurgence of interest in bringing clarity
to the specification. The goal of this paper is to provide a comprehensive look
at what needs to be done to clarify the semantics, as partial solutions that
only tackle a subset of concerns may generalize poorly to the full problem
space.</p>
   <p>This motivation section is split into two subsections, looking at the existing
problems from two different perspectives. The first section will focus on
implementers and the varying hardware semantics and compiler models they have
to support. The second section will focus on users and on specific use cases
that they might want to achieve.</p>
   <h3 class="heading settled" data-level="5.1" id="implementer"><span class="secno">5.1. </span><span class="content">Implementers' perspective</span><a class="self-link" href="#implementer"></a></h3>
   <h4 class="heading settled" data-level="5.1.1" id="FLT_EVAL_METHOD"><span class="secno">5.1.1. </span><span class="content">Excess precision</span><a class="self-link" href="#FLT_EVAL_METHOD"></a></h4>
   <p>This is a subject that has come up recently in a few papers. Most prominent
are P3565 and P3488.</p>
   <p>The core problem that <code class="highlight"><c- n>FLT_EVAL_METHOD</c-></code> tries to solve is the x87 problem. The
x87 floating-point unit only supports internal computation with one data type:
the 80-bit floating-point type that compilers targeting it map <code class="highlight"><c- b>long</c-> <c- b>double</c-></code> to.
It lacks any arithmetic support for 32-bit and 64-bit floating-point values,
although the unit has load and store instructions for such types that convert
to/from the 80-bit type as appropriate.</p>
   <p>Unlike integer types, it is not always the case that a smaller floating-point
type can be losslessly emulated by using larger floating-point types; the
larger type needs to be sufficiently larger to avoid double rounding
(for more details, see <a href="https://hal.science/hal-01091186/document">this academic paper</a>). For the
standard IEEE 754 sequence of types (binary16, binary32, binary64, binary128),
it is the case that each type can be emulated with the next one in the sequence
without risk of double rounding. But this is not the case for the x87’s 80-bit
type: it cannot emulate IEEE 754 binary64 arithmetic without inducing double
rounding.</p>
   <p>To solve this problem, C99 added <code class="highlight"><c- n>FLT_EVAL_METHOD</c-></code>, which allows an implementer
to evaluate the temporary values within expressions in higher precision instead
of strictly sticking to the exact source types. However, at prescribed points in
the program (when assigning to a variable, use as a parameter or return value,
or using an explicit source cast), the value must be truncated to its source
type.</p>
   <p>Despite the presence of this feature, most modern compilers do not in fact
correctly implement the behavior required of <code class="highlight"><c- n>FLT_EVAL_METHOD</c-> <c- o>==</c-> <c- mi>2</c-></code>. Instead,
the compiler frontends lower the code to an IR where all the values are using
the lower-precision binary32 and binary64 values, and merely map the IR’s
implementation of <code class="highlight"><c- k>operator</c-><c- o>+</c-><c- p>(</c-><c- n>binary64</c-><c- p>,</c-> <c- n>binary64</c-><c- p>)</c-></code> to the hardware <code class="highlight"><c- n>FADD</c-></code> instruction. The following table illustrates the consequences of this difference
in implementation (using LLVM IR as the representation for a generic compiler’s
internal IR):</p>
   <table>
    <tbody>
     <tr>
      <td>
      <th>IR
      <th>x86 Assembly
     <tr>
      <th>Incorrect<br>(implemented by clang, gcc)
      <td>
<pre class="language-llvm highlight"><c- k>define</c-> <c- b>double</c-> <c- vg>@do_add</c-><c- p>(</c-><c- b>double</c-> <c- g>%a</c-><c- p>,</c-> <c- b>double</c-> <c- g>%b</c-><c- p>,</c-> <c- b>double</c-> <c- g>%c</c-><c- p>)</c-> <c- p>{</c->
  <c- g>%sum1</c-> <c- p>=</c-> <c- k>fadd</c-> <c- b>double</c-> <c- g>%a</c-><c- p>,</c-> <c- g>%b</c->
  <c- g>%res</c-> <c- p>=</c-> <c- k>fadd</c-> <c- b>double</c-> <c- g>%sum1</c-><c- p>,</c-> <c- g>%c</c->
  <c- k>ret</c-> <c- b>double</c-> <c- g>%res</c->
<c- p>}</c->
</pre>
      <td>
<pre class="language-asm highlight"><c- nl>do_add:</c->
  <c- nf>fld</c-> <c- no>qword</c-> <c- no>ptr</c-> <c- p>[</c-><c- no>esp</c-> + <c- mi>20</c-><c- p>]</c-><c- c1>; load third argument (as a double) on the FP stack</c->
  <c- nf>fld</c-> <c- no>qword</c-> <c- no>ptr</c-> <c- p>[</c-><c- no>esp</c-> + <c- mi>12</c-><c- p>]</c-><c- c1>; load second argument on the FP stack</c->
  <c- nf>fld</c-> <c- no>qword</c-> <c- no>ptr</c-> <c- p>[</c-><c- no>esp</c-> + <c- mi>4</c-><c- p>]</c-> <c- c1>; load first argument on the FP stack</c->
  <c- nf>faddp</c-> <c- no>st</c-><c- p>(</c-><c- mi>1</c-><c- p>),</c-> <c- no>st</c->         <c- c1>; add two values, popping one off the stack</c->
  <c- nf>faddp</c-> <c- no>st</c-><c- p>(</c-><c- mi>1</c-><c- p>),</c-> <c- no>st</c->         <c- c1>; repeat</c->
  <c- nf>ret</c->                     <c- c1>; (return value is on the top of the stack)</c->
</pre>
     <tr>
      <th>Correct<br>(implemented by icc)
      <td>
<pre class="language-llvm highlight"><c- k>define</c-> <c- b>double</c-> <c- vg>@do_add</c-><c- p>(</c-><c- b>double</c-> <c- g>%a</c-><c- p>,</c-> <c- b>double</c-> <c- g>%b</c-><c- p>,</c-> <c- b>double</c-> <c- g>%c</c-><c- p>)</c-> <c- p>{</c->
  <c- g>%a.conv</c-> <c- p>=</c-> <c- k>fpext</c-> <c- b>double</c-> <c- g>%a</c-> <c- k>to</c-> <c- b>x86_fp80</c->
  <c- g>%b.conv</c-> <c- p>=</c-> <c- k>fpext</c-> <c- b>double</c-> <c- g>%b</c-> <c- k>to</c-> <c- b>x86_fp80</c->
  <c- g>%c.conv</c-> <c- p>=</c-> <c- k>fpext</c-> <c- b>double</c-> <c- g>%c</c-> <c- k>to</c-> <c- b>x86_fp80</c->
  <c- g>%sum1</c-> <c- p>=</c-> <c- k>fadd</c-> <c- b>x86_fp80</c-> <c- g>%a.conv</c-><c- p>,</c-> <c- g>%b.conv</c->
  <c- g>%res</c-> <c- p>=</c-> <c- k>fadd</c-> <c- b>x86_fp80</c-> <c- g>%sum1</c-><c- p>,</c-> <c- g>%c.conv</c->
  <c- g>%res.conv</c-> <c- p>=</c-> <c- k>fptrunc</c-> <c- b>x86_fp80</c-> <c- g>%res</c-> <c- k>to</c-> <c- b>double</c->
  <c- k>ret</c-> <c- b>double</c-> <c- g>%res.conv</c->
<c- p>}</c->
</pre>
      <td>
<pre class="language-asm highlight"><c- nl>do_add:</c->
  <c- nf>sub</c-> <c- no>esp</c-><c- p>,</c-> <c- mi>12</c->             <c- c1>; reserve space to spill the value</c->
  <c- nf>fld</c-> <c- no>qword</c-> <c- no>ptr</c-> <c- p>[</c-><c- no>esp</c-> + <c- mi>32</c-><c- p>]</c-><c- c1>; load third argument (as a double) on the FP stack</c->
  <c- nf>fld</c-> <c- no>qword</c-> <c- no>ptr</c-> <c- p>[</c-><c- no>esp</c-> + <c- mi>24</c-><c- p>]</c-><c- c1>; load second argument on the FP stack</c->
  <c- nf>fld</c-> <c- no>qword</c-> <c- no>ptr</c-> <c- p>[</c-><c- no>esp</c-> + <c- mi>16</c-><c- p>]</c-><c- c1>; load first argument on the FP stack</c->
  <c- nf>faddp</c-> <c- no>st</c-><c- p>(</c-><c- mi>1</c-><c- p>),</c-> <c- no>st</c->         <c- c1>; add two values, popping one off the stack</c->
  <c- nf>faddp</c-> <c- no>st</c-><c- p>(</c-><c- mi>1</c-><c- p>),</c-> <c- no>st</c->         <c- c1>; repeat</c->
  <c- nf>fstp</c-> <c- no>qword</c-> <c- no>ptr</c-> <c- p>[</c-><c- no>esp</c-><c- p>]</c->    <c- c1>; store the top of the stack as a double</c->
  <c- nf>fld</c-> <c- no>qword</c-> <c- no>ptr</c-> <c- p>[</c-><c- no>esp</c-><c- p>]</c->     <c- c1>; load the truncated value back on the stack</c->
  <c- nf>add</c-> <c- no>esp</c-><c- p>,</c-> <c- mi>12</c->             <c- c1>; restore stack pointer</c->
  <c- nf>ret</c->                     <c- c1>; (return value is on the top of the stack)</c->
</pre>
   </table>
   <p>Incorrect behavior can be observed in other ways. For example, a sufficiently
large floating-point expression that requires spilling intermediate results
due to insufficient registers causes those results to be spilled as their source
types rather than the correct extended precision types. Storing a result in a
variable fails to force truncation of the extended precision arithmetic. As a
result, the actual semantics implemented by these nonconforming compilers
amounts to evaluating all <code class="highlight"><c- b>float</c-></code> and <code class="highlight"><c- b>double</c-></code> arithmetic in extended precision,
except that at unpredictable points in time, it is truncated to the source
precision. This behavior is not helpful for users, since there is little or no
ability to influence the actual truncation behavior.</p>
   <p>That compilers do not conform to the correct behavior has long been known. The <a href="https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323">gcc bug</a> pointing out the issue for x87 is the second-most duplicated bug
in its bug tracker (eclipsed only by the bug used for reports based on
alias violations in user code). If 25 years and 100 duplicates is not enough to
motivate a compiler to make their code conforming, then there is little hope of
the compiler ever doing so. Clang similarly has a <a href="https://github.com/llvm/llvm-project/issues/44218">long-open bug</a> on its
nonconformance here, and while there is discussion on how to fix it, it is not
considered a priority.</p>
   <p>The problems described here are relevant for very few architectures. For x86
processors, the SSE and SSE2 instruction sets added an IEEE 754-compliant
implementation for binary32 and binary64. The last x86 processor released
without SSE2 support was in 2004, and the 64-bit ABIs all require SSE2 support,
which means only x86 processors targeting 32-bit applications and supporting
hardware 20 years old cannot easily conform to precise floating-point semantics
for binary64. Outside of x86, the next most prevalent architecture that has the
excess precision problem is the Motorola 68000 family, where FPUs before the
MC68040 (released in 1990) lack the ability to do binary32 or binary64
arithmetic exactly.</p>
   <p>Given the declining importance of architectures for which a solution like <code class="highlight"><c- n>FLT_EVAL_METHOD</c-></code> is necessary, and given that current compilers largely do not
conform to the specification where it is relevant, the most prudent course of
action is to not reserve any space in the standard for these implementations and
accept that compilers will likely always be non-conforming on these
architectures.</p>
   <h4 class="heading settled" data-level="5.1.2" id="denormal"><span class="secno">5.1.2. </span><span class="content">Denormal flushing</span><a class="self-link" href="#denormal"></a></h4>
   <p>For various reasons, many hardware implementations have opted to not implement
proper support for denormals, sometimes providing an option to opt out of
denormal support via a bit in the floating-point environment, or sometimes
even going so far as to provide no mechanism to support denormals at all. As a
result, for some architectures (such as the original ARM NEON implementation),
flushing denormals is necessary to be able to use hardware floating-point at
all.</p>
   <p>Some hardware supports denormals only via expensive microcode or software trap
handling for the denormal cases. For an individual instruction, execution
with a denormal input can be 100 times slower. Averaged over an entire
benchmark (which obviously executes more than just floating-point instructions
involving only denormals), this tends to be single-digit percentage loss or
less, unless the compiler believes it is necessary to flush denormals to be able
to access a vectorized SIMD unit. However, full-speed hardware with full
denormal support is known now, and many architectures that previously required
denormal flushing, or imposed severe speed penalties on denormals, are able to
use denormals with no speed impact on their newest versions. Thus, denormal
flushing is also an issue whose salience is decreasing and will become less of
an issue in the future.</p>
   <p>A main complication of denormal flushing is that some implementations choose to
link in a library that sets the denormal flushing bit in the environment on
startup when linking with fast-math flags. Owing to user complaints, this has
shifted recently to linking in this library only when compiling an executable
and not a shared library. Consequently, whether or not denormals will be
flushed is unknowable by the compiler as it compiles a translation unit. In
such implementations, a <code class="highlight"><c- k>constexpr</c-></code> function indicating support for denormals
can only be at best a guess and cannot be made reliable.</p>
   <h4 class="heading settled" data-level="5.1.3" id="reassoc"><span class="secno">5.1.3. </span><span class="content">Associativity and vectorization</span><a class="self-link" href="#reassoc"></a></h4>
   <p>It should be fairly well-known that floating-point arithmetic is nonassociative,
which means <code class="highlight"><c- n>a</c-> <c- o>+</c-> <c- p>(</c-><c- n>b</c-> <c- o>+</c-> <c- n>c</c-><c- p>)</c-></code> may return a different result from <code class="highlight"><c- p>(</c-><c- n>a</c-> <c- o>+</c-> <c- n>b</c-><c- p>)</c-> <c- o>+</c-> <c- n>c</c-></code>.
Unfortunately, associativity is a required property for parallel algorithms, so
the nonassociativity blocks the ability to automatically vectorize code. All C++
compilers provide some means to allow assumption of associativity to enable
vectorization. Frequently, this also allows the related assumption of
distributivity (allowing <code class="highlight"><c- n>a</c-> <c- o>*</c-> <c- p>(</c-><c- n>b</c-> <c- o>+</c-> <c- n>c</c-><c- p>)</c-></code> to be converted to <code class="highlight"><c- n>a</c-> <c- o>*</c-> <c- n>b</c-> <c- o>+</c-> <c- n>a</c-> <c- o>*</c-> <c- n>c</c-></code> or
vice versa).</p>
   <p>For most numerical code, these are generally safe assumptions to make. If all of
the values involved are about the same magnitude and the same sign, then the
resulting value of the expressions will only differ in the last few bits of the
significand, a difference subsumed by the inherent inaccuracy of the source data
in the first place. When signs are different, there is the potential for values
to be greatly different due to overflow (if <code class="highlight"><c- n>b</c-> <c- o>+</c-> <c- n>c</c-></code> is positive infinity and <code class="highlight"><c- n>a</c-></code> is negative, then <code class="highlight"><c- n>a</c-> <c- o>+</c-> <c- p>(</c-><c- n>b</c-> <c- o>+</c-> <c- n>c</c-><c- p>)</c-></code> would be infinite where <code class="highlight"><c- p>(</c-><c- n>a</c-> <c- o>+</c-> <c- n>b</c-><c- p>)</c-> <c- o>+</c-> <c- n>c</c-></code> could
be a finite value), or other artifacts due to catastrophic cancellation.</p>
   <p>There are times when these assumptions are not safe. Some algorithms rely on the
precise order of arithmetic to get extra precision. For example, Fast2Sum and
Kahan summation provide extra precision that is destroyed with reassociation:</p>
<pre class="language-c++ highlight"><c- c1>// Return two values such that sum + error is the exact result of a + b, without</c->
<c- c1>// any precision loss.</c->
<c- n>std</c-><c- o>::</c-><c- n>pair</c-><c- o>&lt;</c-><c- b>double</c-><c- p>,</c-> <c- b>double</c-><c- o>></c-> <c- n>fast2sum</c-><c- p>(</c-><c- b>double</c-> <c- n>a</c-><c- p>,</c-> <c- b>double</c-> <c- n>b</c-><c- p>)</c-> <c- p>{</c->
  <c- b>double</c-> <c- n>sum</c-> <c- o>=</c-> <c- n>a</c-> <c- o>+</c-> <c- n>b</c-><c- p>;</c->
  <c- c1>// With reassociation, the compiler would turn this into double error = 0.0;</c->
  <c- b>double</c-> <c- n>error</c-> <c- o>=</c-> <c- n>b</c-> <c- o>-</c-> <c- p>(</c-><c- n>sum</c-> <c- o>-</c-> <c- n>a</c-><c- p>);</c->
  <c- k>return</c-> <c- p>{</c-><c- n>sum</c-><c- p>,</c-> <c- n>error</c-><c- p>};</c->
<c- p>}</c->

<c- c1>// Return a more precise estimate of the sum of the values than naive summation</c->
<c- c1>// would give.</c->
<c- b>double</c-> <c- n>kahan_summation</c-><c- p>(</c-><c- n>std</c-><c- o>::</c-><c- n>valarray</c-><c- o>&lt;</c-><c- b>double</c-><c- o>></c-> <c- n>vals</c-><c- p>)</c-> <c- p>{</c->
  <c- n>std</c-><c- o>::</c-><c- n>pair</c-><c- o>&lt;</c-><c- b>double</c-><c- p>,</c-> <c- b>double</c-><c- o>></c-> <c- n>sum</c-> <c- o>=</c-> <c- p>{</c-><c- mi>0</c-><c- p>,</c-> <c- mi>0</c-><c- p>};</c->
  <c- k>for</c-> <c- p>(</c-><c- b>double</c-> <c- n>v</c-> <c- o>:</c-> <c- n>vals</c-><c- p>)</c-> <c- p>{</c->
    <c- n>sum</c-> <c- o>=</c-> <c- n>fast2sum</c-><c- p>(</c-><c- n>sum</c-><c- p>.</c-><c- n>first</c-><c- p>,</c-> <c- n>v</c-> <c- o>+</c-> <c- n>sum</c-><c- p>.</c-><c- n>second</c-><c- p>);</c->
  <c- p>}</c->
  <c- k>return</c-> <c- n>sum</c-><c- p>.</c-><c- n>first</c-><c- p>;</c->
<c- p>}</c->
</pre>
   <h4 class="heading settled" data-level="5.1.4" id="fma"><span class="secno">5.1.4. </span><span class="content">FMA contraction</span><a class="self-link" href="#fma"></a></h4>
   <p>Many, though not all, hardware floating-point units offer an FMA instruction,
that computes the value <code class="highlight"><c- n>a</c-> <c- o>*</c-> <c- n>b</c-> <c- o>+</c-> <c- n>c</c-></code> in a single step, without any intermediate
rounding. The resulting instruction is usually faster than doing the operation
as separate instructions, and usually the extra precision is more helpful for
the user (though there are times when it is better to do it as two separate
instructions). Converting the source expression in this manner is known as
contraction, and almost all contraction in practice tends to either be to an FMA
instruction or some instruction that differs only in the signs of the inputs.</p>
   <p>As the FMA operation is one of the core operations mandated by IEEE 754, there
is practically always an implementation of FMA available, even if the hardware
lacks such an instruction. However, the emulation of FMA in software for such
hardware is slow, and many users would rather use the two-instruction
multiply-and-add form if that is the faster alternative.</p>
   <p>Given the utility of FMA contraction, several languages do provide guidelines
for FMA formation. C provides a <code class="highlight"><c- cp>#pragma STDC FP_CONTRACT ON</c-></code> facility, that
allows contraction within expressions. This is subtly different from the
compiler flag (or equivalent pragma) provided by many compiler implementations,
which will contract across expressions as well. Fortran provides a general
expression rewriting ability which includes FMA contraction.</p>
   <p>From the perspective of an optimizer, an operation <code class="highlight"><c- n>fast_fma</c-></code> whose semantics
are "do an FMA operation unless an <code class="highlight"><c- n>fmul</c-></code>-then-<code class="highlight"><c- n>fadd</c-></code> is faster" turns out to be
easier to work with. The code would start out as a single operation and remain
as a single operation throughout the entire optimization sequence, with little
risk of an optimization moving only part of the operation to another location
(e.g., hoisting out of a loop); it is also easier to reason about which version
is desired by the user for the purposes of constant folding or evaluation.
Additionally, representing it as two operations increases the risk that other
optimizations end up deleting optimization barriers that would have prevented
undesirable formations of FMA.</p>
   <p>The big problem with a <code class="highlight"><c- n>fast_fma</c-></code> approach, however, is that it is a ternary
operation and more cumbersome to use as an operator in otherwise typically
infix code, especially given that there exists a readily available syntax for
the operation via common operators (namely <code class="highlight"><c- o>*</c-></code> and <code class="highlight"><c- o>+</c-></code>). Furthermore, some users
may object to having to add extra methods to overload to make their custom
number-like types work well.</p>
   <p>Finally, it should be noted that FMA contraction is not always a good thing,
even on hardware where it is known to be fast. The expression <code class="highlight"><c- n>a</c-> <c- o>*</c-> <c- n>b</c-> <c- o>+</c-> <c- n>a</c-> <c- o>*</c-> <c- o>-</c-><c- n>b</c-></code>,
if evaluated as two multiplies and an add, is guaranteed to be exactly <code class="highlight"><c- o>+</c-><c- mf>0.0</c-></code> so
long as the inputs are finite. But if it is evaluated with a multiply and an
FMA, then it is likely to be a small nonzero value. Similarly, there are expressions
where evaluation via solely multiplies and adds would guarantee the result to
be positive, but if done via FMAs, it could be negative depending on the
vicissitudes of rounding. Consequently, while it may be desirable to turn on
FMA contraction by default, it is absolutely necessary to retain the ability to
disable it for code that doesn’t want it.</p>
   <h4 class="heading settled" data-level="5.1.5" id="fast-math"><span class="secno">5.1.5. </span><span class="content">Fast-math</span><a class="self-link" href="#fast-math"></a></h4>
   <p>In general, fast-math optimizations are any floating-point optimization that
would be mathematically equivalent if the numbers were real numbers, but are not
equivalent for floating-point expressions. Reassociation and FMA contraction, as
discussed above, are two such optimizations, but there exist other ones that are
not worth calling out into a separate section. These optimizations tend to fall
into two buckets.</p>
   <p>The first bucket of fast-math optimizations are ones that ignore the existence
of the special floating-point values: negative zero, infinities, and NaNs. For
example, the expression <code class="highlight"><c- n>x</c-> <c- o>+</c-> <c- mf>0.0</c-></code> is equivalent to <code class="highlight"><c- n>x</c-></code> for all floating-point
values save <code class="highlight"><c- mf>-0.0</c-></code> (as <code class="highlight"><c- mf>-0.0</c-> <c- o>+</c-> <c- mf>0.0</c-></code> is <code class="highlight"><c- o>+</c-><c- mf>0.0</c-></code>). Just as unlikely integer
overflow can impede certain optimizations, the unlikely presence of these
special values likewise impedes the ability to do some basic arithmetic optimization;
fast-math flags allow users to opt into these optimizations when they can
guarantee they will not be intentionally using these special values. It should
be noted that there is vociferous disagreement as to whether or not <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>isnan</c-><c- p>(</c-><c- n>nan</c-><c- p>)</c-></code> should be considered undefined when fast-math is in
effect.</p>
   <p>The second bucket of fast-math optimizations are ones that do not preserve the
precision of the resulting values. In addition to the optimizations discussed in
previous sections (which are all of this category), another common example is
being able to convert <code class="highlight"><c- n>a</c-> <c- o>/</c-> <c- n>b</c-></code> into <code class="highlight"><c- n>a</c-> <c- o>*</c-> <c- p>(</c-><c- mf>1.0</c-> <c- o>/</c-> <c- n>b</c-><c- p>)</c-></code>, with the reciprocal
expression hopefully being able to be hoisted out of a loop. Or one can convert <code class="highlight"><c- n>pow</c-><c- p>(</c-><c- n>x</c-><c- p>,</c-> <c- mf>0.5</c-><c- p>)</c-></code> to <code class="highlight"><c- n>sqrt</c-><c- p>(</c-><c- n>x</c-><c- p>)</c-></code> (although note that <code class="highlight"><c- n>pow</c-><c- p>(</c-><c- mf>-0.0</c-><c- p>,</c-> <c- mf>0.5</c-><c- p>)</c-></code> is <code class="highlight"><c- o>+</c-><c- mf>0.0</c-></code> while <code class="highlight"><c- n>sqrt</c-><c- p>(</c-><c- mf>-0.0</c-><c- p>)</c-></code> is <code class="highlight"><c- mf>-0.0</c-></code>).</p>
   <h4 class="heading settled" data-level="5.1.6" id="constexpr"><span class="secno">5.1.6. </span><span class="content">Constant expressions</span><a class="self-link" href="#constexpr"></a></h4>
   <p>In a strict floating-point model, the environment of floating-point operations
is important, and consequently, it matters a great deal whether or not a given
operation is to be evaluated at compile-time or at runtime. Here, the definition
of "compile-time" is specifically constant expression evaluation within the
frontend: the constant folding that may be done by an optimizer merely has to
preserve the illusion that it is done at runtime, and so long as the code
initially generated by the frontend has annotations that the operations interact
with the floating-point environment, that property is relatively easy to uphold
in the optimizer.</p>
   <p>When implementing C’s Annex F rules for floating-point environment, the
guideline for whether a given floating-point expression is evaluated at
compile-time or at runtime is clear: the initializer of an object with static or
thread storage duration is done at compile-time, while everything else must be
done (as if) at runtime. Of course, C lacks the <code class="highlight"><c- k>constexpr</c-></code> machinery of C++,
and thus there is very little opportunity to do interesting stuff at
compile-time, making such a simple rule easy to apply. C++ requires applying
more careful analysis.</p>
   <p>The most natural extension of C’s rules here is to say that any expression that
is part of a <em>core constant expression</em> needs to occur (as if) at compile-time;
any floating-point environment effects that are observed there would not be
observable in the program. Furthermore, any floating-point expression not part
of a core constant expression occurs (as if) at runtime. Thus, if the expression
is such that <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>is_constant_evaluated</c-><c- p>()</c-></code> would return <code class="highlight">true</code>, the user could
expect that the code will definitely be executed at compile time; and if it
would return <code class="highlight">false</code>, they would know that the effects would be visible to
runtime functions that manipulate the floating-point environment.</p>
   <p>Another issue with constant expressions in C++ is the role of environment during
constant expression evaluation. Since C++ allows for the ability to have
statements with side effects in constant expressions, it is possible to specify
that functions affecting the floating-point environment do so in constant
expressions as well, although it may not be desirable to do so.</p>
   <p>A final issue is that adjustments like fast-math optimizations are
unlikely to be implemented the same in the constant expression evaluator as they
are in the optimizer or the runtime evaluation. For example, if FMA contraction
is enabled, the constant expression evaluator generally has no way of knowing if
the runtime optimizer is capable of contracting the expression, and it is
unlikely to match. Constant expression evaluators today tend not to adapt to the
current fast-math flag state during constant expression evaluation.</p>
   <h4 class="heading settled" data-level="5.1.7" id="type-traits"><span class="secno">5.1.7. </span><span class="content">Type traits</span><a class="self-link" href="#type-traits"></a></h4>
   <p>C++ provides a few classes of type traits to indicate the properties of
floating-point types and their arithmetic operation. One of the issues with
these traits is that their interpretation is not fully clear in the presence of
fast-math optimizations, especially given that the ability to turn such
optimizations on for a finer-grained scope means that whether or not they are in
effect may change throughout a single translation unit.</p>
   <p>The most concrete example is to look at <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>numeric_limits</c-><c- o>&lt;</c-><c- b>float</c-><c- o>>::</c-><c- n>has_quiet_NaN</c-></code>. In the case of a fast-math
mode that makes use of NaN values undefined behavior, should this value return
true or false? At present, all implementations return <code class="highlight">true</code> for this statement,
which means that the behavior reflects whether the <em>format</em> supports qNaN values
rather than whether the computation actually supports it meaningfully. Similar
behavior can be observed for the meanings of <code class="highlight"><c- n>is_iec559</c-></code> (which, in practice,
amounts to "is this IEEE 754-format" and not "does this obey IEEE 754
arithmetic rules").</p>
   <p>In principle, it’s possible to add methods to query the adherence to fast-math
behaviors. Clang and GCC already provide macros like <code class="highlight"><c- n>__FAST_MATH__</c-></code> that are
defined in fast-math mode. However, these macros similarly don’t capture the
behavior in place for a scope, only the request at the command-line.
Furthermore, as fast-math is a collection of individual properties, it’s not
immediately clear what the value should be if only some of the fast-math
optimizations are enabled. Replacing these macros with special standard library
functions is generally inadvisable because either the functions would return
incorrect results due to differences at the point of evaluation or it would
require a lot of machinery that doesn’t exist in compilers today.</p>
   <h3 class="heading settled" data-level="5.2" id="user"><span class="secno">5.2. </span><span class="content">Users' perspective</span><a class="self-link" href="#user"></a></h3>
   <p>Several of the issues mentioned above are also issues that matter to users (in
particular, fast-math is often motivated by users' desires rather than
implementers' whims), but there are a few issues which tend to be dominated by
the need of users to do particular things.</p>
   <h4 class="heading settled" data-level="5.2.1" id="reproducible"><span class="secno">5.2.1. </span><span class="content">Reproducible results</span><a class="self-link" href="#reproducible"></a></h4>
   <p>One of the main concerns for some users is the need to reproduce results that
are identical across a diverse array of platforms. This is particularly salient
in the video game industry, where slight variances can cause multiplayer games
to desync (fail in such a way as to cause players to be kicked out of the game).
While most numerical code tends to already be built on a general assumption of a
mild degree of inaccuracy and can thus tolerate some degree of deviation
among diverse implementations, there are times when particular sequences are
exactly needed (e.g., in Kahan summation, as mentioned above), and thus defense
from a sufficiently smart compiler is necessary.</p>
   <p>Irreproducibility arises from several sources:</p>
   <ul>
    <li data-md>
     <p>Hardware doesn’t need to implement IEEE 754-based arithmetic. However, in
    practice, most hardware that people target does support at least the
    IEEE 754 formats, so this isn’t a major source in modern times.</p>
    <li data-md>
     <p>The behavior of IEEE 754 is underspecified in a few cases, most notably the
    handling of NaN payloads and the definition of tinyness for reporting
    underflow (although floating-point environment support is not relied on
    in most code anyways). IEEE 754 itself says that reproducible code
    should not rely on these behaviors anyways.</p>
    <li data-md>
     <p>Code compiling for the x87 FPU tends to want to use excess precision to avoid
    the performance penalty of emulating correct behavior on that FPU. Since
    this is a platform that has been obsolete for decades, it is declining
    in relevance for modern software.</p>
    <li data-md>
     <p>Fast-math optimizations, which include FMA contraction or reassociation, mean
    that the compiler has latitude to choose multiple variants when they are
    enabled and it should be expected that those choices differ from
    platform to platform.</p>
    <li data-md>
     <p>Denormal flushing may or may not be enabled on various platforms, and this can
    give different results. Also notable is that denormal flushing tends to
    partially rely on the definition of tinyness, so whether or not a given
    operation would be flushed to zero can itself change.</p>
    <li data-md>
     <p>The floating-point environment (which can include exotic flags like x87’s
    precision control flag) may have been adjusted by other libraries linked
    into an application, and this environment can affect the results.</p>
    <li data-md>
     <p>Differences in vector width on different platforms can result in vectorized
    algorithms being reassociated differently. Similarly, distributing work
    to a different number of threads can result in a reduction being done
    differently.</p>
    <li data-md>
     <p>Approximate instructions (such as the x86 <code class="highlight"><c- n>RSQRTSS</c-></code> instruction) can have
    different implementations by different vendors or even different
    microarchitectures of the same vendor.</p>
    <li data-md>
     <p>Standard math libraries do not have accuracy guarantees, and so the same
    compiler, linking to different standard libraries on different
    platforms, may produce different results for the same input. Libraries
    themselves may return different values for different library versions.
    Constant evaluation or constant folding may use the host library to
    evaluate these functions, and thus the result can vary based on the host
    platform of a cross-compiler, even given otherwise identical compiler
    and target platform.</p>
    <li data-md>
     <p>Math libraries such as BLAS routines may choose their kernels differently
    based on host parameters such as cache size, and thus minor variations
    in the chip may result in different values.</p>
   </ul>
   <p>Most users cannot be expected to know all of the ways that their floating-point
code is not reproducible. Thus, we need a feature that can reliably reproduce
floating-point code, even in the face of compiler flags saying "please make my
math irreproducible."</p>
   <h4 class="heading settled" data-level="5.2.2" id="round"><span class="secno">5.2.2. </span><span class="content">Rounding mode</span><a class="self-link" href="#round"></a></h4>
   <p>The default floating-point model used by most compilers does not allow reliable
access to the rounding mode or floating-point environment. As a consequence,
these features tend to go unused by implementations, even where they might be
helpful. Of the underused portions of the environment, the most useful is the
rounding mode. Furthermore, there is a growing trend in modern hardware to add
floating-point instructions where the rounding mode is an operand of the
instruction itself rather than relying on the rounding mode specified in the
floating-point environment, and it is useful to be able to have a language
facility that more directly maps to this style of hardware.</p>
   <h4 class="heading settled" data-level="5.2.3" id="fenv"><span class="secno">5.2.3. </span><span class="content">Environment access</span><a class="self-link" href="#fenv"></a></h4>
   <p>Being able to access the other bits of the floating-point environment is
occasionally useful. Floating-point exceptions do indicate erroneous situations,
after all, so being able to observe the error of individual operations is
helpful in some cases, much as users will sometimes want to test whether an
individual integer multiplication overflows. An example of some code that does
this looks as follows:</p>
<pre class="language-c++ highlight"><c- b>float</c-> <c- nf>scale</c-><c- p>(</c-><c- b>float</c-> <c- n>value</c-><c- p>,</c-> <c- b>float</c-> <c- n>pointScaleFactor</c-><c- p>)</c-> <c- p>{</c->
  <c- c1>// Ignore all previous exceptions that may have happened,</c->
  <c- c1>// we just care about this one operation.</c->
  <c- n>feclearexcept</c-><c- p>(</c-><c- n>FE_ALL_EXCEPT</c-><c- p>);</c->
  <c- b>float</c-> <c- n>result</c-> <c- o>=</c-> <c- n>value</c-> <c- o>*</c-> <c- n>pointScaleFactor</c-><c- p>;</c->
  <c- k>if</c-> <c- p>(</c-><c- n>fetestexcept</c-><c- p>(</c-><c- n>FE_OVERFLOW</c-> <c- o>|</c-> <c- n>FE_UNDERFLOW</c-><c- p>))</c-> <c- p>{</c->
    <c- c1>// report error ...</c->
  <c- p>}</c->
  <c- k>return</c-> <c- n>result</c-><c- p>;</c->
<c- p>}</c->
</pre>
   <p>The sticky nature of floating-point exceptions makes it easy to support multiple
operations or even entire numerical algorithms if that’s desired, but it also
does require clearing exceptions before doing the operations in question. Most
hardware implementations also provide the ability to turn floating-point
exceptions into traps, which could be combined with a software trap handler to
do fine-grained reporting of floating-point error conditions with minimal
overhead in the cases where conditions occur.</p>
   <p>A parallel to atomic memory references can even be drawn with floating-point
exceptions. In this model, what is generally desired is not that all
floating-point operations and their associated exceptions occur strictly
in accordance with the source behavior, but rather that they don’t get moved
across certain function calls. The calls to floating-point environment functions
can be seen as similar to atomic fences.</p>
   <h2 class="heading settled" data-level="6" id="solutions"><span class="secno">6. </span><span class="content">Solution space</span><a class="self-link" href="#solutions"></a></h2>
   <p>Having covered in detail the existing issues, the next thing to turn to is the
menu of options available to solve these problems. These options are not
mutually exclusive, nor is it necessary to pick the same option for different
features.</p>
   <h3 class="heading settled" data-level="6.1" id="rien"><span class="secno">6.1. </span><span class="content">Do nothing</span><a class="self-link" href="#rien"></a></h3>
   <p>It is always an option to not attempt to say anything about the precise details
of floating-point semantics. This is what C++ largely does today, and as the
survey of programming languages shows, many other languages are able to get by
with only vague hand waves to behavior.</p>
   <h3 class="heading settled" data-level="6.2" id="ub"><span class="secno">6.2. </span><span class="content">Unspecified behavior</span><a class="self-link" href="#ub"></a></h3>
   <p>Explicitly unspecified behavior is another avenue for some of the semantics. In
cases where some degree of nondeterminism is already expected, making the
floating-point behavior itself be nondeterministic can provide a lot of benefit
without adding much, if any, tax to the user’s mental model. Indeed, today C++
leverages this in its definition of <em><code class="highlight"><c- n>GENERALIZED_SUM</c-></code></em>.</p>
   <h3 class="heading settled" data-level="6.3" id="uber-alles"><span class="secno">6.3. </span><span class="content">Demand strict conformance</span><a class="self-link" href="#uber-alles"></a></h3>
   <p>Demanding strict conformance to IEEE 754 arithmetic in all aspects is the
extreme opposite of saying nothing. However, as already extensively detailed,
compilers deviate from IEEE 754 in a myriad of small ways, and they are
extremely unlikely to go for strict conformance just because the standard
demands it of them. I judge it better for the standard to admit reality here and
instead discuss how to cope with deviations from IEEE 754 than live in a
pretense that everybody is strictly conforming.</p>
   <h3 class="heading settled" data-level="6.4" id="please-pick-me"><span class="secno">6.4. </span><span class="content">Pragmas</span><a class="self-link" href="#please-pick-me"></a></h3>
   <p>While the committee may look unfavorably on pragmas in general, it is worth
bearing in mind that sometimes they are the most appropriate tool for the job.
When it comes to controlling the semantics of floating-point operators, pragmas
are by far the most common option chosen, with all of the languages that specify
means for users to control their behavior doing so via pragmas or pragma-like
equivalents (see section 4). Indeed, a majority of C++ implementations <em>already</em> support pragmas for some of these features (and even if other mechanisms are
chosen, it is substantially likely that they will be implemented via existing
pragmas).</p>
   <p>The main advantage of pragmas as a tool is that they are infinitely
generalizable. If a compiler decides to add a new knob to the floating-point
behavior, it is trivial to add user support for that knob via pragmas.</p>
   <p>Pragmas do have significant drawbacks though. They do not work well with generic
code, since there is currently no way for code to declare that it needs to
inherit the pragma state of its caller:</p>
<pre class="language-c++ highlight"><c- n>template</c-> <c- o>&lt;</c-><c- n>typename</c-> <c- n>T</c-><c- o>></c-> <c- k>struct</c-> <c- nc>wrapper</c-> <c- p>{</c->
  <c- n>T</c-> <c- n>val</c-><c- p>;</c->
  <c- n>wrapper</c-><c- p>(</c-><c- n>T</c-> <c- n>val</c-><c- p>)</c-> <c- o>:</c-> <c- n>val</c-><c- p>(</c-><c- n>val</c-><c- p>)</c-> <c- p>{}</c->
  <c- c1>// Given these function implementations ...</c->
  <c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>operator</c-><c- o>+</c-><c- p>(</c-><c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>o</c-><c- p>)</c-> <c- p>{</c-> <c- k>return</c-> <c- n>val</c-> <c- o>+</c-> <c- n>o</c-><c- p>.</c-><c- n>val</c-><c- p>;</c-> <c- p>}</c->
  <c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>operator</c-><c- o>*</c-><c- p>(</c-><c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>o</c-><c- p>)</c-> <c- p>{</c-> <c- k>return</c-> <c- n>val</c-> <c- o>*</c-> <c- n>o</c-><c- p>.</c-><c- n>val</c-><c- p>;</c-> <c- p>}</c->
<c- p>};</c->

<c- k>typedef</c-> <c- n>wrapper</c-><c- o>&lt;</c-><c- b>float</c-><c- o>></c-> <c- n>wfloat</c-><c- p>;</c->

<c- c1>// ... this should compile down into an FMA...</c->
<c- n>wfloat</c-> <c- nf>use_fma</c-><c- p>(</c-><c- n>wfloat</c-> <c- n>a</c-><c- p>,</c-> <c- n>wfloat</c-> <c- n>b</c-><c- p>,</c-> <c- n>wfloat</c-> <c- n>c</c-><c- p>)</c-> <c- p>{</c->
  <c- cp>#pragma STDC FP_CONTRACT ON</c->
  <c- k>return</c-> <c- n>a</c-> <c- o>*</c-> <c- n>b</c-> <c- o>+</c-> <c- n>c</c-><c- p>;</c->
<c- p>}</c->

<c- c1>// ... but this one shouldn’t...</c->
<c- n>wfloat</c-> <c- nf>dont_use_fma</c-><c- p>(</c-><c- n>wfloat</c-> <c- n>a</c-><c- p>,</c-> <c- n>wfloat</c-> <c- n>b</c-><c- p>,</c-> <c- n>wfloat</c-> <c- n>c</c-><c- p>)</c-> <c- p>{</c->
  <c- cp>#pragma STDC FP_CONTRACT OFF</c->
  <c- k>return</c-> <c- n>a</c-> <c- o>*</c-> <c- n>b</c-> <c- o>+</c-> <c- n>c</c-><c- p>;</c->
<c- p>}</c->

<c- c1>// ... but the pragmas can’t reach into the operator function definitions!</c->
</pre>
   <p>There are some extensions that might be able to mitigate this problem of
inheriting floating-point context. One can imagine an attribute that would
indicate that the function does so:</p>
<pre class="language-c++ highlight"><c- c1>// In addition to inheriting floating-point context, this would also signal the</c->
<c- c1>// equivalent of always_inline and forbid taking the address.</c->
<c- c1>// NOTE: this does violate standard attribute ignorability rules.</c->
<c- p>[[</c-><c- n>intrinsic</c-><c- p>]]</c-> <c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>operator</c-><c- o>+</c-><c- p>(</c-><c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>lhs</c-><c- p>,</c-> <c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>rhs</c-><c- p>)</c-> <c- p>{</c->
  <c- k>return</c-> <c- n>lhs</c-><c- p>.</c-><c- n>val</c-> <c- o>+</c-> <c- n>rhs</c-><c- p>.</c-><c- n>val</c-><c- p>;</c->
<c- p>}</c->
</pre>
   <p>Or a template parameter that can inherit the floating-point context:</p>
<pre class="language-c++ highlight"><c- c1>// Special parameter value that inherits the context of pragma state from its</c->
<c- c1>// caller context.</c->
<c- n>template</c-> <c- o>&lt;</c-><c- n>float_context</c-> <c- n>ctx</c-> <c- o>=</c-> <c- n>inherit_float_context</c-><c- o>></c->
<c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>operator</c-><c- o>+</c-><c- p>(</c-><c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>lhs</c-><c- p>,</c-> <c- n>wrapper</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-> <c- n>rhs</c-><c- p>)</c-> <c- p>{</c->
  <c- k>return</c-> <c- n>lhs</c-><c- p>.</c-><c- n>val</c-> <c- o>+</c-> <c- n>rhs</c-><c- p>.</c-><c- n>val</c-><c- p>;</c->
<c- p>}</c->
</pre>
   <h3 class="heading settled" data-level="6.5" id="a-bad-idea"><span class="secno">6.5. </span><span class="content">Attributes</span><a class="self-link" href="#a-bad-idea"></a></h3>
   <p>C++ attributes can attach to blocks and function definitions, which provide
sufficient functionality to do what the C floating-point pragmas do while
avoiding use of the preprocessor entirely. The rule that standard attributes
have to be ignorable limits their use to only controlling those floating-point
features that resemble fast-math flags rather than those that are making the
behavior stricter.</p>
   <h3 class="heading settled" data-level="6.6" id="a-worse-idea"><span class="secno">6.6. </span><span class="content">Fundamental types</span><a class="self-link" href="#a-worse-idea"></a></h3>
   <p>Another avenue of exploration is augmenting the floating-point types to
represent varying floating-point semantics. These augmentations can come in the
form of new types (similar to how <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>float16_t</c-></code> was added), in the form of
custom type specifiers and/or qualifiers, or in the form of standard library
templated types (discussed further in the next section).</p>
   <p>The primary advantage of representing floating-point semantics in this way is
that it tends to compose well with the use of templates for generic code. Any
code that needs to be generic over the precise floating-point semantics can
easily do so just by templating over these types, without need for any other
language features.</p>
   <p>The primary disadvantage of this representation is that it is not composable
with the highly tunable nature of floating-point semantics. Each knob creates a
combinatorial explosion of new types to handle. Type specifiers might at least
avoid the need to name each member, but they do not remove the need to provide a
new version of the function for each member of the power set of qualifiers (or
at least a new instantiation of a templated function).</p>
   <p>A more subtle disadvantage is that this approach attaches the behavior of
semantics to types rather than to operations themselves, and that makes the
task of mapping an operation to its semantics--especially when the operation has
heterogeneous types for parameters--more difficult not only for the implementer
but also for the user (in their mental model). Special care would also have to
be given to the behavior of implicit and explicit conversions for these types,
and such conversions are already a problem for floating-point types which can
have at least three distinct types representing the same underlying type today.</p>
   <h3 class="heading settled" data-level="6.7" id="please-dont-pick-me"><span class="secno">6.7. </span><span class="content">Type wrappers</span><a class="self-link" href="#please-dont-pick-me"></a></h3>
   <p>A commonly suggested approach for solving these problems is the use of templated
type wrappers for floats, something like <code class="highlight"><c- n>fast_float</c-><c- o>&lt;</c-><c- b>float</c-><c- o>></c-></code> or <code class="highlight"><c- n>reproducible</c-><c- o>&lt;</c-><c- b>float</c-><c- o>></c-></code>. These share much of their trade-offs with the previous
case of fundamental types, but they also have some interesting differences.</p>
   <p>First, they move the semantics from core language to the library portion of the
specification. In implementation terms, they still ultimately need some sort of
secret handshake between the compiler and the standard library, but this can
reuse existing compiler features. It also allows them to be experimented with
and tested without needing to use a custom compiler, making them an easier
vehicle to gain implementation experience.</p>
   <p>However, they also differ from something like qualifiers in that the syntax of
templates creates additional burdens for the high multiplicity of control
knobs, which varies slightly depending on how these knobs are handled in
template form.</p>
   <p>One approach is to represent each knob as an independent template wrapper, for
example <code class="highlight"><c- n>reassociable</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-></code> to enable reassociation, <code class="highlight"><c- n>contractable</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-></code> to enable
FMA contraction, etc. This allows for a complete open set of properties--it’s
infinitely generalizable--but it is also prone to the problem that <code class="highlight"><c- n>reassociable</c-><c- o>&lt;</c-><c- n>contractable</c-><c- o>&lt;</c-><c- n>T</c-><c- o>>></c-></code> is a different type than <code class="highlight"><c- n>contractable</c-><c- o>&lt;</c-><c- n>reassociable</c-><c- o>&lt;</c-><c- n>T</c-><c- o>>></c-></code>.</p>
   <p>Another approach is to use just one single template wrapper, and have template
parameters for each possible knob, e.g., <code class="highlight"><c- k>template</c-> <c- o>&lt;</c-><c- k>typename</c-> <c- nc>T</c-><c- p>,</c-> <c- b>bool</c-> <c- n>contract</c-><c- p>,</c-> <c- b>bool</c-> <c- n>reassociate</c-><c- o>></c-> <c- k>struct</c-> <c- nc>fast_float</c-></code>. This
approach resolves the order-of-wrapping problem that independent wrapper types
would have. But in the process, it makes the set of available options
essentially a closed set.</p>
   <p>A third approach is to use a single template wrapper and a single configuration
parameter, but have the configuration parameter be a struct parameter and rely
on designated initializers to make the specification of the template be somewhat
tolerable for users:</p>
<pre class="language-c++ highlight"><c- k>struct</c-> <c- nc>fast_flags</c-> <c- p>{</c->
    <c- b>bool</c-> <c- n>nnan</c-><c- p>;</c->
    <c- b>bool</c-> <c- n>ninf</c-><c- p>;</c->
    <c- b>bool</c-> <c- n>nsz</c-><c- p>;</c->
    <c- b>bool</c-> <c- n>reassoc</c-><c- p>;</c->
    <c- b>bool</c-> <c- n>contract</c-><c- p>;</c->
    <c- b>bool</c-> <c- n>afn</c-><c- p>;</c->
    <c- b>bool</c-> <c- n>arcp</c-><c- p>;</c->
<c- p>};</c->
<c- n>template</c-> <c- o>&lt;</c-><c- n>typename</c-> <c- n>T</c-><c- p>,</c-> <c- n>fast_flags</c-> <c- n>f</c-><c- o>></c-> <c- k>struct</c-> <c- nc>fast</c-><c- p>;</c->
<c- n>fast</c-><c- o>&lt;</c-><c- b>float</c-><c- p>,</c-> <c- p>{</c-> <c- p>.</c-><c- n>nnan</c-> <c- o>=</c-> true<c- p>,</c-> <c- p>.</c-><c- n>ninf</c-> <c- o>=</c-> true <c- p>}</c-><c- o>></c-> <c- n>fast_val</c-><c- p>;</c->
</pre>
   <h3 class="heading settled" data-level="6.8" id="viva-la-resistance"><span class="secno">6.8. </span><span class="content">Free functions</span><a class="self-link" href="#viva-la-resistance"></a></h3>
   <p>As opposed to attaching the floating-point semantics to types, it is instead
possible to attach them to functions themselves, for example, providing a <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>fast_fma</c-></code> function that may optionally evaluate as one or two operations
for the purposes of rounding.</p>
   <p>The chief advantage of such an approach is that most of the semantic knobs tend
to be oriented around the actual operation; unlike attaching to types, where
there is a potential to mix heterogeneous semantic operands. Free functions also
lend themselves to introducing new operations that aren’t easily indicated via
the current operators used in regular infix notation for C++ types, e.g., the
FMA operation or Kahan summation.</p>
   <p>The disadvantage of free functions is that they do not necessarily play well
with custom wrapper types. Clever use of <code class="highlight"><c- k>if</c-> <c- k>constexpr</c-></code> can ameliorate this to a
degree, allowing an implementation to call an overload of <code class="highlight"><c- n>fma</c-></code> if it is
available or otherwise falling back to <code class="highlight"><c- n>a</c-> <c- o>*</c-> <c- n>b</c-> <c- o>+</c-> <c- n>c</c-></code>, but it still adds friction
to the design of such libraries.</p>
   <h3 class="heading settled" data-level="6.9" id="hand-of-merlin"><span class="secno">6.9. </span><span class="content">Special lambdas</span><a class="self-link" href="#hand-of-merlin"></a></h3>
   <p>A final category of change is to have standard library functions that take as
an argument a lambda whose body is compiled in a different mode. This has been
used with a degree of success by SYCL, where offloaded kernels are indicated by
this kind of mechanism:</p>
<pre class="language-c++ highlight"><c- n>cgh</c-><c- p>.</c-><c- n>parallel_for</c-><c- p>(</c-><c- mi>1024</c-><c- p>,</c-> <c- p>[</c-><c- o>=</c-><c- p>](</c-><c- n>id</c-><c- o>&lt;</c-><c- mi>1</c-><c- o>></c-> <c- n>idx</c-><c- p>)</c-> <c- p>{</c->
  <c- c1>// This body executes in an offloaded device context, not the host context.</c->
<c- p>});</c->
</pre>
   <p>The advantage of this kind of approach is that it creates a function call
barrier between the code in the lambda body and the code outside of it, and
function calls are very natural optimization barriers for a compiler.
Furthermore, lambdas' ability to capture their environment means there is
relatively little writing overhead to moving code that needs to be protected
into the lambda body.</p>
   <p>The main disadvantage is that this does not work very well in contexts where the
frontend needs to generate different IR for different floating-point contexts,
since a compiler can easily only compile the lambda body, and not the functions
it calls, in a different mode. Doing a call graph traversal to find recursively
called functions to generate their bodies in a different mode is ill-advised in
the frontend of the compiler, since it’s generally going to be less accurate and
will likely result in a slew of bugs where it misses various awkward implied
calls.</p>
   <h2 class="heading settled" data-level="7" id="proposal"><span class="secno">7. </span><span class="content">Proposal</span><a class="self-link" href="#proposal"></a></h2>
   <p>This section gives a summary of the current state of C++ with respect to the
issues mentioned in section 5, a discussion of how some of the existing issues
might be fixed, and the author’s proposed fixes, with rationale as to why.</p>
   <h3 class="heading settled" data-level="7.1" id="floating-point-formats"><span class="secno">7.1. </span><span class="content">Floating-point formats</span><a class="self-link" href="#floating-point-formats"></a></h3>
   <p>At present, <code class="highlight"><c- b>float</c-></code> and <code class="highlight"><c- b>double</c-></code> are not required to be IEEE 754 formats. It is
possible to strengthen the specification to require them to follow the IEEE 754
specification as far as the layout is concerned. There is very little existing
hardware which has hardware floating-point support but lacks support for the
IEEE 754 formats. Some microcontrollers do map both <code class="highlight"><c- b>float</c-></code> and <code class="highlight"><c- b>double</c-></code> to
IEEE 754 single-precision format.</p>
   <p>The main benefit to dropping support for non-IEEE 754 formats is that it makes
it possible to omit consideration of types that lack infinities or NaNs for the
purposes of special-case behavior in math functions. However, the current
specification doesn’t go into any detail here anyways, and the C specification’s
discussion of various kinds of issues is sufficient to cover this, if it were
adapted into the C++ specification.</p>
   <p>Recommendation: <strong>Do nothing</strong></p>
   <h3 class="heading settled" data-level="7.2" id="excess-precision"><span class="secno">7.2. </span><span class="content">Excess precision</span><a class="self-link" href="#excess-precision"></a></h3>
   <p>Excess precision is currently handled in C and C++ via the rules embodied by the
setting of the <code class="highlight"><c- n>FLT_EVAL_METHOD</c-></code> macro (there is no standard way for a user to
modify the setting in other pragmas, even with C’s pragmas), although there are
currently some unclear issues with the current rules, e.g., CWG2752.</p>
   <p>Given that compilers do not reliably implement the behavior required of them for <code class="highlight"><c- n>FLT_EVAL_METHOD</c-></code> on the one platform where it makes a difference (namely,
arithmetic using only the x87 FPU on x86 hardware), and that this platform is of
declining importance, such compilers are today nonconforming and are unlikely to
become conforming in response to future standard changes. It is not a worthwhile
use of this committee’s time to further clarify the rules here if no one is
going to change to become conforming.</p>
   <p>Recommendation: <strong>Strip out support for excess precision entirely</strong></p>
   <h3 class="heading settled" data-level="7.3" id="denormal-flushing"><span class="secno">7.3. </span><span class="content">Denormal flushing</span><a class="self-link" href="#denormal-flushing"></a></h3>
   <p>The main difficulty with denormal flushing is that because the hardware
environment can be affected by link-time flags, it is largely unknowable in too
many cases by the compiler whether or not denormal flushing will actually be in
effect or not. Based on current hardware trends, the performance benefits of
enabling denormal flushing are likely to be nullified in the future. Thus, it is
reasonable to assume that, in several years' time, denormal flushing may end up
having little practical modern relevance, as has happened with excess precision.</p>
   <p>For current compilers to be conforming, denormal flushing can neither be
prohibited nor required; additionally, the explicit lack of requirement that
compile-time floating-point semantics exactly match runtime semantics serves to
make the behavior compliant on architectures where the default runtime
environment is changed by link-time flags (and thus intrinsically unknowable at
compile-time). Absent some possible clarification on the behavior of <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>numeric_limits</c-><c- o>::</c-><c- n>denorm_min</c-><c- p>()</c-></code>, there does not seem to be a need to change
anything with respect to denormal flushing at this time.</p>
   <p>Recommendation: <strong>Do nothing</strong></p>
   <h3 class="heading settled" data-level="7.4" id="fast-math①"><span class="secno">7.4. </span><span class="content">Fast-math</span><a class="self-link" href="#fast-math①"></a></h3>
   <p>The trouble with representing fast-math semantics is that it is an inherently
open class of flags (and compilers will include more than whatever the standard
requires) which can be independently toggled. The only language features we have
that easily accommodate such capabilities are pragmas or block attributes.
However, these approaches do not work well with generic code, as discussed in a
previous section.</p>
   <p>Some fast-math flags are describable as changing the set of allowable values for
a type. For example, the effect of <code class="highlight"><c- o>-</c-><c- n>ffinite</c-><c- o>-</c-><c- n>math</c-><c- o>-</c-><c- n>only</c-></code> is to make NaN and
infinity values into trap representations of floating-point numbers. Since they
have a value-based effect, they actually map quite nicely to being described with a
type wrapper like <code class="highlight"><c- n>finite</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-></code>, where any operation that would result in a NaN or
infinity value (such as <code class="highlight"><c- n>sqrt</c-><c- p>(</c-><c- mi>-1</c-><c- p>)</c-></code>) would instead cause undefined behavior (and
to have the desired effect on the optimizer, it is necessary that it cause
undefined behavior and have unpredictable results, as opposed to relying on
erroneous behavior or other more constrained forms of behavior).</p>
   <p>Type wrappers work poorly if there are many flags to be applied. Fortunately,
there is not a large number of value-based fast math properties: there are four
main classes of special values (<code class="highlight"><c- mi>-0</c-></code>, infinities, quiet NaNs, and signaling
NaNs), and even then, many of those combinations do not have great practical
value (it is not advantageous to support signaling NaNs but not quiet NaNs, for
example). Despite this low number, adding more than one type wrapper, or maybe
two if they are not orthogonal, seems inadvisable.</p>
   <p>The non-value-based fast-math flags, such as allowing reassociation, do not seem
particularly amenable to type wrappers, as their effect is largely in relation
to combinations of operations and do not have any clear value-based semantics.
In addition, since their effect is to enable certain rewrites of the code, for
the most part it is more beneficial that these take effect rather globally, as
allowing it to happen for a specific, narrow region of code could instead be
effected by just rewriting the code to the desired form. Instead, most uses are
more likely to disable these optimizations for particular sensitive regions
rather than to enable them. However, there are two main exceptions, which are
covered in their own, subsequent sections.</p>
   <p>Recommendations: <strong>Let fast-math flags be conforming compiler extensions,
enabled or disabled by command-line flags or existing pragmas not described by
the standard. Pursue an approach to make pragmas work better with generic code.
Consider adopting a <code class="highlight"><c- n>finite</c-><c- o>&lt;</c-><c- n>T</c-><c- o>></c-></code>-like class that makes infinities and NaNs
undefined behaviors for instances of that class.</strong></p>
   <h3 class="heading settled" data-level="7.5" id="associativity"><span class="secno">7.5. </span><span class="content">Associativity</span><a class="self-link" href="#associativity"></a></h3>
   <p>The main use case for allowing free reassociation of variables is a loop, or
other reducing context, that is accumulating one or more results over multiple
iterations which could be replaced with some form of parallelized loop body,
which necessarily executes the reducing steps in a different order than the
regular iteration order implied by a serial execution.</p>
   <p>Already in C++, <a href="https://eel.is/c++draft/numerics.defns">GENERALIZED_SUM</a> has
sufficient specification to imply reassociation. Where a loop does but one
reduction, it can be rewritten to use <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>accumulate</c-></code> or <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>reduce</c-></code> which by using
GENERALIZED_SUM already imply reassociation. Similarly, the <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>execution</c-><c- o>::</c-><c- n>unseq</c-></code> execution policy also implies the ability to write a
more generic loop that the compiler may vectorize regardless of other legality
restrictions. So C++ already has something akin to a free function that will do
a reassociable reduction.</p>
   <p>Recommendation: <strong>Add no new facilities</strong></p>
   <h3 class="heading settled" data-level="7.6" id="fma-contraction"><span class="secno">7.6. </span><span class="content">FMA contraction</span><a class="self-link" href="#fma-contraction"></a></h3>
   <p>Being able to contract expressions into FMAs is arguably the most useful of
fast-math flags, since if hardware has FMA instructions, they are almost always
better to use than regular instructions. There are, however, cases where FMA
contraction is undesirable, so users need to have the ability to opt out of
FMA contraction at times.</p>
   <p>From a semantic perspective, the best approach to FMA contraction is to provide
a distinct free function that is an FMA operation if the hardware can do it
quickly or else a pair of multiply/add instructions. For example, the Julia
language provides such an operation. The advantage of such an approach is that
it is always clear what the user intends. The main disadvantage, though, is that
users would have to opt into the new code form, and it also requires more user
overloads to make a "floating-point-like" type. Additionally, assuming such a
facility is limited to floating-point types, it makes it harder to write basic
numerical utilities that are agnostic over their underlying types being integers
or floating-point types (or other kinds of algebraic categories).</p>
   <p>The primary way this feature is effected today within compilers is via pragmas
and command-line options. Like all pragma-based approaches, this suffers from
the current inability to write a generic function that can inherit the pragma
state of its caller. While IEEE 754 requires for its rules on contraction that
it can happen only within an expression, most compiler implementations ignore
this rule and instead freely contract any multiply and add that may happen to
wander near each other after optimizations kick in, even should they cross
function boundaries. Between this and the multiple phases of optimization, the
compiler will decide to contract or fail to contract an operation differently
even for the same operation in different contexts, and there is a steady trickle
of user complaints about this difference that are simply not fixable with this
design.</p>
   <p>The final major set of alternatives is to make contractability a part of the
floating-point type. But contractability is fairly orthogonal to all of the
other concerns of floating-point semantics, and as a result, the problem of
composing multiple type properties is particularly salient. Furthermore, the
feature is a property largely of the operations (and in particular, really just
addition and multiplication) and not of the types, which makes expressing it via
types a somewhat circuitous way to achieve the goals.</p>
   <p>None of these options can be advocated for as particularly good solutions to the
problem. Instead, a choice must be made as to which one is the least bad
solution.</p>
   <p>Recommendation: <strong>Pursue a free function for fast FMA to enable FMA
contraction</strong>.</p>
   <h3 class="heading settled" data-level="7.7" id="constant-expression-rules"><span class="secno">7.7. </span><span class="content">Constant expression rules</span><a class="self-link" href="#constant-expression-rules"></a></h3>
   <p><a href="https://eel.is/c++draft/expr.const#23">expr.const/p23</a> lists as recommended
practice that floating-point results be consistent between compile-time and
runtime execution.
Additionally, <a href="https://eel.is/c++draft/library.c#3">library.c/3</a> requires that math functions conform to the requirements of C’s Annex F as far
as possible for the purposes of constant expressions. These are the only
guidelines for floating-point constant expressions in C++ at present.</p>
   <p>The generalized constant expression support in C++ allows us to theoretically
support accessing and modifying the environment in constant-evaluated contexts.
However, given that it is not really possible to synchronize the compile-time
and runtime environments, and given that admitting advanced environment features
are more difficult for the compiler to emulate correctly at compile-time, it
seems most prudent to simply not allow general access to the environment at
compile-time. Rounding mode could be supported, but a static rounding mode
support as envisioned in P2746 is a superior interface, and there is no need for
a general environment access for that feature. Instead, the environment should
be fixed to its default for compile-time access.</p>
   <p>There is a related question about the behavior of floating-point expressions in
the presence of exceptions. C++ already requires that a call to a C standard
library function that raises a floating-point exception is a non-constant
library call; it is possible to extend this rule to apply to all floating-point
expressions, even basic ones like addition or multiplication. Some compilers
already do this when the result of an operation is a NaN, but this does not
appear to happen in the case of overflows or underflows.</p>
   <p>Deviations between compile-time and runtime execution can happen for a few
reasons. Environment access might be different. The subtle differences in rules
around excess precision and denormal flushing can also produce a difference.
Finally, the frontend may fail to account for the rewritten code caused by
generic fast-math optimizations, especially ones like FMA contraction, as the
frontend is not capable of perfectly predicting what the optimizer will do. As
a result, it is not really possible to mandate that the compile-time and runtime
execution follow the same rules, and likely implementations would simply ignore
such a mandate even if it were to exist, due to the intrinsic difficulties in
doing so.</p>
   <p>Recommendations: <strong>Continue to not require equivalent semantics for
compile-time and runtime execution of floating-point. Do not make any
floating-point environment manipulation or introspection functions <code class="highlight"><c- k>constexpr</c-></code>.
Explore making floating-point exceptions in regular operations non-<code class="highlight"><c- k>constexpr</c-></code>.</strong></p>
   <h3 class="heading settled" data-level="7.8" id="type-traits①"><span class="secno">7.8. </span><span class="content">Type traits</span><a class="self-link" href="#type-traits①"></a></h3>
   <p>C++ has a set of type traits centered around <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>numeric_limits</c-></code> that indicate
the properties of floating-point hardware. All of the members of <code class="highlight"><c- n>std</c-><c- o>::</c-><c- n>numeric_limits</c-></code> are <code class="highlight"><c- k>constexpr</c-></code>, which means the compiler has to fix a
choice for the value for the entire execution, and the traits are not capable of
reflecting the dynamic environment if the hardware is capable of modifying
behavior dynamically (e.g., flushing denormals).</p>
   <p>With the existing wording, it is not clear what the value of (e.g.) <code class="highlight"><c- n>has_infinity</c-></code> should be if compiled with fast-math flags that make infinities
equivalent to undefined behavior: the format supports it, but the execution
environment does not. Given the ability in most implementations to vary the
behavior of fast-math-like flags on a finer grained unit than the entire
translation unit, the effects of these flags should be similar to the effect of
the dynamic floating-point environment on the flags, which is to say none. This
is already how implementations interpret these flags, so it should be made
clearer in the specification that this is intended behavior.</p>
   <p>Recommendation: <strong>Clarify in wording that type traits do not reflect
fast-math flags.</strong></p>
   <h3 class="heading settled" data-level="7.9" id="rounding-mode"><span class="secno">7.9. </span><span class="content">Rounding mode</span><a class="self-link" href="#rounding-mode"></a></h3>
   <p>Currently, C++ borrows the interface for rounding modes from C, but doesn’t
adopt the <code class="highlight"><c- n>FENV_ROUND</c-></code> pragma that was recently added in C23. The issues with
the dynamic rounding mode functions are fairly well laid out in the existing
series of papers on P2746, which proposes to deprecate the existing
functionality and replace it with what are effectively free functions for
doing operations in a fixed static rounding mode (including <code class="highlight"><c- k>constexpr</c-></code>
support).</p>
   <p>As P2746 has already made substantial progress within WG21, there is no reason
to disturb that progress, and it is only mentioned here for the sake of
completeness on the topic.</p>
   <p>Recommendation: <strong>Continue work on P2746</strong></p>
   <h3 class="heading settled" data-level="7.10" id="reproducible-results"><span class="secno">7.10. </span><span class="content">Reproducible results</span><a class="self-link" href="#reproducible-results"></a></h3>
   <p>P3375 proposes that C++ add some feature to indicate reproducible results for
the compiler. It is still at an early stage of discussion, and does not have a
specific design for the feature, but strongly leans toward a type wrapper or
new fundamental type approach.</p>
   <p>For the narrow use case of ensuring that the numerical results are identical
across diverse compilation environments, attaching this information via a
type wrapper or fundamental type works well. The big problem with such
approaches is that type properties do not compose well, but a type annotation of
"disregard all other instructions to loosen semantics" implies that there is no
composition at all--applying fast-math flags to such a type defeats the purpose
of the type in the first place. Furthermore, type-based properties work the best
at ensuring that they will be picked up in generic code, which is especially
important for this use case.</p>
   <p>Recommendation: <strong>Pursue a type-based approach that enforces a precise
floating-point model for some operations, without the ability to mix with other
fast-math flags.</strong></p>
   <h3 class="heading settled" data-level="7.11" id="environment-access"><span class="secno">7.11. </span><span class="content">Environment access</span><a class="self-link" href="#environment-access"></a></h3>
   <p>C++ relies on the C library functions to manage access to the environment, and
other than a comment not requiring support for C’s <code class="highlight"><c- n>FENV_ACCESS</c-></code> pragma
(necessary for these functions to have effect in C), is silent on any details.
In practice, code needing environment access tends to rely on the use of
compiler flags to put the compiler in a strict floating-point model.</p>
   <p>But even without these flags, it is often possible to get the compiler to mostly
reliably support environment access by following two rules. First, ensure that
the floating-point code in question is actually executed at runtime rather than
being potentially executed at compile-time (which largely means preventing the
compile-time optimization of constant-folding from kicking in). Second, ensure
that the compiler does not have the freedom (or at least the desire) to move the
floating-point code around the calls to the environment function. After all, in
the absence of fast-math flags, there tends to be rather little in the way of
optimizations that can be applied to floating-point code outside of the usual
universal optimizations of constant folding, dead code elimination, and some
forms of code motion.</p>
   <p>Given these constraints, for the purposes of checking the exceptions of a
floating-point operation, the approach that is most worth pursuing is probably
something in the form of a library function that wraps a lambda and returns
the floating-point exceptions:</p>
<pre class="language-c++ highlight"><c- b>float</c-> <c- nf>scale</c-><c- p>(</c-><c- b>float</c-> <c- n>value</c-><c- p>,</c-> <c- b>float</c-> <c- n>pointScaleFactor</c-><c- p>)</c-> <c- p>{</c->
  <c- b>float</c-> <c- n>result</c-><c- p>;</c->
  <c- c1>// Returns the exceptions, if any, raised by any floating-point operation</c->
  <c- c1>// invoked by the lambda.</c->
  <c- k>auto</c-> <c- n>exception</c-> <c- o>=</c-> <c- n>std</c-><c- o>::</c-><c- n>check_fp_except</c-><c- p>([</c-><c- o>&amp;</c-><c- p>]()</c-> <c- p>{</c->
    <c- n>result</c-> <c- o>=</c-> <c- n>value</c-> <c- o>*</c-> <c- n>pointScaleFactor</c-><c- p>;</c->
  <c- p>});</c->
  <c- k>if</c-> <c- p>(</c-><c- n>exception</c-> <c- o>&amp;</c-> <c- p>(</c-><c- n>FE_OVERFLOW</c-> <c- o>|</c-> <c- n>FE_UNDERFLOW</c-><c- p>))</c-> <c- p>{</c->
    <c- c1>// report error...</c->
  <c- p>}</c->
  <c- k>return</c-> <c- n>result</c-><c- p>;</c->
<c- p>}</c->
</pre>
   <p>By wrapping the code in what the optimizer will see as a separate function call,
there is a natural optimization barrier between the code being checked for
exceptions and the code where the exceptions don’t matter. The function call can
also do the task of clearing any prior exceptions that may have been caused
(which is usually necessary anyways). Ideally, the lambda would be specifically
compiled in a strict floating-point mode, and this effect would trickle down to
all of the functions recursively called by the lambda body, but even if the
compiler fails to do this, the natural optimization barrier of the function call
will often be sufficient to keep the code working in many production codebases.</p>
   <p>An alternative to the library function would be to more directly use a <code class="highlight"><c- k>try</c-></code>/<code class="highlight"><c- k>catch</c-></code> syntax for floating-point exceptions. Mapping that to C++'s
existing keywords would create confusion with the existing exception-handling
mechanism, as floating-point exceptions work very differently from C++'s
exceptions. TS 18661-5 adds a similar <code class="highlight"><c- k>try</c-></code>/<code class="highlight"><c- k>catch</c-></code>-like mechanism for doing
alternate floating-point exception handling, but expressed via a cumbersome
pragma syntax that has no known implementations and received little support from
the broader WG14 committee when portions of the floating-point TSes were
integrated in C23. C++ having lambdas enables the library function to replace
language keywords or pragmas to enable this feature, although it does not
provide a complete solution for enforcing the floating-point model within the
lambda.</p>
   <p>Using a type-based approach to enable or disable the ability of floating-point
code to interact with the environment is an extremely poor match. The
environment is inherently a shared (thread-local) data structure, and hardware
instructions that touch the environment are not particularly fast. Repeatedly
turning on and off for individual operations is not a good idea, outside of
rounding mode where static rounding mode instructions exist on many hardware
classes (and static rounding mode being integrated into major languages like
C++ would push more hardware vendors to include support for it for performance
reasons).</p>
   <p>Outside of accessing the currently raised floating-point exceptions, the
other major components of the floating-point environment are the current dynamic
rounding mode, the denormal flushing mode(s), bits to enable traps on
floating-point exceptions, and other bits generally unaccounted for in
compilers' model of floating-point. Rounding mode is already discussed earlier,
with a preference for relying on static rounding mode rather than a dynamic
rounding mode. The trap bits can be viewed as a way to make the overhead of
testing for floating-point exceptions cheaper on hardware that supports it, and
probably suffice to be used via existing mechanisms. Similarly, the rest of the
floating-point environment tends to be poorly modeled by compilers, and to use
them effectively, a user already needs to force the compiler into a very strict
floating-point model globally. Given that there is little commonality across
architectures on these extra bits, code that truly cares about them already
needs to rely on implementation-specific features like inline assembly to access
them, and the benefit of the language standardizing means of access to them is
little.</p>
   <p>Recommendation: <strong>Pursue a library function that tests for floating-point
exceptions occurring within the execution of a lambda argument.</strong></p>
   <h2 class="heading settled" data-level="8" id="questions"><span class="secno">8. </span><span class="content">Questions</span><a class="self-link" href="#questions"></a></h2>
   <ul>
    <li data-md>
     <p>Is WG21 interested in pursuing something like C’s Annex F for C++?</p>
    <li data-md>
     <p>Is WG21 in favor of removing support for excess precision in C++?</p>
    <li data-md>
     <p>Is WG21 in favor of adding support for methods to dynamically control
    denormal flushing?</p>
    <li data-md>
     <p>Is WG21 interested in ways to improve pragma compatibility with generic code?</p>
    <li data-md>
     <p>Is WG21 interested in a feature like <code class="highlight"><c- n>finite</c-><c- o>&lt;</c-><c- b>double</c-><c- o>></c-></code> for representing
    floating-point types that have UB for non-finite values?</p>
    <li data-md>
     <p>Is WG21 in favor of a <code class="highlight"><c- n>fma_fast</c-></code>-like function for enabling FMA
    contraction?</p>
    <li data-md>
     <p>Is WG21 interested in enabling floating-point environment support in <code class="highlight"><c- k>constexpr</c-></code> contexts?</p>
    <li data-md>
     <p>Is WG21 interested in making floating-point exceptions (other than <code class="highlight"><c- n>FE_INEXACT</c-></code>) fail to be evaluated at compile time?</p>
    <li data-md>
     <p>Is WG21 interested in adding a function for testing for floating-point
    exceptions that occur within the evaluation of a lambda argument?</p>
   </ul>
  </main>
<script>
(function() {
  "use strict";
  // HTML labels for the sidebar controls: an arrow glyph marked aria-hidden,
  // followed by the visible caption.
  var collapseSidebarText = '<span aria-hidden="true">←</span> <span>Collapse Sidebar</span>';
  var expandSidebarText = '<span aria-hidden="true">→</span> <span>Pop Out Sidebar</span>';
  var tocJumpText = '<span aria-hidden="true">↑</span> <span>Jump to Table of Contents</span>';

  // Media query whose match state drives the sidebar: autoToggle forwards
  // the query's current `matches` value to toggleSidebar on every change.
  var sidebarMedia = window.matchMedia('screen and (min-width: 78em)');
  var autoToggle = function (mediaEvent) {
    toggleSidebar(mediaEvent.matches);
  };
  // Only subscribe when the MediaQueryList exposes addListener.
  if (sidebarMedia.addListener) {
    sidebarMedia.addListener(autoToggle);
  }

  /**
   * Switch the table of contents between the sidebar layout (body class
   * 'toc-sidebar') and the inline layout (body class 'toc-inline'), update
   * the toggle link's label, and compensate the scroll position by the ToC's
   * height unless the viewport is still above the end of the '.head' element.
   *
   * @param {boolean} [on] true selects the sidebar layout, false the inline
   *   layout; when omitted (undefined/null) the current state is flipped.
   */
  function toggleSidebar(on) {
    var toggle = document.getElementById('toc-toggle');
    var tocNav = document.getElementById('toc');
    if (!toggle || !tocNav) {
      // autoToggle is registered on the media query even when the document
      // has no ToC (so no toggle link was ever created). Bail out before
      // touching the body classes instead of dereferencing null below.
      return;
    }

    if (on == undefined) {
      on = !document.body.classList.contains('toc-sidebar');
    }

    /* Don’t scroll to compensate for the ToC if we’re above it already. */
    var headY = 0;
    var head = document.querySelector('.head');
    if (head) {
      // terrible approx of "top of ToC"
      headY += head.offsetTop + head.offsetHeight;
    }
    var skipScroll = window.scrollY < headY;

    if (on) {
      // Height is captured before the class swap below.
      var tocHeight = tocNav.offsetHeight;
      document.body.classList.add('toc-sidebar');
      document.body.classList.remove('toc-inline');
      toggle.innerHTML = collapseSidebarText;
      if (!skipScroll) {
        window.scrollBy(0, -tocHeight);
      }
      tocNav.focus();
      // auto-collapse when out of room; feature-checked like the initial
      // registration so a MediaQueryList without addListener does not throw.
      if (sidebarMedia.addListener) {
        sidebarMedia.addListener(autoToggle);
      }
    }
    else {
      document.body.classList.add('toc-inline');
      document.body.classList.remove('toc-sidebar');
      toggle.innerHTML = expandSidebarText;
      if (!skipScroll) {
        window.scrollBy(0, tocNav.offsetHeight);
      }
      if (toggle.matches(':hover')) {
        /* Unfocus button when not using keyboard navigation,
           because I don’t know where else to send the focus. */
        toggle.blur();
      }
    }
  }

  /**
   * Build the sidebar toggle link and, if missing, the "Jump to Table of
   * Contents" link, and attach both to the #toc-nav container (which is
   * created and prepended to <body> when the document does not provide one).
   * Clicking the toggle stops automatic media-query toggling and flips the
   * sidebar state.
   */
  function createSidebarToggle() {
    /* Create the sidebar toggle in JS; it shouldn’t exist when JS is off. */
    var toggle = document.createElement('a');
      /* This should probably be a button, but appearance isn’t standards-track.*/
    toggle.id = 'toc-toggle';
    // The DOM property reflecting the class attribute is `className`;
    // assigning to `.class` only creates an unrelated expando property.
    toggle.className = 'toc-toggle';
    toggle.href = '#toc';
    toggle.innerHTML = collapseSidebarText;

    // Feature-checked like the initial registration so a MediaQueryList
    // without addListener/removeListener does not throw.
    if (sidebarMedia.addListener) {
      sidebarMedia.addListener(autoToggle);
    }
    var toggler = function(e) {
      e.preventDefault();
      if (sidebarMedia.removeListener) {
        sidebarMedia.removeListener(autoToggle); // persist explicit off states
      }
      toggleSidebar();
      return false;
    };
    toggle.addEventListener('click', toggler, false);


    /* Get the #toc-nav container, or make one (a <p>) if we don’t have it. */
    var tocNav = document.getElementById('toc-nav');
    if (!tocNav) {
      tocNav = document.createElement('p');
      tocNav.id = 'toc-nav';
      /* Prepend for better keyboard navigation */
      document.body.insertBefore(tocNav, document.body.firstChild);
    }
    /* While we’re at it, make sure we have a Jump to Toc link. */
    var tocJump = document.getElementById('toc-jump');
    if (!tocJump) {
      tocJump = document.createElement('a');
      tocJump.id = 'toc-jump';
      tocJump.href = '#toc';
      tocJump.innerHTML = tocJumpText;
      tocNav.appendChild(tocJump);
    }

    tocNav.appendChild(toggle);
  }

  // Set up the sidebar controls only when the document contains a #toc
  // element; otherwise emit a console warning.
  var toc = document.getElementById('toc');
  if (!toc) {
    console.warn("Can’t find Table of Contents. Please use <nav id='toc'> around the ToC.");
  }
  else {
    createSidebarToggle();
    toggleSidebar(sidebarMedia.matches);

    /* If the sidebar has been manually opened and is currently overlaying the text
       (window too small for the MQ to add the margin to body),
       then auto-close the sidebar once you click on something in there. */
    toc.addEventListener('click', function (clickEvent) {
      // Only clicks on links count.
      if (clickEvent.target.tagName.toLowerCase() != "a") {
        return;
      }
      // Nothing to close unless the sidebar layout is active.
      if (!document.body.classList.contains('toc-sidebar')) {
        return;
      }
      // When the media query matches, leave the sidebar open.
      if (sidebarMedia.matches) {
        return;
      }
      toggleSidebar(false);
    }, false);
  }

  /* Wrap tables in case they overflow: every table.data / table.index whose
     parent is not already an .overlarge element gets a <div class="overlarge">
     inserted in its place, and the table is moved inside that wrapper. */
  var unwrappedTables = document.querySelectorAll(':not(.overlarge) > table.data, :not(.overlarge) > table.index');
  Array.prototype.forEach.call(unwrappedTables, function (tableEl) {
    var overflowBox = document.createElement('div');
    overflowBox.className = 'overlarge';
    tableEl.parentNode.insertBefore(overflowBox, tableEl);
    overflowBox.appendChild(tableEl);
  });

})();
</script>