<!DOCTYPE html>

<html lang="en">

<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- Zooming is intentionally left enabled: disabling it (user-scalable=no) prevents low-vision readers from enlarging the text. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black">
    <meta name="mobile-web-app-capable" content="yes">
    <title>Atomic Reduction Operations</title>

    <!-- Third-party stylesheets, each pinned to an exact version and guarded by a Subresource Integrity hash. -->
    <link rel="stylesheet"
          href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.7/css/bootstrap.min.css"
          integrity="sha256-916EbMg70RQy9LHiGkXzG8hSg9EdNy97GazNG/aiY1w="
          crossorigin="anonymous">
    <link rel="stylesheet"
          href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css"
          integrity="sha256-eZrrJcwDc/3uDhsdt61sL2oOBY362qM3lon1gyExkL0="
          crossorigin="anonymous">
    <link rel="stylesheet"
          href="https://cdnjs.cloudflare.com/ajax/libs/ionicons/2.0.1/css/ionicons.min.css"
          integrity="sha256-3iu9jgsy9TpTwXKb7bNQzqWekRX7pPK+2OLj3R922fo="
          crossorigin="anonymous">
    <link rel="stylesheet"
          href="https://cdnjs.cloudflare.com/ajax/libs/octicons/3.5.0/octicons.min.css"
          integrity="sha256-QiWfLIsCT02Sdwkogf6YMiQlj4NE84MKkzEMkZnMGdg="
          crossorigin="anonymous">
    <link rel="stylesheet"
          href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.5.1/themes/prism.min.css"
          integrity="sha256-vtR0hSWRc3Tb26iuN2oZHt3KRUomwTufNIf5/4oeCyg="
          crossorigin="anonymous">
    <link rel="stylesheet"
          href="https://cdn.jsdelivr.net/npm/@hackmd/emojify.js@2.1.0/dist/css/basic/emojify.min.css"
          integrity="sha256-UOrvMOsSDSrW6szVLe8ZDZezBxh5IoIfgTwdNDgTjiU="
          crossorigin="anonymous">
    <!-- Inline minified stylesheet: loads web fonts via @import, then defines syntax-highlight (.hljs-*) colours, .markdown-body typography, alert and spoiler styles, table-of-contents (.ui-toc*) rules, and video/slide embed helpers. -->
    <style>
        @import url(https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,500,500i|Source+Code+Pro:300,400,500|Source+Sans+Pro:300,300i,400,400i,600,600i|Source+Serif+Pro&subset=latin-ext);.hljs{background:#fff;color:#333;display:block;overflow-x:auto;padding:.5em}.hljs-comment,.hljs-meta{color:#969896}.hljs-emphasis,.hljs-quote,.hljs-string,.hljs-strong,.hljs-template-variable,.hljs-variable{color:#df5000}.hljs-keyword,.hljs-selector-tag,.hljs-type{color:#a71d5d}.hljs-attribute,.hljs-bullet,.hljs-literal,.hljs-number,.hljs-symbol{color:#0086b3}.hljs-built_in,.hljs-builtin-name{color:#005cc5}.hljs-name,.hljs-section{color:#63a35c}.hljs-tag{color:#333}.hljs-attr,.hljs-selector-attr,.hljs-selector-class,.hljs-selector-id,.hljs-selector-pseudo,.hljs-title{color:#795da3}.hljs-addition{background-color:#eaffea;color:#55a532}.hljs-deletion{background-color:#ffecec;color:#bd2c00}.hljs-link{text-decoration:underline}.markdown-body{word-wrap:break-word;font-size:16px;line-height:1.5}.markdown-body:after,.markdown-body:before{content:"";display:table}.markdown-body:after{clear:both}.markdown-body>:first-child{margin-top:0!important}.markdown-body>:last-child{margin-bottom:0!important}.markdown-body a:not([href]){color:inherit;text-decoration:none}.markdown-body .absent{color:#c00}.markdown-body .anchor{float:left;line-height:1;margin-left:-20px;padding-right:4px}.markdown-body .anchor:focus{outline:none}.markdown-body blockquote,.markdown-body dl,.markdown-body ol,.markdown-body p,.markdown-body pre,.markdown-body table,.markdown-body ul{margin-bottom:16px;margin-top:0}.markdown-body hr{background-color:#e7e7e7;border:0;height:.25em;margin:24px 0;padding:0}.markdown-body blockquote{border-left:.25em solid #ddd;color:#777;font-size:16px;padding:0 1em}.markdown-body blockquote>:first-child{margin-top:0}.markdown-body blockquote>:last-child{margin-bottom:0}.markdown-body kbd,.popover kbd{background-color:#fcfcfc;border:1px solid;border-color:#ccc #ccc 
#bbb;border-radius:3px;box-shadow:inset 0 -1px 0 #bbb;color:#555;display:inline-block;font-size:11px;line-height:10px;padding:3px 5px;vertical-align:middle}.markdown-body .loweralpha{list-style-type:lower-alpha}.markdown-body h1,.markdown-body h2,.markdown-body h3,.markdown-body h4,.markdown-body h5,.markdown-body h6{font-weight:600;line-height:1.25;margin-bottom:16px;margin-top:24px}.markdown-body h1 .octicon-link,.markdown-body h2 .octicon-link,.markdown-body h3 .octicon-link,.markdown-body h4 .octicon-link,.markdown-body h5 .octicon-link,.markdown-body h6 .octicon-link{color:#000;vertical-align:middle;visibility:hidden}.markdown-body h1:hover .anchor,.markdown-body h2:hover .anchor,.markdown-body h3:hover .anchor,.markdown-body h4:hover .anchor,.markdown-body h5:hover .anchor,.markdown-body h6:hover .anchor{text-decoration:none}.markdown-body h1:hover .anchor .octicon-link,.markdown-body h2:hover .anchor .octicon-link,.markdown-body h3:hover .anchor .octicon-link,.markdown-body h4:hover .anchor .octicon-link,.markdown-body h5:hover .anchor .octicon-link,.markdown-body h6:hover .anchor .octicon-link{visibility:visible}.markdown-body h1 code,.markdown-body h1 tt,.markdown-body h2 code,.markdown-body h2 tt,.markdown-body h3 code,.markdown-body h3 tt,.markdown-body h4 code,.markdown-body h4 tt,.markdown-body h5 code,.markdown-body h5 tt,.markdown-body h6 code,.markdown-body h6 tt{font-size:inherit}.markdown-body h1{font-size:2em}.markdown-body h1,.markdown-body h2{border-bottom:1px solid #eee;padding-bottom:.3em}.markdown-body h2{font-size:1.5em}.markdown-body h3{font-size:1.25em}.markdown-body h4{font-size:1em}.markdown-body h5{font-size:.875em}.markdown-body h6{color:#777;font-size:.85em}.markdown-body ol,.markdown-body ul{padding-left:2em}.markdown-body ol.no-list,.markdown-body ul.no-list{list-style-type:none;padding:0}.markdown-body ol ol,.markdown-body ol ul,.markdown-body ul ol,.markdown-body ul ul{margin-bottom:0;margin-top:0}.markdown-body 
li>p{margin-top:16px}.markdown-body li+li{padding-top:.25em}.markdown-body dl{padding:0}.markdown-body dl dt{font-size:1em;font-style:italic;font-weight:700;margin-top:16px;padding:0}.markdown-body dl dd{margin-bottom:16px;padding:0 16px}.markdown-body table{display:block;overflow:auto;width:100%;word-break:normal;word-break:keep-all}.markdown-body table th{font-weight:700}.markdown-body table td,.markdown-body table th{border:1px solid #ddd;padding:6px 13px}.markdown-body table tr{background-color:#fff;border-top:1px solid #ccc}.markdown-body table tr:nth-child(2n){background-color:#f8f8f8}.markdown-body img{background-color:#fff;box-sizing:initial;max-width:100%}.markdown-body img[align=right]{padding-left:20px}.markdown-body img[align=left]{padding-right:20px}.markdown-body .emoji{background-color:initial;max-width:none;vertical-align:text-top}.markdown-body span.frame{display:block;overflow:hidden}.markdown-body span.frame>span{border:1px solid #ddd;display:block;float:left;margin:13px 0 0;overflow:hidden;padding:7px;width:auto}.markdown-body span.frame span img{display:block;float:left}.markdown-body span.frame span span{clear:both;color:#333;display:block;padding:5px 0 0}.markdown-body span.align-center{clear:both;display:block;overflow:hidden}.markdown-body span.align-center>span{display:block;margin:13px auto 0;overflow:hidden;text-align:center}.markdown-body span.align-center span img{margin:0 auto;text-align:center}.markdown-body span.align-right{clear:both;display:block;overflow:hidden}.markdown-body span.align-right>span{display:block;margin:13px 0 0;overflow:hidden;text-align:right}.markdown-body span.align-right span img{margin:0;text-align:right}.markdown-body span.float-left{display:block;float:left;margin-right:13px;overflow:hidden}.markdown-body span.float-left span{margin:13px 0 0}.markdown-body span.float-right{display:block;float:right;margin-left:13px;overflow:hidden}.markdown-body span.float-right>span{display:block;margin:13px auto 
0;overflow:hidden;text-align:right}.markdown-body code,.markdown-body tt{background-color:#0000000a;border-radius:3px;font-size:85%;margin:0;padding:.2em 0}.markdown-body code:after,.markdown-body code:before,.markdown-body tt:after,.markdown-body tt:before{content:"\00a0";letter-spacing:-.2em}.markdown-body code br,.markdown-body tt br{display:none}.markdown-body del code{text-decoration:inherit}.markdown-body pre{word-wrap:normal}.markdown-body pre>code{background:#0000;border:0;font-size:100%;margin:0;padding:0;white-space:pre;word-break:normal}.markdown-body .highlight{margin-bottom:16px}.markdown-body .highlight pre{margin-bottom:0;word-break:normal}.markdown-body .highlight pre,.markdown-body pre{border-radius:3px;font-size:85%;line-height:1.45;overflow:auto}.markdown-body:not(.next-editor) pre{background-color:#f7f7f7;padding:16px}.markdown-body pre code,.markdown-body pre tt{word-wrap:normal;background-color:initial;border:0;display:inline;line-height:inherit;margin:0;max-width:auto;overflow:visible;padding:0}.markdown-body pre code:after,.markdown-body pre code:before,.markdown-body pre tt:after,.markdown-body pre tt:before{content:normal}.markdown-body .csv-data td,.markdown-body .csv-data th{font-size:12px;line-height:1;overflow:hidden;padding:5px;text-align:left;white-space:nowrap}.markdown-body .csv-data .blob-line-num{background:#fff;border:0;padding:10px 8px 9px;text-align:right}.markdown-body .csv-data tr{border-top:0}.markdown-body .csv-data th{background:#f8f8f8;border-top:0;font-weight:700}.news .alert .markdown-body blockquote{border:0;padding:0 0 0 40px}.activity-tab .news .alert .commits,.activity-tab .news .markdown-body blockquote{padding-left:0}.task-list-item{list-style-type:none}.task-list-item label{font-weight:400}.task-list-item.enabled label{cursor:pointer}.task-list-item+.task-list-item{margin-top:3px}.task-list-item-checkbox{cursor:default!important;float:left;margin:.31em 0 .2em 
-1.3em!important;vertical-align:middle}.markdown-alert{border-left-style:solid;border-left-width:4px;color:inherit;margin-bottom:16px;padding:8px 16px}.markdown-alert .markdown-alert-title{align-items:center;display:flex;font-weight:500;line-height:1;white-space:break-spaces}.markdown-body .markdown-alert>*{margin-bottom:0;margin-top:16px}.markdown-body .markdown-alert .selection-popover,.markdown-body .markdown-alert>:first-child{margin-top:0}.markdown-alert.markdown-alert-note{border-left-color:#0969da}.markdown-alert.markdown-alert-note .markdown-alert-title{fill:currentColor;color:#0969da}.markdown-alert.markdown-alert-tip{border-left-color:#1a7f37}.markdown-alert.markdown-alert-tip .markdown-alert-title{fill:currentColor;color:#1a7f37}.markdown-alert.markdown-alert-important{border-left-color:#8250df}.markdown-alert.markdown-alert-important .markdown-alert-title{fill:currentColor;color:#8250df}.markdown-alert.markdown-alert-warning{border-left-color:#9a6700}.markdown-alert.markdown-alert-warning .markdown-alert-title{fill:currentColor;color:#9a6700}.markdown-alert.markdown-alert-caution{border-left-color:#d1242f}.markdown-alert.markdown-alert-caution .markdown-alert-title{fill:currentColor;color:#d1242f}.markdown-body{max-width:758px;overflow:visible!important;padding-bottom:40px;padding-top:40px;position:relative}.markdown-body.next-editor{overflow-x:hidden!important}.markdown-body .emoji{vertical-align:top}.markdown-body pre{border:inherit!important}.markdown-body code{color:inherit!important}.markdown-body pre code .wrapper{display:-moz-inline-flex;display:-ms-inline-flex;display:-o-inline-flex;display:inline-flex}.markdown-body pre code .gutter{float:left;overflow:hidden;-webkit-user-select:none;user-select:none}.markdown-body pre code .gutter.linenumber{border-right:3px solid #6ce26c!important;box-sizing:initial;color:#afafaf!important;cursor:default;display:inline-block;min-width:20px;padding:0 8px 0 
0;position:relative;text-align:right;z-index:4}.markdown-body pre code .gutter.linenumber>span:before{content:attr(data-linenumber)}.markdown-body pre code .code{float:left;margin:0 0 0 16px}.markdown-body .gist .line-numbers{border-bottom:none;border-left:none;border-top:none}.markdown-body .gist .line-data{border:none}.markdown-body .gist table{border-collapse:inherit!important;border-spacing:0}.markdown-body code[data-gist-id]{background:none;padding:0}.markdown-body code[data-gist-id]:after,.markdown-body code[data-gist-id]:before{content:""}.markdown-body code[data-gist-id] .blob-num{border:unset}.markdown-body code[data-gist-id] table{margin-bottom:unset;overflow:unset}.markdown-body code[data-gist-id] table tr{background:unset}.markdown-body[dir=rtl] pre{direction:ltr}.markdown-body[dir=rtl] code{direction:ltr;unicode-bidi:embed}.markdown-body .alert{display:flex;flex-direction:column;gap:16px}.markdown-body .alert>*{margin:0}.markdown-body pre.abc,.markdown-body pre.flow-chart,.markdown-body pre.graphviz,.markdown-body pre.mermaid,.markdown-body pre.sequence-diagram,.markdown-body pre.vega{background-color:inherit;border-radius:0;overflow:visible;text-align:center;white-space:inherit}.markdown-body pre.abc>code,.markdown-body pre.flow-chart>code,.markdown-body pre.graphviz>code,.markdown-body pre.mermaid>code,.markdown-body pre.sequence-diagram>code,.markdown-body pre.vega>code{text-align:left}.markdown-body pre.abc>svg,.markdown-body pre.flow-chart>svg,.markdown-body pre.graphviz>svg,.markdown-body pre.mermaid>svg,.markdown-body pre.sequence-diagram>svg,.markdown-body pre.vega>svg{height:100%;max-width:100%}.markdown-body pre>code.wrap{word-wrap:break-word;white-space:pre-wrap;white-space:-moz-pre-wrap;white-space:-pre-wrap;white-space:-o-pre-wrap}.markdown-body pre.pseudocode{white-space-collapse:collapse}.markdown-body summary{display:list-item}.markdown-body summary:focus{outline:none}.markdown-body details summary{cursor:pointer}.markdown-body 
details:not([open])>:not(summary){display:none}.markdown-body figure{margin:1em 40px}.markdown-body .mark,.markdown-body mark{background-color:#fff1a7}.vimeo,.youtube{background-color:#000;background-position:50%;background-repeat:no-repeat;background-size:contain;cursor:pointer;display:table;overflow:hidden;text-align:center}.vimeo,.youtube{position:relative;width:100%}.youtube{padding-bottom:56.25%}.vimeo img{object-fit:contain;width:100%;z-index:0}.youtube img{object-fit:cover;z-index:0}.vimeo iframe,.youtube iframe,.youtube img{height:100%;left:0;position:absolute;top:0;width:100%}.vimeo iframe,.youtube iframe{vertical-align:middle;z-index:1}.vimeo .icon,.youtube .icon{color:#fff;height:auto;left:50%;opacity:.3;position:absolute;top:50%;transform:translate(-50%,-50%);transition:opacity .2s;width:auto;z-index:0}.vimeo:hover .icon,.youtube:hover .icon{opacity:.6;transition:opacity .2s}.slideshare .inner,.speakerdeck .inner{position:relative;width:100%}.slideshare .inner iframe,.speakerdeck .inner iframe{bottom:0;height:100%;left:0;position:absolute;right:0;top:0;width:100%}.figma{display:table;padding-bottom:56.25%;position:relative;width:100%}.figma iframe{border:1px solid #eee;bottom:0;height:100%;left:0;position:absolute;right:0;top:0;width:100%}.markmap-container{height:300px}.markmap-container>svg{height:100%;width:100%}.MJX_Assistive_MathML{display:none}#MathJax_Message{z-index:1000!important}.ui-infobar{color:#777;margin:25px auto -25px;max-width:760px;position:relative;z-index:2}.toc .invisable-node{list-style-type:none}.ui-toc{bottom:20px;position:fixed;z-index:998}.ui-toc.both-mode{margin-left:8px}.ui-toc.both-mode .ui-toc-label{border-bottom-left-radius:0;border-top-left-radius:0;height:40px;padding:10px 4px}.ui-toc-label{background-color:#e6e6e6;border:none;color:#868686;transition:opacity .2s}.ui-toc .open .ui-toc-label{color:#fff;opacity:1;transition:opacity 
.2s}.ui-toc-label:focus{background-color:#ccc;color:#000;opacity:.3}.ui-toc-label:hover{background-color:#ccc;opacity:1;transition:opacity .2s}.ui-toc-dropdown{margin-bottom:20px;margin-top:20px;max-height:70vh;max-width:45vw;overflow:auto;padding-left:10px;padding-right:10px;text-align:inherit;width:25vw}.ui-toc-dropdown>.toc{max-height:calc(70vh - 100px);overflow:auto}.ui-toc-dropdown[dir=rtl] .nav{letter-spacing:.0029em;padding-right:0}.ui-toc-dropdown a{overflow:hidden;text-overflow:ellipsis;white-space:pre}.ui-toc-dropdown .nav>li>a{color:#767676;display:block;font-size:13px;font-weight:500;padding:4px 20px}.ui-toc-dropdown .nav>li:first-child:last-child>ul,.ui-toc-dropdown .toc.expand ul{display:block}.ui-toc-dropdown .nav>li>a:focus,.ui-toc-dropdown .nav>li>a:hover{background-color:initial;border-left:1px solid #000;color:#000;padding-left:19px;text-decoration:none}.ui-toc-dropdown[dir=rtl] .nav>li>a:focus,.ui-toc-dropdown[dir=rtl] .nav>li>a:hover{border-left:none;border-right:1px solid #000;padding-right:19px}.ui-toc-dropdown .nav>.active:focus>a,.ui-toc-dropdown .nav>.active:hover>a,.ui-toc-dropdown .nav>.active>a{background-color:initial;border-left:2px solid #000;color:#000;font-weight:700;padding-left:18px}.ui-toc-dropdown[dir=rtl] .nav>.active:focus>a,.ui-toc-dropdown[dir=rtl] .nav>.active:hover>a,.ui-toc-dropdown[dir=rtl] .nav>.active>a{border-left:none;border-right:2px solid #000;padding-right:18px}.ui-toc-dropdown .nav .nav{display:none;padding-bottom:10px}.ui-toc-dropdown .nav>.active>ul{display:block}.ui-toc-dropdown .nav .nav>li>a{font-size:12px;font-weight:400;padding-bottom:1px;padding-left:30px;padding-top:1px}.ui-toc-dropdown[dir=rtl] .nav .nav>li>a{padding-right:30px}.ui-toc-dropdown .nav .nav>li>ul>li>a{font-size:12px;font-weight:400;padding-bottom:1px;padding-left:40px;padding-top:1px}.ui-toc-dropdown[dir=rtl] .nav .nav>li>ul>li>a{padding-right:40px}.ui-toc-dropdown .nav .nav>li>a:focus,.ui-toc-dropdown .nav 
.nav>li>a:hover{padding-left:29px}.ui-toc-dropdown[dir=rtl] .nav .nav>li>a:focus,.ui-toc-dropdown[dir=rtl] .nav .nav>li>a:hover{padding-right:29px}.ui-toc-dropdown .nav .nav>li>ul>li>a:focus,.ui-toc-dropdown .nav .nav>li>ul>li>a:hover{padding-left:39px}.ui-toc-dropdown[dir=rtl] .nav .nav>li>ul>li>a:focus,.ui-toc-dropdown[dir=rtl] .nav .nav>li>ul>li>a:hover{padding-right:39px}.ui-toc-dropdown .nav .nav>.active:focus>a,.ui-toc-dropdown .nav .nav>.active:hover>a,.ui-toc-dropdown .nav .nav>.active>a{font-weight:500;padding-left:28px}.ui-toc-dropdown[dir=rtl] .nav .nav>.active:focus>a,.ui-toc-dropdown[dir=rtl] .nav .nav>.active:hover>a,.ui-toc-dropdown[dir=rtl] .nav .nav>.active>a{padding-right:28px}.ui-toc-dropdown .nav .nav>.active>.nav>.active:focus>a,.ui-toc-dropdown .nav .nav>.active>.nav>.active:hover>a,.ui-toc-dropdown .nav .nav>.active>.nav>.active>a{font-weight:500;padding-left:38px}.ui-toc-dropdown[dir=rtl] .nav .nav>.active>.nav>.active:focus>a,.ui-toc-dropdown[dir=rtl] .nav .nav>.active>.nav>.active:hover>a,.ui-toc-dropdown[dir=rtl] .nav .nav>.active>.nav>.active>a{padding-right:38px}.markdown-body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html[lang^=ja] .markdown-body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,Hiragino Kaku Gothic Pro,ヒラギノ角ゴ Pro W3,Osaka,Meiryo,メイリオ,MS Gothic,ＭＳ ゴシック,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html[lang=zh-tw] .markdown-body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,PingFang TC,Microsoft JhengHei,微軟正黑,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html[lang=zh-cn] .markdown-body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,PingFang SC,Microsoft YaHei,微软雅黑,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html 
.markdown-body[lang^=ja]{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,Hiragino Kaku Gothic Pro,ヒラギノ角ゴ Pro W3,Osaka,Meiryo,メイリオ,MS Gothic,ＭＳ ゴシック,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html .markdown-body[lang=zh-tw]{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,PingFang TC,Microsoft JhengHei,微軟正黑,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html .markdown-body[lang=zh-cn]{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,PingFang SC,Microsoft YaHei,微软雅黑,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html[lang^=ja] .ui-toc-dropdown{font-family:Source Sans Pro,Helvetica,Arial,Meiryo UI,MS PGothic,ＭＳ Ｐゴシック,sans-serif}html[lang=zh-tw] .ui-toc-dropdown{font-family:Source Sans Pro,Helvetica,Arial,Microsoft JhengHei UI,微軟正黑UI,sans-serif}html[lang=zh-cn] .ui-toc-dropdown{font-family:Source Sans Pro,Helvetica,Arial,Microsoft YaHei UI,微软雅黑UI,sans-serif}html .ui-toc-dropdown[lang^=ja]{font-family:Source Sans Pro,Helvetica,Arial,Meiryo UI,MS PGothic,ＭＳ Ｐゴシック,sans-serif}html .ui-toc-dropdown[lang=zh-tw]{font-family:Source Sans Pro,Helvetica,Arial,Microsoft JhengHei UI,微軟正黑UI,sans-serif}html .ui-toc-dropdown[lang=zh-cn]{font-family:Source Sans Pro,Helvetica,Arial,Microsoft YaHei UI,微软雅黑UI,sans-serif}.ui-affix-toc{max-height:70vh;max-width:15vw;overflow:auto;position:fixed;top:0}.back-to-top,.expand-toggle,.go-to-bottom{color:#999;display:block;font-size:12px;font-weight:500;margin-left:10px;margin-top:10px;padding:4px 
10px}.back-to-top:focus,.back-to-top:hover,.expand-toggle:focus,.expand-toggle:hover,.go-to-bottom:focus,.go-to-bottom:hover{color:#563d7c;text-decoration:none}.back-to-top,.go-to-bottom{margin-top:0}.ui-user-icon{background-position:50%;background-repeat:no-repeat;background-size:cover;border-radius:50%;display:block;height:20px;margin-bottom:2px;margin-right:5px;margin-top:2px;width:20px}.ui-user-icon.small{display:inline-block;height:18px;margin:0 0 .2em;vertical-align:middle;width:18px}.ui-infobar>small>span{line-height:22px}.ui-infobar>small .dropdown{display:inline-block}.ui-infobar>small .dropdown a:focus,.ui-infobar>small .dropdown a:hover{text-decoration:none}.ui-more-info{color:#888;cursor:pointer;vertical-align:middle}.ui-more-info .fa{font-size:16px}.ui-connectedGithub,.ui-published-note{color:#888}.ui-connectedGithub{line-height:23px;white-space:nowrap}.ui-connectedGithub a.file-path{color:#888;padding-left:22px;text-decoration:none}.ui-connectedGithub a.file-path:active,.ui-connectedGithub a.file-path:hover{color:#888;text-decoration:underline}.ui-connectedGithub .fa{font-size:20px}.ui-published-note .fa{font-size:20px;vertical-align:top}.unselectable{-webkit-user-select:none;-o-user-select:none;user-select:none}.selectable{-webkit-user-select:text;-o-user-select:text;user-select:text}.inline-spoiler-section{cursor:pointer}.inline-spoiler-section .spoiler-text{background-color:#333;border-radius:2px}.inline-spoiler-section .spoiler-text>*{opacity:0}.inline-spoiler-section .spoiler-img{filter:blur(10px)}.inline-spoiler-section.raw{background-color:#333;border-radius:2px}.inline-spoiler-section.raw>*{opacity:0}.inline-spoiler-section.unveil{cursor:auto}.inline-spoiler-section.unveil .spoiler-text{background-color:#3333331a}.inline-spoiler-section.unveil .spoiler-text>*{opacity:1}.inline-spoiler-section.unveil .spoiler-img{filter:none}@media 
print{blockquote,div,img,pre,table{page-break-inside:avoid!important}a[href]:after{font-size:12px!important}}.markdown-body.slides{color:#222;position:relative;z-index:1}.markdown-body.slides:before{background-color:currentColor;bottom:0;box-shadow:0 0 0 50vw;content:"";display:block;left:0;position:absolute;right:0;top:0;z-index:-1}.markdown-body.slides section[data-markdown]{background-color:#fff;margin-bottom:1.5em;position:relative;text-align:center}.markdown-body.slides section[data-markdown] code{text-align:left}.markdown-body.slides section[data-markdown]:before{content:"";display:block;padding-bottom:56.23%}.markdown-body.slides section[data-markdown]>div:first-child{left:1em;max-height:100%;overflow:hidden;position:absolute;right:1em;top:50%;transform:translateY(-50%)}.markdown-body.slides section[data-markdown]>ul{display:inline-block}.markdown-body.slides>section>section+section:after{border:3px solid #777;content:"";height:1.5em;position:absolute;right:1em;top:-1.5em}.site-ui-font{font-family:Source Sans Pro,Helvetica,Arial,sans-serif}html[lang^=ja] .site-ui-font{font-family:Source Sans Pro,Helvetica,Arial,Hiragino Kaku Gothic Pro,ヒラギノ角ゴ Pro W3,Osaka,Meiryo,メイリオ,MS Gothic,ＭＳ ゴシック,sans-serif}html[lang=zh-tw] .site-ui-font{font-family:Source Sans Pro,Helvetica,Arial,PingFang TC,Microsoft JhengHei,微軟正黑,sans-serif}html[lang=zh-cn] .site-ui-font{font-family:Source Sans Pro,Helvetica,Arial,PingFang SC,Microsoft YaHei,微软雅黑,sans-serif}body{font-smoothing:subpixel-antialiased!important;-webkit-font-smoothing:subpixel-antialiased!important;-moz-osx-font-smoothing:auto!important;-webkit-overflow-scrolling:touch;font-family:Source Sans Pro,Helvetica,Arial,sans-serif;letter-spacing:.025em}html[lang^=ja] body{font-family:Source Sans Pro,Helvetica,Arial,Hiragino Kaku Gothic Pro,ヒラギノ角ゴ Pro W3,Osaka,Meiryo,メイリオ,MS Gothic,ＭＳ ゴシック,sans-serif}html[lang=zh-tw] body{font-family:Source Sans Pro,Helvetica,Arial,PingFang TC,Microsoft JhengHei,微軟正黑,sans-serif}html[lang=zh-cn] 
body{font-family:Source Sans Pro,Helvetica,Arial,PingFang SC,Microsoft YaHei,微软雅黑,sans-serif}abbr[title]{border-bottom:none;text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted}abbr[data-original-title],abbr[title]{cursor:help}body.modal-open{overflow-y:auto;padding-right:0!important}svg{text-shadow:none}
    </style>
    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
    <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
    <!--[if lt IE 9]>
    	<script src="https://cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv.min.js" integrity="sha256-3Jy/GbSLrg0o9y5Z5n1uw0qxZECH7C6OQpVBgNFYa0g=" crossorigin="anonymous"></script>
    	<script src="https://cdnjs.cloudflare.com/ajax/libs/respond.js/1.4.2/respond.min.js" integrity="sha256-g6iAfvZp+nDQ2TdTR/VVKJf3bGro4ub5fvWSWVRi2NE=" crossorigin="anonymous"></script>
		<script src="https://cdnjs.cloudflare.com/ajax/libs/es5-shim/4.5.9/es5-shim.min.js" integrity="sha256-8E4Is26QH0bD52WoQpcB+R/tcWQtpzlCojrybUd7Mxo=" crossorigin="anonymous"></script>
    <![endif]-->
</head>

<body>
    <div id="doc" class="markdown-body container-fluid comment-enabled" data-hard-breaks="true"><p><strong><span>Document number</span></strong><span>: P3111R0.</span><br>
<strong><span>Date</span></strong><span>: 2024-05-16.</span><br>
<strong><span>Authors</span></strong><span>: Gonzalo Brito Gadeschi, Simon Cooksey, Daniel Lustig.</span><br>
<strong><span>Reply to</span></strong><span>: Gonzalo Brito Gadeschi &lt;gonzalob _at_ </span><a href="https://nvidia.com" target="_blank" rel="noopener"><span>nvidia.com</span></a><span>&gt;.</span><br>
<strong><span>Audience</span></strong><span>: SG1, SG6.</span></p><style>
/* Colours for inserted / deleted / highlighted inline markup.
   NOTE(review): presumably used for proposed-wording diffs later in the document; confirm. */
ins {
  text-decoration: underline;
  color: green;
}

del {
  text-decoration: line-through;
  color: red;
  background-color: yellow;
}

bdi {
  text-decoration: underline;
  color: black;
  background-color: lightblue;
}

/* Per-document override of the page container: widens the 758px max-width set by
   the head stylesheet and justifies body text. */
.markdown-body {
  text-align: justify;
  max-width: 900px;
}
</style><p><big><span>Table of Contents</span></big></p><p><span class="toc"><ul>
<li><a href="#Atomic-Reduction-Operations" title="Atomic Reduction Operations">Atomic Reduction Operations</a><ul>
<li><a href="#Introduction" title="Introduction">Introduction</a></li>
<li><a href="#Motivation" title="Motivation">Motivation</a><ul>
<li><a href="#Hardware-Exposure" title="Hardware Exposure">Hardware Exposure</a></li>
<li><a href="#Performance" title="Performance">Performance</a></li>
</ul>
</li>
<li><a href="#Design" title="Design">Design</a><ul>
<li><a href="#Alternative-optimizations-for-fetch_ltkeygt" title="Alternative: optimizations for fetch_&lt;key&gt;">Alternative: optimizations for fetch_&lt;key&gt;</a></li>
<li><a href="#Forward-progress" title="Forward progress">Forward progress</a></li>
<li><a href="#Generalized-Atomic-Reduction-Operations" title="Generalized Atomic Reduction Operations">Generalized Atomic Reduction Operations</a></li>
<li><a href="#Memory-Ordering" title="Memory Ordering">Memory Ordering</a></li>
<li><a href="#Formalization" title="Formalization">Formalization</a></li>
</ul>
</li>
<li><a href="#Wording" title="Wording">Wording</a><ul>
<li><a href="#Unsequenced-support" title="Unsequenced support">Unsequenced support</a></li>
<li><a href="#Forward-progress1" title="Forward progress">Forward progress</a></li>
<li><a href="#No-acquire-sequences-support" title="No acquire sequences support">No acquire sequences support</a></li>
<li><a href="#Atomic-Reduction-Operation-APIs" title="Atomic Reduction Operation APIs">Atomic Reduction Operation APIs</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</span></p><h1 id="Atomic-Reduction-Operations" data-id="Atomic-Reduction-Operations"><a class="anchor hidden-xs" href="#Atomic-Reduction-Operations" title="Atomic-Reduction-Operations"><span class="octicon octicon-link"></span></a><span>Atomic Reduction Operations</span></h1><p><em><span>Atomic Reduction Operations</span></em><span> are atomic read-modify-write (RMW) operations (like </span><code>fetch_add</code><span>) that do not "fetch" the old value and are not reads from the Memory Model perspective. This enables implementations to leverage hardware acceleration available in modern CPU and GPU architectures.</span></p><p><span>Furthermore, we propose to allow atomic memory operations that aren't reads in unsequenced execution, and to extend atomic arithmetic reduction operations for floating-point types with operations that assume floating-point arithmetic is associative.</span></p><h2 id="Introduction" data-id="Introduction"><a class="anchor hidden-xs" href="#Introduction" title="Introduction"><span class="octicon octicon-link"></span></a><span>Introduction</span></h2><p><span>Concurrent algorithms performing atomic RMW operations that discard the old fetched value are very common in high-performance computing, e.g., finite-element matrix assembly, data analytics (e.g. building histograms), etc.</span></p><p><span>Consider the following parallel algorithm to build a histogram (</span><a href="https://clang.godbolt.org/z/zscrhEPYj" target="_blank" rel="noopener"><span class="ui-comment-inline-span">full implementation</span></a><span>):</span></p><pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span></div><div class="code"><span class="token comment">// Example 0: Histogram.</span>
span<span class="token operator">&lt;</span><span class="token keyword">unsigned</span><span class="token operator">&gt;</span> data<span class="token punctuation">;</span>

array<span class="token operator">&lt;</span>atomic<span class="token operator">&lt;</span><span class="token keyword">unsigned</span><span class="token operator">&gt;</span><span class="token punctuation">,</span> N<span class="token operator">&gt;</span> buckets<span class="token punctuation">;</span>
<span class="token keyword">constexpr</span> T bucket_sz <span class="token operator">=</span> <span class="token class-name">numeric_limits</span><span class="token operator">&lt;</span>T<span class="token operator">&gt;</span><span class="token double-colon punctuation">::</span><span class="token function">max</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token operator">/</span> <span class="token punctuation">(</span>T<span class="token punctuation">)</span>N<span class="token punctuation">;</span>
<span class="token keyword">unsigned</span> nthreads <span class="token operator">=</span> thread<span class="token double-colon punctuation">::</span><span class="token function">hardware_concurrency</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token function">for_each_n</span><span class="token punctuation">(</span>execution<span class="token double-colon punctuation">::</span>par_unseq<span class="token punctuation">,</span> views<span class="token double-colon punctuation">::</span><span class="token function">iota</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">.</span><span class="token function">begin</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> nthreads<span class="token punctuation">,</span> 
 <span class="token punctuation">[</span><span class="token operator">&amp;</span><span class="token punctuation">]</span><span class="token punctuation">(</span><span class="token keyword">int</span> thread<span class="token punctuation">)</span> <span class="token punctuation">{</span>
  <span class="token keyword">unsigned</span> data_per_thread <span class="token operator">=</span> data<span class="token punctuation">.</span><span class="token function">size</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token operator">/</span> nthreads<span class="token punctuation">;</span>
  T<span class="token operator">*</span> data_thread <span class="token operator">=</span> data<span class="token punctuation">.</span><span class="token function">data</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token operator">+</span> data_per_thread <span class="token operator">*</span> thread<span class="token punctuation">;</span>
  <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">auto</span> e <span class="token operator">:</span> <span class="token generic-function"><span class="token function">span</span><span class="token generic class-name"><span class="token operator">&lt;</span>T<span class="token operator">&gt;</span></span></span><span class="token punctuation">(</span>data_thread<span class="token punctuation">,</span> data_per_thread<span class="token punctuation">)</span><span class="token punctuation">)</span> 
    buckets<span class="token punctuation">[</span>e <span class="token operator">/</span> bucket_sz<span class="token punctuation">]</span><span class="token punctuation">.</span><span class="token function">fetch_add</span><span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">,</span> memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
</div></div></code></pre><p><span>This program has two main issues:</span></p><ul>
<li><strong><span>Correctness</span></strong><span> (undefined behavior): The program should have used the </span><code>par</code><span> execution policy to avoid undefined behavior since the atomic operation is not:</span>
<ul>
<li><span>Potentially concurrent and therefore exhibits a data-race in unsequenced contexts ([intro.execution]).</span></li>
<li><span>Vectorization-safe </span><a href="https://eel.is/c++draft/algorithms.parallel.defns#5" target="_blank" rel="noopener"><span>[algorithms.parallel.defns#5]</span></a><span> since it is specified to synchronize with other function invocations.</span></li>
</ul>
</li>
<li><strong><span>Performance</span></strong><span>: Sophisticated compiler analysis is required to optimize the above program for scalable hardware architectures with atomic reduction operations.</span></li>
</ul><p><span>Atomic reduction operations address both shortcomings:</span></p><table>
<tbody><tr>
<td><b>Before</b> (<a href="https://clang.godbolt.org/z/xq8efq1WK" target="_blank" rel="noopener">compiler-explorer</a>)</td>
<td><b>After</b></td>
</tr>
<tr>
<td>
<pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span></div><div class="code"><span class="token macro property"><span class="token directive-hash">#</span><span class="token directive keyword">include</span> <span class="token string">&lt;algorithm&gt;</span></span>
<span class="token macro property"><span class="token directive-hash">#</span><span class="token directive keyword">include</span> <span class="token string">&lt;atomic&gt;</span></span>
<span class="token macro property"><span class="token directive-hash">#</span><span class="token directive keyword">include</span> <span class="token string">&lt;execution&gt;</span></span>
<span class="token keyword">using</span> <span class="token keyword">namespace</span> std<span class="token punctuation">;</span>
<span class="token keyword">using</span> execution<span class="token double-colon punctuation">::</span>par_unseq<span class="token punctuation">;</span>

<span class="token keyword">int</span> <span class="token function">main</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
  size_t N <span class="token operator">=</span> <span class="token number">10000</span><span class="token punctuation">;</span>
  vector<span class="token operator">&lt;</span><span class="token keyword">int</span><span class="token operator">&gt;</span> <span class="token function">v</span><span class="token punctuation">(</span>N<span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
  atomic<span class="token operator">&lt;</span><span class="token keyword">int</span><span class="token operator">&gt;</span> atom <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span>
  <span class="token function">for_each_n</span><span class="token punctuation">(</span>par_unseq<span class="token punctuation">,</span> 
     v<span class="token punctuation">.</span><span class="token function">begin</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> N<span class="token punctuation">,</span>
    <span class="token punctuation">[</span><span class="token operator">&amp;</span><span class="token punctuation">]</span><span class="token punctuation">(</span><span class="token keyword">auto</span><span class="token operator">&amp;</span> e<span class="token punctuation">)</span> <span class="token punctuation">{</span>
      <span class="token comment">// UB+SLOW:</span>
      atom<span class="token punctuation">.</span><span class="token function">fetch_add</span><span class="token punctuation">(</span>e<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">;</span> 
  <span class="token keyword">return</span> atom<span class="token punctuation">.</span><span class="token function">load</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
</div></div></code></pre>
</td>
<td>
<pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span></div><div class="code"><span class="token macro property"><span class="token directive-hash">#</span><span class="token directive keyword">include</span> <span class="token string">&lt;algorithm&gt;</span></span>
<span class="token macro property"><span class="token directive-hash">#</span><span class="token directive keyword">include</span> <span class="token string">&lt;atomic&gt;</span></span>
<span class="token macro property"><span class="token directive-hash">#</span><span class="token directive keyword">include</span> <span class="token string">&lt;execution&gt;</span></span>
<span class="token keyword">using</span> <span class="token keyword">namespace</span> std<span class="token punctuation">;</span>
<span class="token keyword">using</span> execution<span class="token double-colon punctuation">::</span>par_unseq<span class="token punctuation">;</span>

<span class="token keyword">int</span> <span class="token function">main</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
  size_t N <span class="token operator">=</span> <span class="token number">10000</span><span class="token punctuation">;</span>
  vector<span class="token operator">&lt;</span><span class="token keyword">int</span><span class="token operator">&gt;</span> <span class="token function">v</span><span class="token punctuation">(</span>N<span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
  atomic<span class="token operator">&lt;</span><span class="token keyword">int</span><span class="token operator">&gt;</span> atom <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span>
  <span class="token function">for_each_n</span><span class="token punctuation">(</span>par_unseq<span class="token punctuation">,</span> 
     v<span class="token punctuation">.</span><span class="token function">begin</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> N<span class="token punctuation">,</span>
    <span class="token punctuation">[</span><span class="token operator">&amp;</span><span class="token punctuation">]</span><span class="token punctuation">(</span><span class="token keyword">auto</span><span class="token operator">&amp;</span> e<span class="token punctuation">)</span> <span class="token punctuation">{</span>
      <span class="token comment">// OK+FAST</span>
      atom<span class="token punctuation">.</span><span class="token function">add</span><span class="token punctuation">(</span>e<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">;</span> 
  <span class="token keyword">return</span> atom<span class="token punctuation">.</span><span class="token function">load</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>      
</div></div></code></pre>
</td>
</tr>
</tbody></table><p><span>This new operation can then be used in the Histogram Example (example 0), to replace the </span><code>fetch_add</code><span> with just </span><code>add</code><span>.</span></p><h2 id="Motivation" data-id="Motivation"><a class="anchor hidden-xs" href="#Motivation" title="Motivation"><span class="octicon octicon-link"></span></a><span>Motivation</span></h2><h3 id="Hardware-Exposure" data-id="Hardware-Exposure"><a class="anchor hidden-xs" href="#Hardware-Exposure" title="Hardware-Exposure"><span class="octicon octicon-link"></span></a><span>Hardware Exposure</span></h3><p><span>The following ISAs provide Atomic Reduction Operations:</span></p><table>
<thead>
<tr>
<th><span>Architecture</span></th>
<th><span>Instructions</span></th>
</tr>
</thead>
<tbody>
<tr>
<td><span>PTX</span></td>
<td><a href="https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-red" target="_blank" rel="noopener"><code>red</code></a><span>.</span></td>
</tr>
<tr>
<td><span>ARM</span></td>
<td><a href="https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en" target="_blank" rel="noopener"><code>LDADD RZ</code></a><span>, </span><a href="https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STADD--STADDL--Atomic-add-on-word-or-doubleword-in-memory--without-return--an-alias-of-LDADD--LDADDA--LDADDAL--LDADDL-" target="_blank" rel="noopener"><code>STADD</code></a><span>, </span><a href="https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/SWP--SWPA--SWPAL--SWPL--Swap-word-or-doubleword-in-memory-?lang=en" target="_blank" rel="noopener"><code>SWP RZ</code></a><span>, </span><a href="https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/CAS--CASA--CASAL--CASL--Compare-and-Swap-word-or-doubleword-in-memory-?lang=en" target="_blank" rel="noopener"><code>CAS RZ</code></a><span>.</span></td>
</tr>
<tr>
<td><span>x86-64</span></td>
<td><a href="https://cdrdv2-public.intel.com/671368/architecture-instruction-set-extensions-programming-reference.pdf" target="_blank" rel="noopener"><span>Remote Atomic Operations (RAO)</span></a><span>: AADD, AAND, AOR, AXOR.</span></td>
</tr>
<tr>
<td><span>RISC-V</span></td>
<td><span>None (note: AMOs are always loads and stores).</span></td>
</tr>
<tr>
<td><span>PPC64LE</span></td>
<td><span>None.</span></td>
</tr>
</tbody>
</table><p><span>Some of these instructions lack a destination operand (</span><a href="https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-red" target="_blank" rel="noopener"><code>red</code></a><span>, </span><a href="https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/STADD--STADDL--Atomic-add-on-word-or-doubleword-in-memory--without-return--an-alias-of-LDADD--LDADDA--LDADDAL--LDADDL-" target="_blank" rel="noopener"><code>STADD</code></a><span>, AADD). Others change semantics if the destination register used discards the result (Arm's </span><a href="https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/LDADD--LDADDA--LDADDAL--LDADDL--Atomic-add-on-word-or-doubleword-in-memory-?lang=en" target="_blank" rel="noopener"><code>LDADD RZ</code></a><span>, </span><a href="https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/SWP--SWPA--SWPAL--SWPL--Swap-word-or-doubleword-in-memory-?lang=en" target="_blank" rel="noopener"><code>SWP RZ</code></a><span>, </span><a href="https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/CAS--CASA--CASAL--CASL--Compare-and-Swap-word-or-doubleword-in-memory-?lang=en" target="_blank" rel="noopener"><code>CAS RZ</code></a><span>).</span></p><p><span>All ISAs provide the same semantics: these are not loads from the point-of-view of the Memory Model, and therefore do not participate in acquire sequences, but they do participate in release sequences:</span></p><ul>
<li><span>PTX Specification: </span><code>red</code><span> is "</span><a href="https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#operation-types" target="_blank" rel="noopener"><span>not a read operation</span></a><span>".</span></li>
<li><span>Arm ARM: "</span><a href="https://developer.arm.com/documentation/ddi0487/latest" target="_blank" rel="noopener"><span>where the destination register is WZR or XZR, are not regarded as doing a read for the purpose of a DMB LD barrier</span></a><span>".</span></li>
<li><span>x86-64: </span><a href="https://cdrdv2-public.intel.com/671368/architecture-instruction-set-extensions-programming-reference.pdf" target="_blank" rel="noopener"><span>"since they do not load data from memory into the processor."</span></a></li>
</ul><p><span>These architectures provide both "relaxed" and "release" orderings for the reductions (e.g. </span><code>red.relaxed</code><span>/</span><code>red.release</code><span>, </span><code>STADD</code><span>/</span><code>STADDL</code><span>).</span></p><h3 id="Performance" data-id="Performance"><a class="anchor hidden-xs" href="#Performance" title="Performance"><span class="octicon octicon-link"></span></a><span>Performance</span></h3><p><span>On hardware architectures that implement these as far atomics, the exposed latency of Atomic Reduction Operations may be as low as half that of "</span><code>fetch_&lt;key&gt;</code><span>" operations.</span></p><p><span>Example: on an NVIDIA Hopper H100 GPU, replacing </span><code>atomic.fetch_add</code><span> with </span><code>atomic.add</code><span> on the Histogram Example (Example 0) improves throughput by 1.2x.</span></p><h2 id="Design" data-id="Design"><a class="anchor hidden-xs" href="#Design" title="Design"><span class="octicon octicon-link"></span></a><span>Design</span></h2><p><span>For each atomic </span><code>fetch_{OP}</code><span> in the </span><code>atomic&lt;T&gt;</code><span> and </span><code>atomic_ref&lt;T&gt;</code><span> class templates and their specializations, we introduce new </span><code>{OP}</code><span> member functions that return </span><code>void</code><span>.</span></p><p><span>These can be conservatively implemented on top of </span><code>fetch_{OP}</code><span> APIs.</span></p><h3 id="Alternative-optimizations-for-fetch_ltkeygt" data-id="Alternative-optimizations-for-fetch_ltkeygt"><a class="anchor hidden-xs" href="#Alternative-optimizations-for-fetch_ltkeygt" title="Alternative-optimizations-for-fetch_ltkeygt"><span class="octicon octicon-link"></span></a><span>Alternative: optimizations for </span><code>fetch_&lt;key&gt;</code></h3><p><span>Attempting to improve application performance by implementing compiler-optimizations to leverage Atomic Reduction Operations from 
</span><code>fetch_&lt;key&gt;</code><span> APIs has become a rite of passage for compiler engineers, e.g., </span><a href="https://gcc.gnu.org/pipermail/gcc-patches/2018-October/509632.html" target="_blank" rel="noopener"><span>GCC#509632</span></a><span>, </span><a href="https://github.com/llvm/llvm-project/issues/68428" target="_blank" rel="noopener"><span>LLVM#68428</span></a><span>, </span><a href="https://github.com/llvm/llvm-project/pull/72747" target="_blank" rel="noopener"><span>LLVM#72747</span></a><span>, </span><span class="smartypants">…</span><span> Unfortunately, "simple" optimization strategies break backward compatibility in the following litmus tests (among others).</span></p><p><strong><span>Litmus Test 0</span></strong><span>: from </span><a href="https://gcc.gnu.org/pipermail/gcc-patches/2018-October/509632.html" target="_blank" rel="noopener"><span>Will Deacon</span></a><span>. Performing the optimization to replace the </span><code>fetch_add</code><span> with an atomic reduction introduces the </span><code>y == 2 &amp;&amp; r0 == 1 &amp;&amp; r1 == 0</code><span> outcome:</span></p><pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span></div><div class="code"><span class="token keyword">void</span> <span class="token function">thread0</span><span class="token punctuation">(</span>atomic_int<span class="token operator">*</span> y<span class="token punctuation">,</span>atomic_int<span class="token operator">*</span> x<span class="token punctuation">)</span> <span class="token punctuation">{</span>
  <span class="token function">atomic_store_explicit</span><span class="token punctuation">(</span>x<span class="token punctuation">,</span><span class="token number">1</span><span class="token punctuation">,</span>memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token function">atomic_thread_fence</span><span class="token punctuation">(</span>memory_order_release<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token function">atomic_store_explicit</span><span class="token punctuation">(</span>y<span class="token punctuation">,</span><span class="token number">1</span><span class="token punctuation">,</span>memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>

<span class="token keyword">void</span> <span class="token function">thread1</span><span class="token punctuation">(</span>atomic_int<span class="token operator">*</span> y<span class="token punctuation">,</span>atomic_int<span class="token operator">*</span> x<span class="token punctuation">)</span> <span class="token punctuation">{</span>
  <span class="token function">atomic_fetch_add_explicit</span><span class="token punctuation">(</span>y<span class="token punctuation">,</span><span class="token number">1</span><span class="token punctuation">,</span>memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token function">atomic_thread_fence</span><span class="token punctuation">(</span>memory_order_acquire<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token keyword">int</span> r0 <span class="token operator">=</span> <span class="token function">atomic_load_explicit</span><span class="token punctuation">(</span>x<span class="token punctuation">,</span>memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>

<span class="token keyword">void</span> <span class="token function">thread2</span><span class="token punctuation">(</span>atomic_int<span class="token operator">*</span> y<span class="token punctuation">)</span> <span class="token punctuation">{</span>
  <span class="token keyword">int</span> r1 <span class="token operator">=</span> <span class="token function">atomic_load_explicit</span><span class="token punctuation">(</span>y<span class="token punctuation">,</span>memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
</div></div></code></pre><p><strong><span>Litmus Test 1</span></strong><span>: from </span><a href="https://github.com/llvm/llvm-project/issues/68428#issue-1930595855" target="_blank" rel="noopener"><span>Luke Geeson</span></a><span>. Performing the optimization of replacing the exchange with a store introduces the </span><code>r0 == 0 &amp;&amp; y == 2</code><span> outcome:</span></p><pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span></div><div class="code"><span class="token keyword">void</span> <span class="token function">thread0</span><span class="token punctuation">(</span>atomic_int<span class="token operator">*</span> y<span class="token punctuation">,</span>atomic_int<span class="token operator">*</span> x<span class="token punctuation">)</span> <span class="token punctuation">{</span>
  <span class="token function">atomic_store_explicit</span><span class="token punctuation">(</span>x<span class="token punctuation">,</span><span class="token number">1</span><span class="token punctuation">,</span>memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token function">atomic_thread_fence</span><span class="token punctuation">(</span>memory_order_release<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token function">atomic_store_explicit</span><span class="token punctuation">(</span>y<span class="token punctuation">,</span><span class="token number">1</span><span class="token punctuation">,</span>memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token keyword">void</span> <span class="token function">thread1</span><span class="token punctuation">(</span>atomic_int<span class="token operator">*</span> y<span class="token punctuation">,</span>atomic_int<span class="token operator">*</span> x<span class="token punctuation">)</span> <span class="token punctuation">{</span>
  <span class="token function">atomic_exchange_explicit</span><span class="token punctuation">(</span>y<span class="token punctuation">,</span><span class="token number">2</span><span class="token punctuation">,</span>memory_order_release<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token function">atomic_thread_fence</span><span class="token punctuation">(</span>memory_order_acquire<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token keyword">int</span> r0 <span class="token operator">=</span> <span class="token function">atomic_load_explicit</span><span class="token punctuation">(</span>x<span class="token punctuation">,</span>memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
</div></div></code></pre><p><span>In some architectures, Atomic Reduction Operations can write to memory pages that are not readable, and need a reliable programming model that does not depend on compiler-optimizations for functionality.</span></p><h3 id="Forward-progress" data-id="Forward-progress"><a class="anchor hidden-xs" href="#Forward-progress" title="Forward-progress"><span class="octicon octicon-link"></span></a><span>Forward progress</span></h3><p><span>Currently, all atomic memory operations are </span><a href="https://eel.is/c++draft/algorithms.parallel.defns#5" target="_blank" rel="noopener"><span>vectorization-unsafe</span></a><span> and therefore not allowed in element access functions of parallel algorithms when the </span><code>unseq</code><span> or </span><code>par_unseq</code><span> execution policies are used (see </span><a href="https://eel.is/c++draft/algorithms.parallel#exec-5" target="_blank" rel="noopener"><span>[algorithms.parallel.exec.5]</span></a><span> and </span><a href="https://eel.is/c++draft/algorithms.parallel#exec-7" target="_blank" rel="noopener"><span>[algorithms.parallel.exec.7]</span></a><span>). Atomic memory operations that "read" (e.g. </span><code>load</code><span>, </span><code>fetch_&lt;key&gt;</code><span>, </span><code>compare_exchange</code><span>, </span><code>exchange</code><span>, </span><span class="smartypants">…</span><span>) enable building synchronization edges that block, which within </span><code>unseq</code><span>/</span><code>par_unseq</code><span> leads to dead-locks. 
</span><a href="https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4070.html" target="_blank" rel="noopener"><span>N4070</span></a><span> solved this by tightening the wording to disallow any synchronization API from being called from within </span><code>unseq</code><span>/</span><code>par_unseq</code><span>.</span></p><p><span>Allowing Atomic Writes and Atomic Reduction Operations in unsequenced execution increases the set of concurrent algorithms that can be implemented in the lowest-common denominator of hardware that C++ supports. In particular, many hardware architectures that can accelerate </span><code>unseq</code><span>/</span><code>par_unseq</code><span> but cannot accelerate </span><code>par</code><span> (e.g. most non-NVIDIA GPUs), provide acceleration for atomic reduction operations.</span></p><p><span>We propose to make lock-free atomic operations that are not reads vectorization safe to enable calling them from unsequenced execution. Atomic operations that read remain vectorization-unsafe and therefore UB:</span></p><pre><code class="cpp hljs"><span class="token function">for_each</span><span class="token punctuation">(</span>par_unseq<span class="token punctuation">,</span> <span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">,</span> <span class="token punctuation">[</span><span class="token operator">&amp;</span><span class="token punctuation">]</span><span class="token punctuation">(</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">.</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
    <span class="token function">assert</span><span class="token punctuation">(</span>atom<span class="token punctuation">.</span>is_lockfree<span class="token punctuation">)</span><span class="token punctuation">;</span> <span class="token comment">// Only for lock-free atomics</span>
    atom<span class="token punctuation">.</span><span class="token function">store</span><span class="token punctuation">(</span><span class="token number">42</span><span class="token punctuation">)</span><span class="token punctuation">;</span>            <span class="token comment">// OK: vectorization-safe</span>
    atom<span class="token punctuation">.</span><span class="token function">add</span><span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">;</span>               <span class="token comment">// OK: vectorization-safe</span>
    atom<span class="token punctuation">.</span><span class="token function">fetch_add</span><span class="token punctuation">(</span><span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">;</span>         <span class="token comment">// UB: vectorization-unsafe</span>
    atom<span class="token punctuation">.</span><span class="token function">exchange</span><span class="token punctuation">(</span><span class="token number">42</span><span class="token punctuation">)</span><span class="token punctuation">;</span>         <span class="token comment">// UB: vectorization-unsafe</span>
    <span class="token keyword">while</span> <span class="token punctuation">(</span>atom<span class="token punctuation">.</span><span class="token function">load</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token operator">&lt;</span> <span class="token number">42</span><span class="token punctuation">)</span><span class="token punctuation">;</span>  <span class="token comment">// UB: vectorization-unsafe</span>
<span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">;</span> 
</code></pre><p><span>The rest of this proposal is not blocked on this extension, which could be split into a separate proposal.</span></p><h4 id="Implementation-impact" data-id="Implementation-impact"><a class="anchor hidden-xs" href="#Implementation-impact" title="Implementation-impact"><span class="octicon octicon-link"></span></a><span>Implementation impact</span></h4><p><span>There is no impact on implementations using:</span></p><ul>
<li><span>OpenMP </span><a href="https://www.openmp.org/spec-html/5.0/openmpsu42.html" target="_blank" rel="noopener"><span>simd</span></a><span> pragma for </span><code>unseq</code><span> and </span><code>par_unseq</code><span>, since OpenMP supports atomics within </span><code>simd</code><span> regions.</span></li>
<li><a href="https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations" target="_blank" rel="noopener"><span>pragma clang loop</span></a><span> is a hint.</span></li>
</ul><h3 id="Generalized-Atomic-Reduction-Operations" data-id="Generalized-Atomic-Reduction-Operations"><a class="anchor hidden-xs" href="#Generalized-Atomic-Reduction-Operations" title="Generalized-Atomic-Reduction-Operations"><span class="octicon octicon-link"></span></a><span>Generalized Atomic Reduction Operations</span></h3><p><span>The outcome </span><code>x == a + (b + c)</code><span> is not valid for the following litmus test because either the atomic reduction of thread0 happens-before that of thread 1, or vice-versa, and floating-point arithmetic is not associative:</span></p><pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span></div><div class="code"><span class="token comment">// Litmus test 2:</span>
atomic<span class="token operator">&lt;</span><span class="token keyword">float</span><span class="token operator">&gt;</span> x <span class="token operator">=</span> a<span class="token punctuation">;</span>
<span class="token keyword">void</span> <span class="token function">thread0</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token punctuation">{</span> x<span class="token punctuation">.</span><span class="token function">add</span><span class="token punctuation">(</span>b<span class="token punctuation">,</span> memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span> <span class="token punctuation">}</span>
<span class="token keyword">void</span> <span class="token function">thread1</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token punctuation">{</span> x<span class="token punctuation">.</span><span class="token function">add</span><span class="token punctuation">(</span>c<span class="token punctuation">,</span> memory_order_relaxed<span class="token punctuation">)</span><span class="token punctuation">;</span> <span class="token punctuation">}</span>
</div></div></code></pre><p><span>The value of this limitation seems small, since each execution may pick a different non-deterministic order.</span></p><p><span>The cost of this limitation is significant, since it requires implementations to perform reductions sequentially using </span><code>O(N)</code><span> operations instead of with </span><code>O(log(N))</code><span> tree-reduction algorithms. On GPU architectures, performing a horizontal reduction and then issuing a single atomic operation per thread group reduces the number of atomic operations issued by a factor of up to the size of the thread group.</span></p><p><span>We propose providing generalized atomic reduction operations, defined in an analogous way to  </span><code>GENERALIZED_SUM</code><span> (see </span><a href="https://eel.is/c++draft/numerics.defns" target="_blank" rel="noopener"><span>[numerics.defns]</span></a><span>). The "</span><code>add</code><span>" and "</span><code>add_generalized</code><span>" operations below are different, and the latter provides implementations with the flexibility to perform a tree-reduction.</span></p><p><span>We could either only provide operations with the </span><code>GENERALIZED_...</code><span> semantics for </span><code>floating-point</code><span>, or provide them as separate methods:</span></p><pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span>
<span></span></div><div class="code"><span class="token keyword">template</span> <span class="token operator">&lt;</span>floating<span class="token operator">-</span>point<span class="token operator">&gt;</span>
<span class="token keyword">class</span> <span class="token class-name">atomic</span> <span class="token punctuation">{</span>
  <span class="token keyword">void</span> <span class="token function">add</span><span class="token punctuation">(</span>floating<span class="token operator">-</span>point<span class="token punctuation">,</span> memory_order<span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token keyword">void</span> <span class="token function">add_generalized</span><span class="token punctuation">(</span>floating<span class="token operator">-</span>point<span class="token punctuation">,</span> memory_order<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span><span class="token punctuation">;</span>
</div></div></code></pre><p><span>Given the non-determinism inherent to concurrent atomic operations, the value of providing a version that differs from </span><code>GENERALIZED_...</code><span> for </span><code>floating-point</code><span> seems low. That is, we give the </span><code>atomic&lt;floating-point&gt;::add/sub/...</code><span> methods </span><code>GENERALIZED_...</code><span> semantics, and do not provide any explicit methods to pick.</span></p><p><span>The rest of this proposal is not blocked on this extension, which could be split into a separate proposal.</span></p><h3 id="Memory-Ordering" data-id="Memory-Ordering"><a class="anchor hidden-xs" href="#Memory-Ordering" title="Memory-Ordering"><span class="octicon octicon-link"></span></a><span>Memory Ordering</span></h3><p><span>We choose to support </span><code>memory_order_relaxed</code><span>, </span><code>memory_order_release</code><span>, and </span><code>memory_order_seq_cst</code><span>, since we do not see any issues with doing that. If this proves to be controversial, only exposing </span><code>memory_order_relaxed</code><span> would still be valuable.</span></p><h3 id="Formalization" data-id="Formalization"><a class="anchor hidden-xs" href="#Formalization" title="Formalization"><span class="octicon octicon-link"></span></a><span>Formalization</span></h3><p><span>Herd already supports these for </span><code>STADD</code><span> on Arm, and the NVIDIA Volta Memory Model supports these for </span><code>red</code><span> on PTX. 
If we decide to pursue this exposure direction, this proposal should be blocked on extending Herd's</span><span class="ui-comment-inline-span"> RC11 with this extension to ensure it is sound.</span></p><h2 id="Wording" data-id="Wording"><a class="anchor hidden-xs" href="#Wording" title="Wording"><span class="octicon octicon-link"></span></a><span>Wording</span></h2><h3 id="Unsequenced-support" data-id="Unsequenced-support"><a class="anchor hidden-xs" href="#Unsequenced-support" title="Unsequenced-support"><span class="octicon octicon-link"></span></a><span>Unsequenced support</span></h3><p><span>Add to </span><a href="https://eel.is/c++draft/basic.exec#intro.execution-10" target="_blank" rel="noopener"><span>[intro.execution]</span></a><span>:</span></p><blockquote>
<p><span>Except where noted, evaluations of operands of individual operators and of subexpressions of individual expressions are unsequenced.</span><br>
<span>[Note 5: In an expression that is evaluated more than once during the execution of a program, unsequenced and indeterminately sequenced evaluations of its subexpressions need not be performed consistently in different evaluations. — end note]</span></p>
<p><span>The value computations of the operands of an operator are sequenced before the value computation of the result of the operator. If a side effect on a memory location ([intro.memory]) is unsequenced relative to either another side effect on the same memory location or a value computation using the value of any object in the same memory location, and they are not </span><ins><span>lock-free atomic read operations ([atomics]) or </span></ins><span>potentially concurrent ([intro.multithread]), the behavior is undefined.</span><br>
<span>[Note 6: The next subclause imposes similar, but more complex restrictions on potentially concurrent computations. — end note]</span></p>
</blockquote><blockquote>
<p><span>[Example 3:</span></p>
<pre><code class="cpp hljs"><span class="token keyword">void</span> <span class="token function">g</span><span class="token punctuation">(</span><span class="token keyword">int</span> i<span class="token punctuation">)</span> <span class="token punctuation">{</span>
 i <span class="token operator">=</span> <span class="token number">7</span><span class="token punctuation">,</span> i<span class="token operator">++</span><span class="token punctuation">,</span> i<span class="token operator">++</span><span class="token punctuation">;</span>              <span class="token comment">// i becomes 9</span>

 i <span class="token operator">=</span> i<span class="token operator">++</span> <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">;</span>                  <span class="token comment">// the value of i is incremented</span>
 i <span class="token operator">=</span> i<span class="token operator">++</span> <span class="token operator">+</span> i<span class="token punctuation">;</span>                  <span class="token comment">// undefined behavior</span>
 i <span class="token operator">=</span> i <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">;</span>                    <span class="token comment">// the value of i is incremented</span>
<span class="token punctuation">}</span>
</code></pre>
<p><span>— end example]</span></p>
</blockquote><p><span>Add to </span><a href="https://eel.is/c++draft/algorithms.parallel.defns#5" target="_blank" rel="noopener"><span>[algorithms.parallel.defns]</span></a><span>:</span></p><p><bdi><strong><span>Note</span></strong><span>: we should consider exempting stores as well.</span></bdi></p><blockquote>
<p><span>A standard library function is vectorization-unsafe if it is specified to synchronize with another function invocation, or another function invocation is specified to synchronize with it, and if it is not a memory allocation or deallocation function</span><ins><span class="ui-comment-inline-span">, or lock-free atomic reduction operation</span></ins><span class="ui-comment-inline-span">.</span></p>
<p><span>[Note 2: Implementations must ensure that internal synchronization inside standard library functions does not prevent forward progress when those functions are executed by threads of execution with weakly parallel forward progress guarantees. — end note]</span></p>
<p><span>[Example 2:</span></p>
<pre><code class="cpp hljs"><span class="token keyword">int</span> x <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span>
std<span class="token double-colon punctuation">::</span>mutex m<span class="token punctuation">;</span>
<span class="token keyword">void</span> <span class="token function">f</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
  <span class="token keyword">int</span> a<span class="token punctuation">[</span><span class="token punctuation">]</span> <span class="token operator">=</span> <span class="token punctuation">{</span><span class="token number">1</span><span class="token punctuation">,</span><span class="token number">2</span><span class="token punctuation">}</span><span class="token punctuation">;</span>
  std<span class="token double-colon punctuation">::</span><span class="token function">for_each</span><span class="token punctuation">(</span>std<span class="token double-colon punctuation">::</span>execution<span class="token double-colon punctuation">::</span>par_unseq<span class="token punctuation">,</span> std<span class="token double-colon punctuation">::</span><span class="token function">begin</span><span class="token punctuation">(</span>a<span class="token punctuation">)</span><span class="token punctuation">,</span> std<span class="token double-colon punctuation">::</span><span class="token function">end</span><span class="token punctuation">(</span>a<span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token punctuation">[</span><span class="token operator">&amp;</span><span class="token punctuation">]</span><span class="token punctuation">(</span><span class="token keyword">int</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
    std<span class="token double-colon punctuation">::</span>lock_guard<span class="token operator">&lt;</span>mutex<span class="token operator">&gt;</span> <span class="token function">guard</span><span class="token punctuation">(</span>m<span class="token punctuation">)</span><span class="token punctuation">;</span> <span class="token comment">// incorrect: lock_guard constructor calls m.lock()</span>
    <span class="token operator">++</span>x<span class="token punctuation">;</span>
  <span class="token punctuation">}</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
</code></pre>
<p><span>The above program may result in two consecutive calls to m.lock() on the same thread of execution (which may deadlock), because the applications of the function object are not guaranteed to run on different threads of execution. — end example]</span></p>
</blockquote><h3 id="Forward-progress1" data-id="Forward-progress"><a class="anchor hidden-xs" href="#Forward-progress1" title="Forward-progress1"><span class="octicon octicon-link"></span></a><span>Forward progress</span></h3><p><span>Modify </span><a href="https://eel.is/c++draft/intro.progress#1" target="_blank" rel="noopener"><span>intro.progress#1</span></a><span> as follows:</span></p><p><bdi><strong><span>Note</span></strong><span>: we should consider exempting stores as well.</span></bdi></p><blockquote>
<p><span>The implementation may assume that any thread will eventually do one of the following:</span></p>
<ol>
<li><span>terminate,</span></li>
<li><span>make a call to a library I/O function,</span></li>
<li><span>perform an access through a volatile glvalue, or</span></li>
<li><span>perform a synchronization operation or an atomic operation </span><ins><span> that is not an atomic reduction operation</span></ins><span> .</span></li>
</ol>
<p><span>[Note 1: This is intended to allow compiler transformations such as removal of empty loops, even when termination cannot be proven. — end note]</span></p>
</blockquote><p><span>Modify </span><a href="https://eel.is/c++draft/intro.progress#3" target="_blank" rel="noopener"><span>intro.progress#3</span></a><span> as follows:</span></p><p><bdi><strong><span>Note</span></strong><span>: we should consider exempting stores as well.</span></bdi></p><blockquote>
<p><span>During the execution of a thread of execution, each of the following is termed an execution step:</span></p>
<ol>
<li><span>termination of the thread of execution,</span></li>
<li><span>performing an access through a volatile glvalue, or</span></li>
<li><span>completion of a call to a library I/O function, </span><ins><span>or</span></ins><span> a synchronization operation</span><del><span>,</span></del><span> or an atomic operation</span><ins><span> that is not an atomic reduction operation</span></ins><span>.</span></li>
</ol>
</blockquote><h3 id="No-acquire-sequences-support" data-id="No-acquire-sequences-support"><a class="anchor hidden-xs" href="#No-acquire-sequences-support" title="No-acquire-sequences-support"><span class="octicon octicon-link"></span></a><span>No acquire sequences support</span></h3><p><span>Modify </span><a href="https://eel.is/c++draft/atomics.fences" target="_blank" rel="noopener"><span>[atomics.fences]</span></a><span> as follows:</span></p><blockquote>
<p><span>33.5.11 Fences[atomics.fences]</span></p>
<ol>
<li><span>This subclause introduces synchronization primitives called fences. Fences can have acquire semantics, release semantics, or both. A fence with acquire semantics is called an acquire fence. A fence with release semantics is called a release fence.</span></li>
<li><span>A release fence A synchronizes with an acquire fence B if there exist atomic operation</span><del><span>s</span></del><span> X and </span><ins><span>non-reduction-atomic operation </span></ins><span>Y, both operating on some atomic object M, such that A is sequenced before X, X modifies M, Y is sequenced before B, </span><span class="ui-comment-inline-span">and Y reads the value written </span><span>by X or a value written by any side effect in the hypothetical release sequence X would head if it were a release operation.</span></li>
<li><span>A release fence A synchronizes with an atomic operation B that performs an acquire operation on an atomic object M if there exists an atomic operation X such that A is sequenced before X, X modifies M, and B reads the value written by X or a value written by any side effect in the hypothetical release sequence X would head if it were a release operation.</span></li>
<li><span>An atomic operation A that is a release operation on an atomic object M synchronizes with an acquire fence B if there exists some atomic operation X on M such that X is sequenced before B and reads the value written by A or a value written by any side effect in the release sequence headed by A.</span></li>
</ol>
</blockquote><h3 id="Atomic-Reduction-Operation-APIs" data-id="Atomic-Reduction-Operation-APIs"><a class="anchor hidden-xs" href="#Atomic-Reduction-Operation-APIs" title="Atomic-Reduction-Operation-APIs"><span class="octicon octicon-link"></span></a><span>Atomic Reduction Operation APIs</span></h3><p><span>The following operations perform arithmetic computations. The correspondence among key, operator, and computation is specified in </span><a href="https://eel.is/c++draft/atomics#tab:atomic.types.int.comp" target="_blank" rel="noopener"><span>Table 145</span></a><span>:</span></p><table>
<thead>
<tr>
<th><span>key</span></th>
<th><span>op</span></th>
<th><span>computation</span></th>
</tr>
</thead>
<tbody>
<tr>
<td><span>add</span></td>
<td><code>+</code></td>
<td><span>addition</span></td>
</tr>
<tr>
<td><span>sub</span></td>
<td><code>-</code></td>
<td><span>subtraction</span></td>
</tr>
<tr>
<td><span>max</span></td>
<td></td>
<td><span>maximum</span></td>
</tr>
<tr>
<td><span>min</span></td>
<td></td>
<td><span>minimum</span></td>
</tr>
<tr>
<td><span>and</span></td>
<td><code>&amp;</code></td>
<td><span>bitwise and</span></td>
</tr>
<tr>
<td><span>or</span></td>
<td><code>|</code></td>
<td><span>bitwise inclusive or</span></td>
</tr>
<tr>
<td><span>xor</span></td>
<td><code>^</code></td>
<td><span>bitwise exclusive or</span></td>
</tr>
</tbody>
</table><p><span>Add to </span><a href="https://eel.is/c++draft/atomics.ref.int" target="_blank" rel="noopener"><span>[atomics.ref.int]</span></a><span>:</span></p><pre><code><ins>void key(integral-type operand, memory_order order = memory_order_seq_cst) const noexcept;</ins></code></pre><ul>
<li><ins><em><span>Preconditions</span></em><span>: </span><code>order</code><span> is </span><code>memory_order_relaxed</code><span>, </span><code>memory_order_release</code><span>, or </span><code>memory_order_seq_cst</code><span>.</span></ins></li>
<li><ins><em><span>Effects</span></em><span>: Atomically replaces the value referenced by </span><code>*ptr</code><span> with the result of the computation applied to the value referenced by </span><code>*ptr</code><span> and the given </span><code>operand</code><span>. Memory is affected according to the value of </span><code>order</code><span>. These operations are atomic read-modify-write operations ([intro.races]) </span><strong><span>and atomic reduction operations ([atomics.fences]). Lock-free atomic reduction operations are vectorization-safe ([algorithms.parallel.defns])</span></strong><span>.</span></ins></li>
<li><ins><em><span>Remarks</span></em><span>: For signed integer types, the result is as if the object value and parameters were converted to their corresponding unsigned types, the computation performed on those types, and the result converted back to the signed type.</span><br>
<span>[Note 2: There are no undefined results arising from the computation. — end note]</span></ins></li>
<li><ins><em><span>Remarks</span></em><span>: For floating point types, </span><code>add</code><span> and </span><code>sub</code><span> perform </span><em><span>generalized</span></em><span> [numerics.defns] arithmetic.</span></ins></li>
</ul><p><span>Analogously for all other </span><code>std::atomic</code><span> and </span><code>std::atomic_ref</code><span> types and specializations: </span><a href="https://eel.is/c++draft/atomics.types.int" target="_blank" rel="noopener"><span>[atomics.types.int]</span></a><span>, [atomics.types.float], [atomics.types.pointer] (with </span><code>difference_type</code><span> operand), [atomics.ref.float], [atomics.ref.pointer] (with </span><code>difference_type</code><span> operand), etc.</span></p></div>
    <div class="ui-toc dropup unselectable hidden-print" style="display:none;">
        <div class="pull-right dropdown">
            <a id="tocLabel" class="ui-toc-label btn btn-default" data-toggle="dropdown" href="#" role="button" aria-haspopup="true" aria-expanded="false" title="Table of contents">
                <i class="fa fa-bars"></i>
            </a>
            <ul id="ui-toc" class="ui-toc-dropdown dropdown-menu" aria-labelledby="tocLabel">
                <div class="toc"><ul class="nav">
<li><a href="#Atomic-Reduction-Operations" title="Atomic Reduction Operations">Atomic Reduction Operations</a><ul class="nav">
<li><a href="#Introduction" title="Introduction">Introduction</a></li>
<li><a href="#Motivation" title="Motivation">Motivation</a><ul class="nav">
<li><a href="#Hardware-Exposure" title="Hardware Exposure">Hardware Exposure</a></li>
<li><a href="#Performance" title="Performance">Performance</a></li>
</ul>
</li>
<li><a href="#Design" title="Design">Design</a><ul class="nav">
<li><a href="#Alternative-optimizations-for-fetch_ltkeygt" title="Alternative: optimizations for fetch_<key>">Alternative: optimizations for fetch_&lt;key&gt;</a></li>
<li><a href="#Forward-progress" title="Forward progress">Forward progress</a></li>
<li><a href="#Generalized-Atomic-Reduction-Operations" title="Generalized Atomic Reduction Operations">Generalized Atomic Reduction Operations</a></li>
<li><a href="#Memory-Ordering" title="Memory Ordering">Memory Ordering</a></li>
<li><a href="#Formalization" title="Formalization">Formalization</a></li>
</ul>
</li>
<li><a href="#Wording" title="Wording">Wording</a><ul class="nav">
<li><a href="#Unsequenced-support" title="Unsequenced support">Unsequenced support</a></li>
<li><a href="#Forward-progress1" title="Forward progress">Forward progress</a></li>
<li><a href="#No-acquire-sequences-support" title="No acquire sequences support">No acquire sequences support</a></li>
<li><a href="#Atomic-Reduction-Operation-APIs" title="Atomic Reduction Operation APIs">Atomic Reduction Operation APIs</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</div><div class="toc-menu"><a class="expand-toggle" href="#">Expand all</a><a class="back-to-top" href="#">Back to top</a><a class="go-to-bottom" href="#">Go to bottom</a></div>
            </ul>
        </div>
    </div>
    <div id="ui-toc-affix" class="ui-affix-toc ui-toc-dropdown unselectable hidden-print" data-spy="affix" style="top:17px;display:none;"  >
        <div class="toc"><ul class="nav">
<li><a href="#Atomic-Reduction-Operations" title="Atomic Reduction Operations">Atomic Reduction Operations</a><ul class="nav">
<li><a href="#Introduction" title="Introduction">Introduction</a></li>
<li><a href="#Motivation" title="Motivation">Motivation</a><ul class="nav">
<li><a href="#Hardware-Exposure" title="Hardware Exposure">Hardware Exposure</a></li>
<li><a href="#Performance" title="Performance">Performance</a></li>
</ul>
</li>
<li><a href="#Design" title="Design">Design</a><ul class="nav">
<li><a href="#Alternative-optimizations-for-fetch_ltkeygt" title="Alternative: optimizations for fetch_<key>">Alternative: optimizations for fetch_&lt;key&gt;</a></li>
<li><a href="#Forward-progress" title="Forward progress">Forward progress</a></li>
<li><a href="#Generalized-Atomic-Reduction-Operations" title="Generalized Atomic Reduction Operations">Generalized Atomic Reduction Operations</a></li>
<li><a href="#Memory-Ordering" title="Memory Ordering">Memory Ordering</a></li>
<li><a href="#Formalization" title="Formalization">Formalization</a></li>
</ul>
</li>
<li><a href="#Wording" title="Wording">Wording</a><ul class="nav">
<li><a href="#Unsequenced-support" title="Unsequenced support">Unsequenced support</a></li>
<li><a href="#Forward-progress1" title="Forward progress">Forward progress</a></li>
<li><a href="#No-acquire-sequences-support" title="No acquire sequences support">No acquire sequences support</a></li>
<li><a href="#Atomic-Reduction-Operation-APIs" title="Atomic Reduction Operation APIs">Atomic Reduction Operation APIs</a></li>
</ul>
</li>
</ul>
</li>
</ul>
</div><div class="toc-menu"><a class="expand-toggle" href="#">Expand all</a><a class="back-to-top" href="#">Back to top</a><a class="go-to-bottom" href="#">Go to bottom</a></div>
    </div>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js" integrity="sha256-hVVnYaiADRTO2PzUGmuLJr8BLUSjGIZsDYGmIJLv2b8=" crossorigin="anonymous"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha256-U5ZEeKfGNOja007MMD3YBI0A3OSZOQbeG6z2f2Y0hu8=" crossorigin="anonymous" defer></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/gist-embed/2.6.0/gist-embed.min.js" integrity="sha256-KyF2D6xPIJUW5sUDSs93vWyZm+1RzIpKCexxElmxl8g=" crossorigin="anonymous" defer></script>
    <script>
        // Rendered markdown container; windowResize() measures it to position the TOC.
        var markdown = $(".markdown-body");

        /**
         * Attaches a smooth-scrolling click handler to every in-page anchor
         * (an <a> whose href starts with '#' and carries a non-empty fragment).
         *
         * On click, the fragment is resolved to an element by id. If no such
         * element exists, the browser's default behaviour is left untouched.
         * Otherwise the default jump is suppressed, the page scrolls to the
         * target over 100 ms, and the fragment is written to the URL afterwards.
         */
        function smoothHashScroll() {
            // Resolve a URL fragment ("#some-id") to its element, or null.
            // The fragment is looked up by id instead of being handed to $() as
            // a selector: ids containing selector metacharacters ('.', ':', ...)
            // or percent-encoded characters would otherwise throw or match the
            // wrong element.
            function resolveTarget(hash) {
                var rawId = hash.slice(1);
                var decodedId = rawId;
                try {
                    decodedId = decodeURIComponent(rawId);
                } catch (err) {
                    // Malformed escape sequence: keep the raw fragment.
                }
                return document.getElementById(decodedId) || document.getElementById(rawId);
            }

            $("a[href^='#']").each(function () {
                // Bare "#" links have no fragment to scroll to.
                if (!this.hash) return;
                $(this).on('click', function (e) {
                    // store hash
                    var hash = this.hash;
                    var target = resolveTarget(hash);
                    if (!target) return;
                    // prevent default anchor click behavior
                    e.preventDefault();
                    // animate
                    $('body, html').stop(true, true).animate({
                        scrollTop: $(target).offset().top
                    }, 100, "linear", function () {
                        // when done, add hash to url
                        // (default click behaviour)
                        window.location.hash = hash;
                    });
                });
            });
        }

        // Wire up smooth scrolling for in-page links.
        smoothHashScroll();

        // Cached jQuery handles for the table-of-contents elements.
        var toc = $('.ui-toc'),
            tocAffix = $('.ui-affix-toc'),
            tocDropdown = $('.ui-toc-dropdown');

        // Keep clicks inside a TOC dropdown from bubbling further up the DOM.
        tocDropdown.on('click', function (event) {
            event.stopPropagation();
        });

        // Selects which TOC variant is shown (true: affixed, false: dropdown);
        // recalculated by windowResize().
        var enoughForAffixToc = true;

        /**
         * (Re)initialises the scrollspy plugin on the body and displays the
         * TOC variant selected by the current enoughForAffixToc flag.
         */
        function generateScrollspy() {
            var $body = $(document.body);
            $body.scrollspy({ target: '' });
            $body.scrollspy('refresh');

            // Exactly one of the two TOC variants is visible at a time.
            var toHide = enoughForAffixToc ? toc : tocAffix;
            var toShow = enoughForAffixToc ? tocAffix : toc;
            toHide.hide();
            toShow.show();

            // Fire a scroll event on the body.
            $body.scroll();
        }

        /**
         * Repositions both TOC variants relative to the markdown container and
         * switches between them when the side gutter crosses the 133px threshold.
         */
        function windowResize() {
            // Dropdown TOC: distance from the window's right edge to the right
            // edge of the markdown content box (outer width minus right padding).
            var contentPadRight = parseFloat(markdown.css('padding-right'));
            var windowWidth = $(window).width();
            var contentRightEdge = markdown.offset().left + markdown.outerWidth() - contentPadRight;
            toc.css('right', (windowWidth - contentRightEdge) + 'px');

            // Affixed TOC: only used when the gutter beside the content is at
            // least 133px wide (ipad or wider device).
            var sideGutter = (markdown.parent().outerWidth() - markdown.outerWidth()) / 2;
            var fitsAffix = sideGutter >= 133;
            if (fitsAffix) {
                var affixInset = (tocAffix.outerWidth() - tocAffix.width()) / 2;
                var affixLeft = markdown.offset().left + markdown.outerWidth() - affixInset;
                tocAffix.css('left', affixLeft + 'px');
            }

            // Rebuild scrollspy only when the chosen TOC variant changes.
            if (fitsAffix != enoughForAffixToc) {
                enoughForAffixToc = fitsAffix;
                generateScrollspy();
            }
        }
        // Recompute the TOC layout on every window resize.
        $(window).on('resize', function () {
            windowResize();
        });
        // Initial layout pass and scrollspy setup once the DOM is ready.
        $(function () {
            windowResize();
            generateScrollspy();
        });

        /**
         * Removes the fragment from the current URL.
         *
         * Uses history.replaceState when available so the address bar is left
         * without a dangling '#' and no fragment navigation is triggered.
         * Assigning an empty string to location.hash navigates to the empty
         * fragment, which makes the browser jump to the top of the document and
         * fights a scroll animation started just before this call. The
         * assignment is kept as a fallback for environments where the History
         * API is missing or rejects the URL.
         */
        function removeHash() {
            var loc = window.location;
            if (window.history && typeof window.history.replaceState === 'function') {
                try {
                    // Keep the existing state object; only the URL changes.
                    window.history.replaceState(window.history.state, document.title, loc.pathname + loc.search);
                    return;
                } catch (err) {
                    // replaceState refused the URL; fall through to the
                    // legacy behaviour below.
                }
            }
            loc.hash = '';
        }

        // TOC menu links for jumping to either end of the page.
        var backtotop = $('.back-to-top'),
            gotobottom = $('.go-to-bottom');

        // "Back to top": suppress the link's default action, scroll up, clear the hash.
        backtotop.on('click', function (event) {
            event.preventDefault();
            event.stopPropagation();
            scrollToTop && scrollToTop();
            removeHash();
        });

        // "Go to bottom": suppress the link's default action, scroll down, clear the hash.
        gotobottom.on('click', function (event) {
            event.preventDefault();
            event.stopPropagation();
            scrollToBottom && scrollToBottom();
            removeHash();
        });

        // Expand/collapse state of the TOC tree, shared with checkExpandToggle().
        var toggle = $('.expand-toggle');
        var tocExpand = false;

        // Bring the DOM in line with the initial (collapsed) state.
        checkExpandToggle();

        // Flip the state on each click and re-apply it.
        toggle.on('click', function (event) {
            event.preventDefault();
            event.stopPropagation();
            tocExpand = !tocExpand;
            checkExpandToggle();
        });

        /**
         * Applies the current tocExpand state to the DOM: sets or clears the
         * 'expand' class on every TOC inside a dropdown and updates the label
         * of the expand/collapse links.
         */
        function checkExpandToggle () {
            var expanded = !!tocExpand;
            var $tocLists = $('.ui-toc-dropdown .toc');
            var $toggleLinks = $('.expand-toggle');

            $tocLists.toggleClass('expand', expanded);
            $toggleLinks.text(expanded ? 'Collapse all' : 'Expand all');
        }

        /**
         * Finishes any scroll animation in progress, then animates the page to
         * scroll offset 0 over 100 ms with linear easing.
         */
        function scrollToTop() {
            var $page = $('body, html');
            $page.stop(true, true);
            $page.animate({ scrollTop: 0 }, 100, "linear");
        }

        /**
         * Finishes any scroll animation in progress, then animates the page to
         * the body's full scroll height over 100 ms with linear easing.
         */
        function scrollToBottom() {
            var $page = $('body, html');
            $page.stop(true, true);
            // Read the height after stop() so the order of effects is unchanged.
            var bottom = document.body.scrollHeight;
            $page.animate({ scrollTop: bottom }, 100, "linear");
        }
    </script>
</body>

</html>
