<!DOCTYPE html>

<html lang="en">

<head>
    <meta charset="utf-8">
    <!-- Ask legacy Internet Explorer to use its newest rendering engine. -->
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- Responsive viewport. Zooming is intentionally left enabled: disabling it
         (user-scalable=no) prevents low-vision readers from enlarging the text. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- Hints for "add to home screen" on iOS and Android browsers. -->
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black">
    <meta name="mobile-web-app-capable" content="yes">
    <title>
        barrier’s phase completion guarantees
    </title>
    <!-- Favicon. "image/ico" is not a recognised MIME type; "image/x-icon" is the
         conventional one for .ico files. -->
    <link rel="icon" type="image/x-icon" href="https://isocpp.org/favicon.ico">

    <!-- Third-party stylesheets, each pinned with a Subresource Integrity hash. -->
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha256-916EbMg70RQy9LHiGkXzG8hSg9EdNy97GazNG/aiY1w=" crossorigin="anonymous">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css" integrity="sha256-eZrrJcwDc/3uDhsdt61sL2oOBY362qM3lon1gyExkL0=" crossorigin="anonymous">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/ionicons/2.0.1/css/ionicons.min.css" integrity="sha256-3iu9jgsy9TpTwXKb7bNQzqWekRX7pPK+2OLj3R922fo=" crossorigin="anonymous">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/octicons/3.5.0/octicons.min.css" integrity="sha256-QiWfLIsCT02Sdwkogf6YMiQlj4NE84MKkzEMkZnMGdg=" crossorigin="anonymous">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.5.1/themes/prism.min.css" integrity="sha256-vtR0hSWRc3Tb26iuN2oZHt3KRUomwTufNIf5/4oeCyg=" crossorigin="anonymous">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@hackmd/emojify.js@2.1.0/dist/css/basic/emojify.min.css" integrity="sha256-UOrvMOsSDSrW6szVLe8ZDZezBxh5IoIfgTwdNDgTjiU=" crossorigin="anonymous">
    <style>
        @import url(https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,500,500i|Source+Code+Pro:300,400,500|Source+Sans+Pro:300,300i,400,400i,600,600i|Source+Serif+Pro&subset=latin-ext);.hljs{background:#fff;color:#333;display:block;overflow-x:auto;padding:.5em}.hljs-comment,.hljs-meta{color:#969896}.hljs-emphasis,.hljs-quote,.hljs-string,.hljs-strong,.hljs-template-variable,.hljs-variable{color:#df5000}.hljs-keyword,.hljs-selector-tag,.hljs-type{color:#a71d5d}.hljs-attribute,.hljs-bullet,.hljs-literal,.hljs-number,.hljs-symbol{color:#0086b3}.hljs-built_in,.hljs-builtin-name{color:#005cc5}.hljs-name,.hljs-section{color:#63a35c}.hljs-tag{color:#333}.hljs-attr,.hljs-selector-attr,.hljs-selector-class,.hljs-selector-id,.hljs-selector-pseudo,.hljs-title{color:#795da3}.hljs-addition{background-color:#eaffea;color:#55a532}.hljs-deletion{background-color:#ffecec;color:#bd2c00}.hljs-link{text-decoration:underline}.markdown-body{word-wrap:break-word;font-size:16px;line-height:1.5}.markdown-body:after,.markdown-body:before{content:"";display:table}.markdown-body:after{clear:both}.markdown-body>:first-child{margin-top:0!important}.markdown-body>:last-child{margin-bottom:0!important}.markdown-body a:not([href]){color:inherit;text-decoration:none}.markdown-body .absent{color:#c00}.markdown-body .anchor{float:left;line-height:1;margin-left:-20px;padding-right:4px}.markdown-body .anchor:focus{outline:none}.markdown-body blockquote,.markdown-body dl,.markdown-body ol,.markdown-body p,.markdown-body pre,.markdown-body table,.markdown-body ul{margin-bottom:16px;margin-top:0}.markdown-body hr{background-color:#e7e7e7;border:0;height:.25em;margin:24px 0;padding:0}.markdown-body blockquote{border-left:.25em solid #ddd;color:#777;font-size:16px;padding:0 1em}.markdown-body blockquote>:first-child{margin-top:0}.markdown-body blockquote>:last-child{margin-bottom:0}.markdown-body kbd,.popover kbd{background-color:#fcfcfc;border:1px solid;border-color:#ccc #ccc 
#bbb;border-radius:3px;box-shadow:inset 0 -1px 0 #bbb;color:#555;display:inline-block;font-size:11px;line-height:10px;padding:3px 5px;vertical-align:middle}.markdown-body .loweralpha{list-style-type:lower-alpha}.markdown-body h1,.markdown-body h2,.markdown-body h3,.markdown-body h4,.markdown-body h5,.markdown-body h6{font-weight:600;line-height:1.25;margin-bottom:16px;margin-top:24px}.markdown-body h1 .octicon-link,.markdown-body h2 .octicon-link,.markdown-body h3 .octicon-link,.markdown-body h4 .octicon-link,.markdown-body h5 .octicon-link,.markdown-body h6 .octicon-link{color:#000;vertical-align:middle;visibility:hidden}.markdown-body h1:hover .anchor,.markdown-body h2:hover .anchor,.markdown-body h3:hover .anchor,.markdown-body h4:hover .anchor,.markdown-body h5:hover .anchor,.markdown-body h6:hover .anchor{text-decoration:none}.markdown-body h1:hover .anchor .octicon-link,.markdown-body h2:hover .anchor .octicon-link,.markdown-body h3:hover .anchor .octicon-link,.markdown-body h4:hover .anchor .octicon-link,.markdown-body h5:hover .anchor .octicon-link,.markdown-body h6:hover .anchor .octicon-link{visibility:visible}.markdown-body h1 code,.markdown-body h1 tt,.markdown-body h2 code,.markdown-body h2 tt,.markdown-body h3 code,.markdown-body h3 tt,.markdown-body h4 code,.markdown-body h4 tt,.markdown-body h5 code,.markdown-body h5 tt,.markdown-body h6 code,.markdown-body h6 tt{font-size:inherit}.markdown-body h1{font-size:2em}.markdown-body h1,.markdown-body h2{border-bottom:1px solid #eee;padding-bottom:.3em}.markdown-body h2{font-size:1.5em}.markdown-body h3{font-size:1.25em}.markdown-body h4{font-size:1em}.markdown-body h5{font-size:.875em}.markdown-body h6{color:#777;font-size:.85em}.markdown-body ol,.markdown-body ul{padding-left:2em}.markdown-body ol.no-list,.markdown-body ul.no-list{list-style-type:none;padding:0}.markdown-body ol ol,.markdown-body ol ul,.markdown-body ul ol,.markdown-body ul ul{margin-bottom:0;margin-top:0}.markdown-body 
li>p{margin-top:16px}.markdown-body li+li{padding-top:.25em}.markdown-body dl{padding:0}.markdown-body dl dt{font-size:1em;font-style:italic;font-weight:700;margin-top:16px;padding:0}.markdown-body dl dd{margin-bottom:16px;padding:0 16px}.markdown-body table{display:block;overflow:auto;width:100%;word-break:normal;word-break:keep-all}.markdown-body table th{font-weight:700}.markdown-body table td,.markdown-body table th{border:1px solid #ddd;padding:6px 13px}.markdown-body table tr{background-color:#fff;border-top:1px solid #ccc}.markdown-body table tr:nth-child(2n){background-color:#f8f8f8}.markdown-body img{background-color:#fff;box-sizing:initial;max-width:100%}.markdown-body img[align=right]{padding-left:20px}.markdown-body img[align=left]{padding-right:20px}.markdown-body .emoji{background-color:initial;max-width:none;vertical-align:text-top}.markdown-body span.frame{display:block;overflow:hidden}.markdown-body span.frame>span{border:1px solid #ddd;display:block;float:left;margin:13px 0 0;overflow:hidden;padding:7px;width:auto}.markdown-body span.frame span img{display:block;float:left}.markdown-body span.frame span span{clear:both;color:#333;display:block;padding:5px 0 0}.markdown-body span.align-center{clear:both;display:block;overflow:hidden}.markdown-body span.align-center>span{display:block;margin:13px auto 0;overflow:hidden;text-align:center}.markdown-body span.align-center span img{margin:0 auto;text-align:center}.markdown-body span.align-right{clear:both;display:block;overflow:hidden}.markdown-body span.align-right>span{display:block;margin:13px 0 0;overflow:hidden;text-align:right}.markdown-body span.align-right span img{margin:0;text-align:right}.markdown-body span.float-left{display:block;float:left;margin-right:13px;overflow:hidden}.markdown-body span.float-left span{margin:13px 0 0}.markdown-body span.float-right{display:block;float:right;margin-left:13px;overflow:hidden}.markdown-body span.float-right>span{display:block;margin:13px auto 
0;overflow:hidden;text-align:right}.markdown-body code,.markdown-body tt{background-color:#0000000a;border-radius:3px;font-size:85%;margin:0;padding:.2em 0}.markdown-body code:after,.markdown-body code:before,.markdown-body tt:after,.markdown-body tt:before{content:"\00a0";letter-spacing:-.2em}.markdown-body code br,.markdown-body tt br{display:none}.markdown-body del code{text-decoration:inherit}.markdown-body pre{word-wrap:normal}.markdown-body pre>code{background:#0000;border:0;font-size:100%;margin:0;padding:0;white-space:pre;word-break:normal}.markdown-body .highlight{margin-bottom:16px}.markdown-body .highlight pre{margin-bottom:0;word-break:normal}.markdown-body .highlight pre,.markdown-body pre{background-color:#f7f7f7;border-radius:3px;font-size:85%;line-height:1.45;overflow:auto;padding:16px}.markdown-body pre code,.markdown-body pre tt{word-wrap:normal;background-color:initial;border:0;display:inline;line-height:inherit;margin:0;max-width:auto;overflow:visible;padding:0}.markdown-body pre code:after,.markdown-body pre code:before,.markdown-body pre tt:after,.markdown-body pre tt:before{content:normal}.markdown-body .csv-data td,.markdown-body .csv-data th{font-size:12px;line-height:1;overflow:hidden;padding:5px;text-align:left;white-space:nowrap}.markdown-body .csv-data .blob-line-num{background:#fff;border:0;padding:10px 8px 9px;text-align:right}.markdown-body .csv-data tr{border-top:0}.markdown-body .csv-data th{background:#f8f8f8;border-top:0;font-weight:700}.news .alert .markdown-body blockquote{border:0;padding:0 0 0 40px}.activity-tab .news .alert .commits,.activity-tab .news .markdown-body blockquote{padding-left:0}.task-list-item{list-style-type:none}.task-list-item label{font-weight:400}.task-list-item.enabled label{cursor:pointer}.task-list-item+.task-list-item{margin-top:3px}.task-list-item-checkbox{cursor:default!important;float:left;margin:.31em 0 .2em 
-1.3em!important;vertical-align:middle}.markdown-body{max-width:758px;overflow:visible!important;padding-bottom:40px;padding-top:40px;position:relative}.markdown-body .emoji{vertical-align:top}.markdown-body pre{border:inherit!important}.markdown-body code{color:inherit!important}.markdown-body pre code .wrapper{display:-moz-inline-flex;display:-ms-inline-flex;display:-o-inline-flex;display:inline-flex}.markdown-body pre code .gutter{float:left;overflow:hidden;-webkit-user-select:none;user-select:none}.markdown-body pre code .gutter.linenumber{border-right:3px solid #6ce26c!important;box-sizing:initial;color:#afafaf!important;cursor:default;display:inline-block;min-width:20px;padding:0 8px 0 0;position:relative;text-align:right;z-index:4}.markdown-body pre code .gutter.linenumber>span:before{content:attr(data-linenumber)}.markdown-body pre code .code{float:left;margin:0 0 0 16px}.markdown-body .gist .line-numbers{border-bottom:none;border-left:none;border-top:none}.markdown-body .gist .line-data{border:none}.markdown-body .gist table{border-collapse:inherit!important;border-spacing:0}.markdown-body code[data-gist-id]{background:none;padding:0}.markdown-body code[data-gist-id]:after,.markdown-body code[data-gist-id]:before{content:""}.markdown-body code[data-gist-id] .blob-num{border:unset}.markdown-body code[data-gist-id] table{margin-bottom:unset;overflow:unset}.markdown-body code[data-gist-id] table tr{background:unset}.markdown-body[dir=rtl] pre{direction:ltr}.markdown-body[dir=rtl] code{direction:ltr;unicode-bidi:embed}.markdown-body .alert>p:last-child{margin-bottom:0}.markdown-body pre.abc,.markdown-body pre.flow-chart,.markdown-body pre.graphviz,.markdown-body pre.mermaid,.markdown-body pre.sequence-diagram,.markdown-body pre.vega{background-color:inherit;border-radius:0;overflow:visible;text-align:center;white-space:inherit}.markdown-body pre.abc>code,.markdown-body pre.flow-chart>code,.markdown-body pre.graphviz>code,.markdown-body 
pre.mermaid>code,.markdown-body pre.sequence-diagram>code,.markdown-body pre.vega>code{text-align:left}.markdown-body pre.abc>svg,.markdown-body pre.flow-chart>svg,.markdown-body pre.graphviz>svg,.markdown-body pre.mermaid>svg,.markdown-body pre.sequence-diagram>svg,.markdown-body pre.vega>svg{height:100%;max-width:100%}.markdown-body pre>code.wrap{word-wrap:break-word;white-space:pre-wrap;white-space:-moz-pre-wrap;white-space:-pre-wrap;white-space:-o-pre-wrap}.markdown-body .alert>p:last-child,.markdown-body .alert>ul:last-child{margin-bottom:0}.markdown-body summary{display:list-item}.markdown-body summary:focus{outline:none}.markdown-body details summary{cursor:pointer}.markdown-body details:not([open])>:not(summary){display:none}.markdown-body figure{margin:1em 40px}.markdown-body .mark,.markdown-body mark{background-color:#fff1a7}.vimeo,.youtube{background-color:#000;background-position:50%;background-repeat:no-repeat;background-size:contain;cursor:pointer;display:table;overflow:hidden;text-align:center}.vimeo,.youtube{position:relative;width:100%}.youtube{padding-bottom:56.25%}.vimeo img{object-fit:contain;width:100%;z-index:0}.youtube img{object-fit:cover;z-index:0}.vimeo iframe,.youtube iframe,.youtube img{height:100%;left:0;position:absolute;top:0;width:100%}.vimeo iframe,.youtube iframe{vertical-align:middle;z-index:1}.vimeo .icon,.youtube .icon{color:#fff;height:auto;left:50%;opacity:.3;position:absolute;top:50%;transform:translate(-50%,-50%);transition:opacity .2s;width:auto;z-index:0}.vimeo:hover .icon,.youtube:hover .icon{opacity:.6;transition:opacity .2s}.slideshare .inner,.speakerdeck .inner{position:relative;width:100%}.slideshare .inner iframe,.speakerdeck .inner iframe{bottom:0;height:100%;left:0;position:absolute;right:0;top:0;width:100%}.figma{display:table;padding-bottom:56.25%;position:relative;width:100%}.figma iframe{border:1px solid 
#eee;bottom:0;height:100%;left:0;position:absolute;right:0;top:0;width:100%}.markmap-container{height:300px}.markmap-container>svg{height:100%;width:100%}.MJX_Assistive_MathML{display:none}#MathJax_Message{z-index:1000!important}.ui-infobar{color:#777;margin:25px auto -25px;max-width:760px;position:relative;z-index:2}.toc .invisable-node{list-style-type:none}.ui-toc{bottom:20px;position:fixed;z-index:998}.ui-toc.both-mode{margin-left:8px}.ui-toc.both-mode .ui-toc-label{border-bottom-left-radius:0;border-top-left-radius:0;height:40px;padding:10px 4px}.ui-toc-label{background-color:#e6e6e6;border:none;color:#868686;transition:opacity .2s}.ui-toc .open .ui-toc-label{color:#fff;opacity:1;transition:opacity .2s}.ui-toc-label:focus{background-color:#ccc;color:#000;opacity:.3}.ui-toc-label:hover{background-color:#ccc;opacity:1;transition:opacity .2s}.ui-toc-dropdown{margin-bottom:20px;margin-top:20px;max-height:70vh;max-width:45vw;overflow:auto;padding-left:10px;padding-right:10px;text-align:inherit;width:25vw}.ui-toc-dropdown>.toc{max-height:calc(70vh - 100px);overflow:auto}.ui-toc-dropdown[dir=rtl] .nav{letter-spacing:.0029em;padding-right:0}.ui-toc-dropdown a{overflow:hidden;text-overflow:ellipsis;white-space:pre}.ui-toc-dropdown .nav>li>a{color:#767676;display:block;font-size:13px;font-weight:500;padding:4px 20px}.ui-toc-dropdown .nav>li:first-child:last-child>ul,.ui-toc-dropdown .toc.expand ul{display:block}.ui-toc-dropdown .nav>li>a:focus,.ui-toc-dropdown .nav>li>a:hover{background-color:initial;border-left:1px solid #000;color:#000;padding-left:19px;text-decoration:none}.ui-toc-dropdown[dir=rtl] .nav>li>a:focus,.ui-toc-dropdown[dir=rtl] .nav>li>a:hover{border-left:none;border-right:1px solid #000;padding-right:19px}.ui-toc-dropdown .nav>.active:focus>a,.ui-toc-dropdown .nav>.active:hover>a,.ui-toc-dropdown .nav>.active>a{background-color:initial;border-left:2px solid #000;color:#000;font-weight:700;padding-left:18px}.ui-toc-dropdown[dir=rtl] 
.nav>.active:focus>a,.ui-toc-dropdown[dir=rtl] .nav>.active:hover>a,.ui-toc-dropdown[dir=rtl] .nav>.active>a{border-left:none;border-right:2px solid #000;padding-right:18px}.ui-toc-dropdown .nav .nav{display:none;padding-bottom:10px}.ui-toc-dropdown .nav>.active>ul{display:block}.ui-toc-dropdown .nav .nav>li>a{font-size:12px;font-weight:400;padding-bottom:1px;padding-left:30px;padding-top:1px}.ui-toc-dropdown[dir=rtl] .nav .nav>li>a{padding-right:30px}.ui-toc-dropdown .nav .nav>li>ul>li>a{font-size:12px;font-weight:400;padding-bottom:1px;padding-left:40px;padding-top:1px}.ui-toc-dropdown[dir=rtl] .nav .nav>li>ul>li>a{padding-right:40px}.ui-toc-dropdown .nav .nav>li>a:focus,.ui-toc-dropdown .nav .nav>li>a:hover{padding-left:29px}.ui-toc-dropdown[dir=rtl] .nav .nav>li>a:focus,.ui-toc-dropdown[dir=rtl] .nav .nav>li>a:hover{padding-right:29px}.ui-toc-dropdown .nav .nav>li>ul>li>a:focus,.ui-toc-dropdown .nav .nav>li>ul>li>a:hover{padding-left:39px}.ui-toc-dropdown[dir=rtl] .nav .nav>li>ul>li>a:focus,.ui-toc-dropdown[dir=rtl] .nav .nav>li>ul>li>a:hover{padding-right:39px}.ui-toc-dropdown .nav .nav>.active:focus>a,.ui-toc-dropdown .nav .nav>.active:hover>a,.ui-toc-dropdown .nav .nav>.active>a{font-weight:500;padding-left:28px}.ui-toc-dropdown[dir=rtl] .nav .nav>.active:focus>a,.ui-toc-dropdown[dir=rtl] .nav .nav>.active:hover>a,.ui-toc-dropdown[dir=rtl] .nav .nav>.active>a{padding-right:28px}.ui-toc-dropdown .nav .nav>.active>.nav>.active:focus>a,.ui-toc-dropdown .nav .nav>.active>.nav>.active:hover>a,.ui-toc-dropdown .nav .nav>.active>.nav>.active>a{font-weight:500;padding-left:38px}.ui-toc-dropdown[dir=rtl] .nav .nav>.active>.nav>.active:focus>a,.ui-toc-dropdown[dir=rtl] .nav .nav>.active>.nav>.active:hover>a,.ui-toc-dropdown[dir=rtl] .nav .nav>.active>.nav>.active>a{padding-right:38px}.markdown-body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html[lang^=ja] 
.markdown-body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,Hiragino Kaku Gothic Pro,ヒラギノ角ゴ Pro W3,Osaka,Meiryo,メイリオ,MS Gothic,ＭＳ ゴシック,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html[lang=zh-tw] .markdown-body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,PingFang TC,Microsoft JhengHei,微軟正黑,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html[lang=zh-cn] .markdown-body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,PingFang SC,Microsoft YaHei,微软雅黑,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html .markdown-body[lang^=ja]{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,Hiragino Kaku Gothic Pro,ヒラギノ角ゴ Pro W3,Osaka,Meiryo,メイリオ,MS Gothic,ＭＳ ゴシック,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html .markdown-body[lang=zh-tw]{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,PingFang TC,Microsoft JhengHei,微軟正黑,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html .markdown-body[lang=zh-cn]{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica Neue,Helvetica,Roboto,Arial,PingFang SC,Microsoft YaHei,微软雅黑,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol}html[lang^=ja] .ui-toc-dropdown{font-family:Source Sans Pro,Helvetica,Arial,Meiryo UI,MS PGothic,ＭＳ Ｐゴシック,sans-serif}html[lang=zh-tw] .ui-toc-dropdown{font-family:Source Sans Pro,Helvetica,Arial,Microsoft JhengHei UI,微軟正黑UI,sans-serif}html[lang=zh-cn] .ui-toc-dropdown{font-family:Source Sans Pro,Helvetica,Arial,Microsoft YaHei UI,微软雅黑UI,sans-serif}html .ui-toc-dropdown[lang^=ja]{font-family:Source Sans Pro,Helvetica,Arial,Meiryo UI,MS PGothic,ＭＳ Ｐゴシック,sans-serif}html .ui-toc-dropdown[lang=zh-tw]{font-family:Source Sans Pro,Helvetica,Arial,Microsoft JhengHei UI,微軟正黑UI,sans-serif}html 
.ui-toc-dropdown[lang=zh-cn]{font-family:Source Sans Pro,Helvetica,Arial,Microsoft YaHei UI,微软雅黑UI,sans-serif}.ui-affix-toc{max-height:70vh;max-width:15vw;overflow:auto;position:fixed;top:0}.back-to-top,.expand-toggle,.go-to-bottom{color:#999;display:block;font-size:12px;font-weight:500;margin-left:10px;margin-top:10px;padding:4px 10px}.back-to-top:focus,.back-to-top:hover,.expand-toggle:focus,.expand-toggle:hover,.go-to-bottom:focus,.go-to-bottom:hover{color:#563d7c;text-decoration:none}.back-to-top,.go-to-bottom{margin-top:0}.ui-user-icon{background-position:50%;background-repeat:no-repeat;background-size:cover;border-radius:50%;display:block;height:20px;margin-bottom:2px;margin-right:5px;margin-top:2px;width:20px}.ui-user-icon.small{display:inline-block;height:18px;margin:0 0 .2em;vertical-align:middle;width:18px}.ui-infobar>small>span{line-height:22px}.ui-infobar>small .dropdown{display:inline-block}.ui-infobar>small .dropdown a:focus,.ui-infobar>small .dropdown a:hover{text-decoration:none}.ui-more-info{color:#888;cursor:pointer;vertical-align:middle}.ui-more-info .fa{font-size:16px}.ui-connectedGithub,.ui-published-note{color:#888}.ui-connectedGithub{line-height:23px;white-space:nowrap}.ui-connectedGithub a.file-path{color:#888;padding-left:22px;text-decoration:none}.ui-connectedGithub a.file-path:active,.ui-connectedGithub a.file-path:hover{color:#888;text-decoration:underline}.ui-connectedGithub .fa{font-size:20px}.ui-published-note .fa{font-size:20px;vertical-align:top}.unselectable{-webkit-user-select:none;-o-user-select:none;user-select:none}.selectable{-webkit-user-select:text;-o-user-select:text;user-select:text}.inline-spoiler-section{cursor:pointer}.inline-spoiler-section .spoiler-text{background-color:#333;border-radius:2px}.inline-spoiler-section .spoiler-text>*{opacity:0}.inline-spoiler-section 
.spoiler-img{filter:blur(10px)}.inline-spoiler-section.raw{background-color:#333;border-radius:2px}.inline-spoiler-section.raw>*{opacity:0}.inline-spoiler-section.unveil{cursor:auto}.inline-spoiler-section.unveil .spoiler-text{background-color:#3333331a}.inline-spoiler-section.unveil .spoiler-text>*{opacity:1}.inline-spoiler-section.unveil .spoiler-img{filter:none}@media print{blockquote,div,img,pre,table{page-break-inside:avoid!important}a[href]:after{font-size:12px!important}}.markdown-body.slides{color:#222;position:relative;z-index:1}.markdown-body.slides:before{background-color:currentColor;bottom:0;box-shadow:0 0 0 50vw;content:"";display:block;left:0;position:absolute;right:0;top:0;z-index:-1}.markdown-body.slides section[data-markdown]{background-color:#fff;margin-bottom:1.5em;position:relative;text-align:center}.markdown-body.slides section[data-markdown] code{text-align:left}.markdown-body.slides section[data-markdown]:before{content:"";display:block;padding-bottom:56.23%}.markdown-body.slides section[data-markdown]>div:first-child{left:1em;max-height:100%;overflow:hidden;position:absolute;right:1em;top:50%;transform:translateY(-50%)}.markdown-body.slides section[data-markdown]>ul{display:inline-block}.markdown-body.slides>section>section+section:after{border:3px solid #777;content:"";height:1.5em;position:absolute;right:1em;top:-1.5em}.site-ui-font{font-family:Source Sans Pro,Helvetica,Arial,sans-serif}html[lang^=ja] .site-ui-font{font-family:Source Sans Pro,Helvetica,Arial,Hiragino Kaku Gothic Pro,ヒラギノ角ゴ Pro W3,Osaka,Meiryo,メイリオ,MS Gothic,ＭＳ ゴシック,sans-serif}html[lang=zh-tw] .site-ui-font{font-family:Source Sans Pro,Helvetica,Arial,PingFang TC,Microsoft JhengHei,微軟正黑,sans-serif}html[lang=zh-cn] .site-ui-font{font-family:Source Sans Pro,Helvetica,Arial,PingFang SC,Microsoft 
YaHei,微软雅黑,sans-serif}body{font-smoothing:subpixel-antialiased!important;-webkit-font-smoothing:subpixel-antialiased!important;-moz-osx-font-smoothing:auto!important;-webkit-overflow-scrolling:touch;font-family:Source Sans Pro,Helvetica,Arial,sans-serif;letter-spacing:.025em}html[lang^=ja] body{font-family:Source Sans Pro,Helvetica,Arial,Hiragino Kaku Gothic Pro,ヒラギノ角ゴ Pro W3,Osaka,Meiryo,メイリオ,MS Gothic,ＭＳ ゴシック,sans-serif}html[lang=zh-tw] body{font-family:Source Sans Pro,Helvetica,Arial,PingFang TC,Microsoft JhengHei,微軟正黑,sans-serif}html[lang=zh-cn] body{font-family:Source Sans Pro,Helvetica,Arial,PingFang SC,Microsoft YaHei,微软雅黑,sans-serif}abbr[title]{border-bottom:none;text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted}abbr[data-original-title],abbr[title]{cursor:help}body.modal-open{overflow-y:auto;padding-right:0!important}svg{text-shadow:none}
    </style>
    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
    <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
    <!--[if lt IE 9]>
    	<script src="https://cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv.min.js" integrity="sha256-3Jy/GbSLrg0o9y5Z5n1uw0qxZECH7C6OQpVBgNFYa0g=" crossorigin="anonymous"></script>
    	<script src="https://cdnjs.cloudflare.com/ajax/libs/respond.js/1.4.2/respond.min.js" integrity="sha256-g6iAfvZp+nDQ2TdTR/VVKJf3bGro4ub5fvWSWVRi2NE=" crossorigin="anonymous"></script>
		<script src="https://cdnjs.cloudflare.com/ajax/libs/es5-shim/4.5.9/es5-shim.min.js" integrity="sha256-8E4Is26QH0bD52WoQpcB+R/tcWQtpzlCojrybUd7Mxo=" crossorigin="anonymous"></script>
    <![endif]-->
</head>

<body>
    <div id="doc" class="markdown-body container-fluid comment-inner comment-enabled" data-hard-breaks="true"><p><span>Document Number: P2588R2</span><br>
<span>Date: 2022-11-07</span><br>
<span>Reply to: Gonzalo Brito Gadeschi &lt;gonzalob _at_ </span><a href="http://nvidia.com" target="_blank" rel="noopener"><span>nvidia.com</span></a><span>&gt;</span><br>
<span>Authors: Gonzalo Brito Gadeschi, Eric Niebler, Anthony Williams, Thomas Rodgers</span><br>
<span>Audience: LEWG</span></p><style>
/* Inserted text: green and underlined on a yellow highlight. */
ins {
  background-color: yellow;
  color: green;
  text-decoration: underline;
}

/* Deleted text: red and struck through on a yellow highlight. */
del {
  background-color: yellow;
  color: red;
  text-decoration: line-through;
}

/* Widen the document column. This rule comes later than the 758px max-width
   declared for .markdown-body in the head stylesheet, so it takes precedence. */
.markdown-body {
  max-width: 1000px;
}
</style><h1 id="barrier’s-phase-completion-guarantees" data-id="barrier’s-phase-completion-guarantees"><a class="anchor hidden-xs" href="#barrier’s-phase-completion-guarantees" title="barrier’s-phase-completion-guarantees"><span class="octicon octicon-link"></span></a><code>barrier</code><span>’s phase completion guarantees</span></h1><nav class="toc" aria-label="Table of contents"><ul>
<li><a href="#barrier’s-phase-completion-guarantees" title="barrier’s phase completion guarantees">barrier’s phase completion guarantees</a><ul>
<li><a href="#Abstract" title="Abstract">Abstract</a></li>
<li><a href="#Introduction" title="Introduction">Introduction</a></li>
<li><a href="#Analysis-of-barrier-semantics" title="Analysis of barrier semantics">Analysis of barrier semantics</a><ul>
<li><a href="#On-hardware-acceleration" title="On hardware acceleration">On hardware acceleration</a></li>
<li><a href="#Impact-on-existing-implementations" title="Impact on existing implementations">Impact on existing implementations</a></li>
<li><a href="#Largest-possible-semantic-change" title="Largest possible semantic change">Largest possible semantic change</a></li>
<li><a href="#Potential-impact-on-implementations" title="Potential impact on implementations">Potential impact on implementations</a></li>
<li><a href="#Impact-on-existing-applications" title="Impact on existing applications">Impact on existing applications</a></li>
<li><a href="#Potential-impact-on-applications" title="Potential impact on applications">Potential impact on applications</a></li>
</ul>
</li>
<li><a href="#Suggestion" title="Suggestion">Suggestion</a><ul>
<li><a href="#Suggested-wording" title="Suggested wording">Suggested wording</a></li>
</ul>
</li>
<li><a href="#Acknowledgements" title="Acknowledgements">Acknowledgements</a></li>
</ul>
</li>
</ul>
</nav><p><strong><span>Revisions</span></strong></p><ul>
<li><span>Revision 2:</span>
<ul>
<li>
<p><span>Discussed R1 at SG1 2022-11-07. The following polls were taken.</span></p>
<ul>
<li><strong><span>Poll 1</span></strong><span>: In response to DE-135 and US 63-131, we move to apply the change suggested in P2588R1.</span></li>
</ul>
<table>
<thead>
<tr>
<th><span>SF</span></th>
<th><span>F</span></th>
<th><span>N</span></th>
<th><span>A</span></th>
<th><span>SA</span></th>
</tr>
</thead>
<tbody>
<tr>
<td><span>2</span></td>
<td><span>3</span></td>
<td><span>6</span></td>
<td><span>6</span></td>
<td><span>0</span></td>
</tr>
</tbody>
</table>
<p><span>No consensus.</span></p>
<ul>
<li><strong><span>Poll 2</span></strong><span>: In response to DE-135 and US 63-131, we move to apply the change suggested in P2588R1 with the words “, or it is a new thread” removed.</span></li>
</ul>
<table>
<thead>
<tr>
<th><span>SF</span></th>
<th><span>F</span></th>
<th><span>N</span></th>
<th><span>A</span></th>
<th><span>SA</span></th>
</tr>
</thead>
<tbody>
<tr>
<td><span>4</span></td>
<td><span>12</span></td>
<td><span>2</span></td>
<td><span>0</span></td>
<td><span>0</span></td>
</tr>
</tbody>
</table>
<p><span>Consensus.</span></p>
<ul>
<li><strong><span>Poll 3</span></strong><span>: In response to DE-135 and US 63-131, we decline to apply the change suggested in P2588R1.</span></li>
</ul>
<p><span>Unanimous consent to not take this poll.</span></p>
</li>
<li>
<p><span>Updated to forbid the implementation from running the </span><code>CompletionFunction</code><span> on a new thread by removing the following statement from the proposed wording: </span><code>, or it is a new thread</code><span>.</span></p>
</li>
</ul>
</li>
<li><span>Revision 1:</span>
<ul>
<li><span>Updated wording, removed optional changes.</span></li>
<li><span>From the SG1 polls, LEWG should give feedback on whether we want to apply this change to </span><code>std::barrier</code><span> in the earliest standard, and whether we should consider doing so as a defect report.</span></li>
</ul>
</li>
<li><span>Revision 0:</span>
<ul>
<li>
<p><span>Polls taken at SG1 2022-08-29 meeting:</span></p>
<ul>
<li><strong><span>Poll 1</span></strong><span>: We like the relaxed phase completion guarantees on std::barrier as in the suggested wording of P2588 (with the small changes discussed).</span></li>
</ul>
<table>
<thead>
<tr>
<th><span>SF</span></th>
<th><span>F</span></th>
<th><span>N</span></th>
<th><span>A</span></th>
<th><span>SA</span></th>
</tr>
</thead>
<tbody>
<tr>
<td><span>4</span></td>
<td><span>2</span></td>
<td><span>0</span></td>
<td><span>0</span></td>
<td><span>0</span></td>
</tr>
</tbody>
</table>
</li>
<li>
<p><strong><span>Poll 2</span></strong><span>: We want to apply this change to std::barrier in the earliest Standard.</span></p>
<table>
<thead>
<tr>
<th><span>SF</span></th>
<th><span>F</span></th>
<th><span>N</span></th>
<th><span>A</span></th>
<th><span>SA</span></th>
</tr>
</thead>
<tbody>
<tr>
<td><span>5</span></td>
<td><span>0</span></td>
<td><span>1</span></td>
<td><span>0</span></td>
<td><span>0</span></td>
</tr>
</tbody>
</table>
</li>
<li>
<p><strong><span>Poll 3</span></strong><span>: We want to get a DR on C++ for this change.</span></p>
<table>
<thead>
<tr>
<th><span>SF</span></th>
<th><span>F</span></th>
<th><span>N</span></th>
<th><span>A</span></th>
<th><span>SA</span></th>
</tr>
</thead>
<tbody>
<tr>
<td><span>2</span></td>
<td><span>2</span></td>
<td><span>1</span></td>
<td><span>1</span></td>
<td><span>0</span></td>
</tr>
</tbody>
</table>
</li>
<li>
<p><span>Link to implementation: </span><a href="https://godbolt.org/z/W95Y1q1vY" target="_blank" rel="noopener"><span>https://godbolt.org/z/W95Y1q1vY</span></a></p>
</li>
</ul>
</li>
</ul><h2 id="Abstract" data-id="Abstract"><a class="anchor hidden-xs" href="#Abstract" title="Abstract"><span class="octicon octicon-link"></span></a><span>Abstract</span></h2><p><span>Unintended consequences of </span><code>std::barrier</code><span>’s specification constrain implementations to run the </span><code>CompletionFunction</code><span> on the last thread that arrives at the barrier during the phase. This prevents </span><code>std::barrier</code><span> from benefiting from hardware acceleration for thread synchronization. Removing these constraints is a </span><em><span>breaking change</span></em><span>. This paper aims to find a sweet spot for the barrier specification that delivers the functionality that applications need while allowing efficient implementations.</span></p><h2 id="Introduction" data-id="Introduction"><a class="anchor hidden-xs" href="#Introduction" title="Introduction"><span class="octicon octicon-link"></span></a><span>Introduction</span></h2><p><span>The specification of </span><code>std::barrier&lt;CompletionFunction&gt;</code><span> requires the phase completion step to run when the expected count becomes zero </span><em><span>on one of the threads that arrived at the barrier during the phase</span></em><span> </span><a href="http://eel.is/c++draft/thread.barrier#class-1.2" target="_blank" rel="noopener"><span>thread.barrier.class-1.2</span></a><span>:</span></p><blockquote>
<p><span>When the expected count reaches zero, the phase completion step is run. For the specialization with the default value of the </span><code>CompletionFunction</code><span> template parameter, the completion step is run as part of the call to </span><code>arrive</code><span> or </span><code>arrive_and_drop</code><span> that caused the expected count to reach zero. For other specializations, the completion step is run on one of the threads that arrived at the barrier during the phase.</span></p>
</blockquote><p><span>The specification of </span><code>std::barrier</code><span> does not require any thread that arrives at the barrier to call </span><code>std::barrier::wait</code><span>, but calling </span><code>barrier::wait</code><span> is necessary to observe phase completion </span><a href="http://eel.is/c++draft/thread.barrier#class-3.sentence-3" target="_blank" rel="noopener"><span>thread.barrier.class-3.sentence-3</span></a><span>:</span></p><blockquote>
<p><span>[…] the behavior is undefined if any of the barrier object’s member functions other than </span><code>wait</code><span> are called while the completion step is in progress.</span></p>
</blockquote><p><span>A thread that never calls </span><code>wait</code><span> can still arrive at the barrier again via synchronization through some other thread that does call </span><code>wait</code><span>. This is very useful in practice, as the following example shows:</span></p><p><a target="_blank" rel="noopener"></a><strong><span>Example 0</span></strong><span>: a producer / consumer pipeline (</span><a href="https://godbolt.org/z/T8Y7494W5" target="_blank" rel="noopener"><span>godbolt</span></a><span>)</span></p><pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span></div><div class="code">std<span class="token double-colon punctuation">::</span>barrier<span class="token operator">&lt;</span>CF0<span class="token operator">&gt;</span> <span class="token function">b0</span><span class="token punctuation">(</span><span class="token number">2</span><span class="token punctuation">,</span> cf0<span class="token punctuation">)</span><span class="token punctuation">;</span>
std<span class="token double-colon punctuation">::</span>barrier<span class="token operator">&lt;</span>CF1<span class="token operator">&gt;</span> <span class="token function">b1</span><span class="token punctuation">(</span><span class="token number">2</span><span class="token punctuation">,</span> cf1<span class="token punctuation">)</span><span class="token punctuation">;</span>

<span class="token keyword">void</span> <span class="token function">thread_0</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
    <span class="token keyword">while</span><span class="token punctuation">(</span><span class="token boolean">true</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
        <span class="token function">produce_data</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>        <span class="token comment">// A</span>
        b0<span class="token punctuation">.</span><span class="token function">arrive</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>           <span class="token comment">// B: signal data produced</span>
        b1<span class="token punctuation">.</span><span class="token function">arrive_and_wait</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>  <span class="token comment">// C: wait on data consumed</span>
    <span class="token punctuation">}</span>
<span class="token punctuation">}</span>

<span class="token keyword">void</span> <span class="token function">thread_1</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
    <span class="token keyword">while</span><span class="token punctuation">(</span><span class="token boolean">true</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
        b0<span class="token punctuation">.</span><span class="token function">arrive_and_wait</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>  <span class="token comment">// D: wait on data produced    </span>
        <span class="token function">consume_data</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>        <span class="token comment">// E</span>
        b1<span class="token punctuation">.</span><span class="token function">arrive_and_wait</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>  <span class="token comment">// F: signal data consumed</span>
    <span class="token punctuation">}</span>
<span class="token punctuation">}</span>
</div></div></code></pre><p><span>In this example, </span><code>thread_0</code><span> at “A” produces some data, and then at “B” signals </span><code>thread_1</code><span> that the data is ready. </span><code>thread_0</code><span> will never wait on barrier </span><code>b0</code><span>. Then </span><code>thread_0</code><span> waits on </span><code>thread_1</code><span> consuming the data, and proceeds to generate new data.</span></p><p><span>The </span><code>[[nodiscard]]</code><span> attribute on </span><code>arrive</code><span> shows that </span><code>barrier</code><span> designers were not expecting threads to arrive at the barrier without calling wait.</span></p><p><span>The current standard wording in </span><a href="https://eel.is/c++draft/thread.barrier#class-1.2.sentence-3" target="_blank" rel="noopener"><span>thread.barrier.class-1.2.sentence-3</span></a><span>:</span></p><blockquote>
<p><span>For other specializations, the completion step is run on one of the threads that arrived at the barrier during the phase.</span></p>
</blockquote><p><span>aims to provide implementations with enough freedom to run the </span><code>CompletionFunction</code><span> on any thread that participates in the barrier during the phase.</span></p><p><span>Unfortunately, the current wording requires all implementations to run the </span><code>CompletionFunction</code><span> as part of the call to </span><code>arrive</code><span> performed by the last thread that arrives at the barrier during the phase, as the following example shows:</span></p><p><a id="example1" target="_blank" rel="noopener"></a><strong><span>Example 1</span></strong><span>: Guarantee that </span><code>CompletionFunction</code><span> runs if no thread waits (</span><a href="https://godbolt.org/z/41vqEh58e" target="_blank" rel="noopener"><span>godbolt</span></a><span>)</span></p><pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span></div><div class="code">std<span class="token double-colon punctuation">::</span>barrier<span class="token operator">&lt;</span>CF<span class="token operator">&gt;</span> b<span class="token punctuation">{</span><span class="token number">2</span><span class="token punctuation">,</span> cf<span class="token punctuation">}</span><span class="token punctuation">;</span>
<span class="token keyword">using</span> tok_t <span class="token operator">=</span> <span class="token keyword">decltype</span><span class="token punctuation">(</span>b<span class="token punctuation">.</span><span class="token function">arrive</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>

<span class="token keyword">void</span> <span class="token function">thread</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
    <span class="token keyword">new</span> <span class="token function">tok_t</span><span class="token punctuation">(</span>b<span class="token punctuation">.</span><span class="token function">arrive</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>       <span class="token comment">// A: arrive and leak token</span>
<span class="token punctuation">}</span>                               <span class="token comment">// B: thread exit</span>

<span class="token keyword">auto</span> t0 <span class="token operator">=</span> std<span class="token double-colon punctuation">::</span><span class="token function">thread</span><span class="token punctuation">(</span>thread<span class="token punctuation">)</span><span class="token punctuation">;</span>  <span class="token comment">// C: Spawn two threads</span>
<span class="token keyword">auto</span> t1 <span class="token operator">=</span> std<span class="token double-colon punctuation">::</span><span class="token function">thread</span><span class="token punctuation">(</span>thread<span class="token punctuation">)</span><span class="token punctuation">;</span>

t0<span class="token punctuation">.</span><span class="token function">join</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>                      <span class="token comment">// D: Join them</span>
t1<span class="token punctuation">.</span><span class="token function">join</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>

<span class="token comment">// E: Standard guarantees that CompletionFunction did run</span>
</div></div></code></pre><p><span>This example spawns two threads at “C”, both of which execute the same sequence of operations:</span></p><ul>
<li><span>“A” arrive at the barrier and leak the token, and</span></li>
<li><span>“B” exit.</span></li>
</ul><p><span>After joining both threads at “D”, they no longer exist.</span></p><p><span>The standard guarantees in </span><a href="https://eel.is/c++draft/thread.barrier#class-1.2.sentence-1" target="_blank" rel="noopener"><span>thread.barrier.class-1.2.sentence-1</span></a><span> that the completion function runs:</span></p><blockquote>
<p><span>When the expected count reaches zero, the phase completion step is run.</span></p>
</blockquote><p><span>in one of the threads that arrived at the barrier during the phase (</span><a href="https://eel.is/c++draft/thread.barrier#class-1.2.sentence-3" target="_blank" rel="noopener"><span>thread.barrier.class-1.2.sentence-3</span></a><span>). At “E”, these threads do not exist anymore. Therefore, the </span><code>CompletionFunction</code><span> must have run before “E”, and more precisely, it must have run before the last thread that arrived at the barrier during the phase exits.</span></p><p><strong><span>That is</span></strong><span>: there is only one place in which it makes sense for conforming standard library implementations to run the phase completion step: as part of the </span><code>arrive</code><span> performed by the last thread that arrives at the barrier during the phase.</span></p><p><span>This is an unintended consequence of the interaction between:</span></p><ul>
<li><span>the freedom for threads to never call wait, and</span></li>
<li><span>the guarantees about when and where the phase completion step runs.</span></li>
</ul><p><span>These consequences are problematic in practice, where </span><a href="https://en.wikipedia.org/wiki/Amdahl%27s_law" target="_blank" rel="noopener"><span>Amdahl’s law</span></a><span> limits the scalability of massively parallel applications with small “serial” overheads on modern </span><a href="https://en.wikipedia.org/wiki/Non-uniform_memory_access" target="_blank" rel="noopener"><span>NUMA</span></a><span> architectures with millions of hardware threads. While </span><code>std::barrier</code><span>’s split </span><code>arrive</code><span>/</span><code>wait</code><span> APIs enable threads to hide the cost of synchronization behind independent work, the amount of independent computation available in real applications is limited. Clauses like </span><a href="https://eel.is/c++draft/thread.barrier#class-1.2.sentence-3" target="_blank" rel="noopener"><span>thread.barrier.class-1.2.sentence-3</span></a><span> aim to enable </span><code>std::barrier</code><span> to use hardware accelerators for synchronization, such as those available in NVIDIA GPUs, to allow applications to hide the cost of synchronization behind small amounts of independent work, but due to the unintended consequences explained above, implementations currently cannot do so.</span></p><p><strong><span>Fixing this requires a breaking change. 
There are clear engineering trade-offs</span></strong><span> between varying degrees of guarantees, which determine the set of well-formed programs, and different degrees of implementation flexibility, which determine performance.</span></p><p><strong><span>This paper aims to</span></strong><span> help the reader answer the question: What is the sweet spot for </span><code>std::barrier</code><span> semantics, and what are the consequences of the breaking changes required to get there?</span></p><p><span>The following section analyzes the semantics of the current barrier specification and the different changes that we could make to balance functionality and performance. Then, we evaluate changes according to the functionality provided, their ability to leverage hardware acceleration, and the impact of a change on both existing and potential standard library implementations and end-user applications.</span></p><p><span>Finally, the authors suggest changes that balance functionality and performance to deliver the functionality that applications need in practice while allowing efficient implementations and propose wording for these changes.</span></p><h2 id="Analysis-of-barrier-semantics" data-id="Analysis-of-barrier-semantics"><a class="anchor hidden-xs" href="#Analysis-of-barrier-semantics" title="Analysis-of-barrier-semantics"><span class="octicon octicon-link"></span></a><span>Analysis of barrier semantics</span></h2><p><span>The “core” semantics of the </span><code>std::barrier</code><span> phase completion step are:</span></p><ol>
<li><span>The last arrival </span><em><span>happens-before</span></em><span> phase completion, which </span><em><span>happens-before</span></em><span> any thread unblocks from </span><code>wait</code><span>.</span></li>
<li><span>Establish </span><em><span>cumulativity</span></em><span> from all threads that arrive at the barrier during the phase to all threads unblocked from </span><code>wait</code><span> through the thread that runs the </span><code>CompletionFunction</code><span>.</span></li>
</ol><p><span>These “core” semantics enable applications to, e.g., perform a reduction in a critical-section in-between arriving and waiting:</span></p><p><a target="_blank" rel="noopener"></a><strong><span>Example 2</span></strong><span>: reduction in critical section (</span><a href="https://godbolt.org/z/qcTT15bEa" target="_blank" rel="noopener"><span>godbolt</span></a><span>)</span></p><pre><code class="cpp hljs"><div class="wrapper"><div class="gutter linenumber"><span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span>
<span></span></div><div class="code">std<span class="token double-colon punctuation">::</span>vector<span class="token operator">&lt;</span><span class="token keyword">int</span><span class="token operator">&gt;</span> <span class="token function">data</span><span class="token punctuation">(</span>nthreads<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token keyword">int</span> reduction<span class="token punctuation">;</span>

<span class="token keyword">auto</span> reduce <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token operator">&amp;</span><span class="token punctuation">]</span> <span class="token punctuation">{</span> 
  reduction <span class="token operator">=</span> std<span class="token double-colon punctuation">::</span><span class="token function">accumulate</span><span class="token punctuation">(</span>data<span class="token punctuation">.</span><span class="token function">begin</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> data<span class="token punctuation">.</span><span class="token function">end</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">)</span><span class="token punctuation">;</span> 
<span class="token punctuation">}</span><span class="token punctuation">;</span>
std<span class="token double-colon punctuation">::</span>barrier<span class="token operator">&lt;</span><span class="token keyword">decltype</span><span class="token punctuation">(</span>reduce<span class="token punctuation">)</span><span class="token operator">&gt;</span> <span class="token function">b</span><span class="token punctuation">(</span>nthreads<span class="token punctuation">,</span> reduce<span class="token punctuation">)</span><span class="token punctuation">;</span>


<span class="token keyword">void</span> <span class="token function">thread</span><span class="token punctuation">(</span>size_t i<span class="token punctuation">)</span> <span class="token punctuation">{</span>
    data<span class="token punctuation">[</span>i<span class="token punctuation">]</span> <span class="token operator">=</span> <span class="token function">produce_data</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>  <span class="token comment">// A</span>
    b<span class="token punctuation">.</span><span class="token function">arrive_and_wait</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>       <span class="token comment">// B</span>
    <span class="token function">consume</span><span class="token punctuation">(</span>reduction<span class="token punctuation">)</span><span class="token punctuation">;</span>        <span class="token comment">// C</span>
<span class="token punctuation">}</span>
</div></div></code></pre><p><span>Here, threads produce some data at “A” and arrive and wait at the barrier at “B”. </span><code>reduce</code><span> is then called inside a critical section, after the last thread arrives, and before any thread is unblocked from the wait. Since all threads participating in the barrier are “stalled” at the </span><code>wait</code><span>, </span><code>reduce</code><span> can access </span><code>data</code><span> safely without data-races. Finally, </span><code>reduce</code><span> happens-before any thread is unblocked from the </span><code>wait</code><span>. That is, all uses of </span><code>reduction</code><span> at </span><code>C</code><span> observe the value that </span><code>reduce</code><span> initialized it with safely and without data-races.</span></p><p><span>The main design choices that this paper concerns itself with are “when” and “where” should the C++ standard guarantee that the </span><code>CompletionFunction</code><span> runs. These two properties, “where” and “when”, are intertwined. The following </span><a href="#table1"><span>Table 1</span></a><span> explores some of the main options, the choices they enable, and their impact on hardware acceleration, existing implementations, and user applications.</span></p><p><a id="table1" target="_blank" rel="noopener"></a><strong><span>Table 1</span></strong><span>: Design tradeoffs: “When does the </span><code>CompletionFunction</code><span> run?”, “Where is it allowed to run (on which threads)?”, “Does the </span><code>CompletionFunction</code><span> run if no thread calls </span><code>wait</code><span>?”, “What are the hardware acceleration opportunities of these constraints?”, “What’s the impact on implementations and users?”</span></p><table>
<thead>
<tr>
<th><span>When</span></th>
<th><span>Where</span></th>
<th><span>Runs if no thread waits?</span></th>
<th><span>Hardware acceleration opportunities</span></th>
<th><span>Implementation impact</span></th>
<th><span>User impact</span></th>
</tr>
</thead>
<tbody>
<tr>
<td><span>Last </span><code>arrive</code></td>
<td><span>Last thread to arrive</span></td>
<td><span>Yes</span></td>
<td><span>Very low</span></td>
<td><span>None</span></td>
<td><span>None</span></td>
</tr>
<tr>
<td><span>Any </span><code>arrive</code></td>
<td><span>Any thread that arrives</span></td>
<td><span>Yes</span></td>
<td><span>Very low</span></td>
<td><span>None</span></td>
<td><span>None</span></td>
</tr>
<tr>
<td><span>Any </span><code>wait</code></td>
<td><span>Any thread that calls </span><code>wait</code></td>
<td><span>No</span></td>
<td><span>Low</span></td>
<td><span>All</span></td>
<td><span>None for any application surveyed, minimal otherwise.</span></td>
</tr>
<tr>
<td><span>Any </span><code>arrive</code><span> or </span><code>wait</code></td>
<td><span>A thread that arrives or waits</span></td>
<td><strong><span>Options</span></strong><span>:</span><br><span>“Yes”</span><br><span>“No”</span><br><span>“Unspecified”</span><br><span>“Implementation defined”</span></td>
<td><span>Medium</span></td>
<td><span>If answer to “Runs if no thread waits?” is No, None. Otherwise, All.</span></td>
<td><span>None for any application surveyed, minimal otherwise.</span></td>
</tr>
<tr>
<td><span>After last arrive before any thread unblocks from wait</span></td>
<td><strong><span>Options</span></strong><span>:</span><br><span>“Unspecified”</span><br><span>“A thread that arrives or waits”</span><br><span>“A new thread”</span><br><span>“A thread that arrives or waits or a new thread”</span></td>
<td><strong><span>Options</span></strong><span>:</span><br><span>“Yes”</span><br><span>“No”</span><br><span>“Unspecified”</span><br><span>“Implementation defined”</span></td>
<td><span>High</span></td>
<td><span>If answer to “Runs if no thread waits?” is No, None. Otherwise, All.</span></td>
<td><span>None for any application surveyed, minimal otherwise.</span></td>
</tr>
</tbody>
</table><p><span>Restricting the choice of “when” significantly constrains the threads in which it makes sense for implementations to run the </span><code>CompletionFunction</code><span>. For example, restricting “when” to particular API calls, including the broad “Any </span><code>arrive</code><span> or </span><code>wait</code><span>”, restricts implementations to only run the phase completion step within those API calls (on threads making these API calls).</span></p><p><span>An important question impacting </span><a href="#example1"><span>Example 1</span></a><span> is whether the </span><code>CompletionFunction</code><span> runs if no thread waits. The third column in </span><a href="#table1"><span>Table 1</span></a><span> answers this question for the options considered. For the last two rows in the table, the standard is free to define these semantics as it wishes. Some options are provided in the table inline, and vary from well-defined semantics (“Yes” or “No”), to “implementation-defined” or “unspecified”.</span></p><h3 id="On-hardware-acceleration" data-id="On-hardware-acceleration"><a class="anchor hidden-xs" href="#On-hardware-acceleration" title="On-hardware-acceleration"><span class="octicon octicon-link"></span></a><span>On hardware acceleration</span></h3><p><span>The column on “Hardware acceleration opportunities” describes how effective “hardware barrier synchronization accelerators” can be at accelerating synchronization via </span><code>std::barrier</code><span>. Hardware accelerators that have more freedom to pick which thread completes the barrier phase (up to the freedom of creating a new thread to do so) can be significantly more effective than those that cannot make this choice. As the number of hardware threads available on computer hardware increases, the benefits of these accelerators outweigh their costs. NVIDIA GPUs have been shipping barrier accelerators for many years. 
Being descriptive and not prescriptive enables hardware vendors to innovate.</span></p><h3 id="Impact-on-existing-implementations" data-id="Impact-on-existing-implementations"><a class="anchor hidden-xs" href="#Impact-on-existing-implementations" title="Impact-on-existing-implementations"><span class="octicon octicon-link"></span></a><span>Impact on existing implementations</span></h3><p><span>If the standard </span><em><span>allows</span></em><span> the </span><code>CompletionFunction</code><span> to run even if no thread ever waits, no mainstream standard library implementation (</span><code>libc++</code><span>, </span><code>libstdc++</code><span>, and MSVC STL) needs to change. Otherwise, these standard library implementations would need to change.</span></p><p><span>For the last two rows, selecting “implementation-defined” behavior would require the existing implementations to document their behavior.</span></p><h3 id="Largest-possible-semantic-change" data-id="Largest-possible-semantic-change"><a class="anchor hidden-xs" href="#Largest-possible-semantic-change" title="Largest-possible-semantic-change"><span class="octicon octicon-link"></span></a><span>Largest possible semantic change</span><a id="largest" target="_blank" rel="noopener"></a></h3><p><span>To evaluate the impact of breaking changes, this section defines the “largest” possible semantic change we could make. It allows </span><code>CompletionFunction</code><span> to run:</span></p><ul>
<li><strong><span>When</span></strong><span>: Runs after last arrive before any thread unblocks from wait (no other requirements about “when”)</span></li>
<li><strong><span>Where</span></strong><span>: Unspecified.</span></li>
<li><strong><span>Runs if no thread waits?</span></strong><span> Unspecified.</span></li>
</ul><p><span>This change </span><strong><span>breaks</span></strong><span> three </span><code>std::barrier</code><span> guarantees:</span></p><ul>
<li><span>That the phase completion step is always run when the expected count reaches 0.</span>
<ul>
<li><span>With this change, it is only guaranteed to run if at least one thread calls </span><code>wait</code><span>, but this change does not guarantee that it does not run if no thread calls </span><code>wait</code><span>. That is, even if no thread calls </span><code>wait</code><span>, this change allows the implementation to run the </span><code>CompletionFunction</code><span>, but it does not require the implementation to do so.</span></li>
</ul>
</li>
<li><span>That the phase completion step runs on the last thread that arrived at the barrier during the phase.</span>
<ul>
<li><span>With this change, the phase completion step could run on any thread (it’s unspecified where it runs). It can run on the last thread that arrived, but it can also run on a thread that waits, or on a new thread.</span></li>
</ul>
</li>
<li><span>That a barrier with a default completion function can be arrived on indefinitely without calling wait.</span>
<ul>
<li><span>With this change, this becomes undefined behavior.</span></li>
</ul>
</li>
</ul><h3 id="Potential-impact-on-implementations" data-id="Potential-impact-on-implementations"><a class="anchor hidden-xs" href="#Potential-impact-on-implementations" title="Potential-impact-on-implementations"><span class="octicon octicon-link"></span></a><span>Potential impact on implementations</span></h3><p><span>Thomas Rodgers mentioned that </span><a href="https://developer.apple.com/documentation/dispatch" target="_blank" rel="noopener"><span>libdispatch</span></a><span> barriers potentially complete on a different thread in the thread pool: all the work in the queue before the enqueuing of the barrier work item completes before the barrier work item, whose completion may happen on some other thread. The </span><a href="#largest"><span>largest change</span></a><span> would enable such an implementation.</span></p><p><span>NVIDIA’s barrier accelerators can run the phase completion step very close to the memory where the barrier resides. The </span><a href="#largest"><span>largest change</span></a><span> enables such an implementation.</span></p><h3 id="Impact-on-existing-applications" data-id="Impact-on-existing-applications"><a class="anchor hidden-xs" href="#Impact-on-existing-applications" title="Impact-on-existing-applications"><span class="octicon octicon-link"></span></a><span>Impact on existing applications</span></h3><p><span>We surveyed all public uses of </span><code>std::barrier</code><span> on GitHub as well as within some of NVIDIA’s code bases that use them heavily.</span></p><p><span>The impact of all changes considered - including the </span><a href="#largest"><span>largest change</span></a><span> - on all existing applications we surveyed was </span><strong><span>non-existent</span></strong><span>. The feature is a new C++20 feature that has only been available since:</span></p><ul>
<li><span>GNU libstdc++: version 11.1, released </span><a href="https://gcc.gnu.org/onlinedocs/libstdc++/manual/status.html#status.iso.2017" target="_blank" rel="noopener"><span>27 April 2021</span></a></li>
<li><span>LLVM libc++: version 11, released </span><a href="https://libcxx.llvm.org/Status/Cxx20.html" target="_blank" rel="noopener"><span>26 August 2020</span></a></li>
<li><span>MSVC:  version 2019 16.9, released </span><a href="https://docs.microsoft.com/en-us/cpp/overview/visual-cpp-language-conformance?view=msvc-170#c-standard-library-features" target="_blank" rel="noopener"><span>2nd March 2021</span></a></li>
</ul><p><span>and it is a relatively niche and sharp tool.</span></p><p><span>All applications we discovered were either educational or small toy programs for learning purposes. They all had one thread that both arrived and waited at the barrier during all barrier phases. None of them relied on a particular thread running the </span><code>CompletionFunction</code><span> (we explore the uses we can imagine in the next section).</span></p><h3 id="Potential-impact-on-applications" data-id="Potential-impact-on-applications"><a class="anchor hidden-xs" href="#Potential-impact-on-applications" title="Potential-impact-on-applications"><span class="octicon octicon-link"></span></a><span>Potential impact on applications</span></h3><p><span>While the authors could not find any application in the wild that would break due to the </span><a href="#largest"><span>largest change</span></a><span>, together with the experts we polled, we can imagine some.</span></p><h4 id="Portability-concerns" data-id="Portability-concerns"><a class="anchor hidden-xs" href="#Portability-concerns" title="Portability-concerns"><span class="octicon octicon-link"></span></a><span>Portability concerns</span></h4><p><span>Anthony Williams raised the following concern: if most standard library implementations run the phase completion step as part of the last thread that arrives at the barrier during the phase, applications might end up silently and accidentally relying on this guarantee. 
Therefore, applications will not be portable to implementations that do something else.</span></p><h4 id="Count-down-with-effect" data-id="Count-down-with-effect"><a class="anchor hidden-xs" href="#Count-down-with-effect" title="Count-down-with-effect"><span class="octicon octicon-link"></span></a><span>Count-down with effect</span></h4><p><a href="https://wg21.link/P2300" target="_blank" rel="noopener"><span>P2300</span></a><span> </span><code>when_all</code><span> algorithm accepts a variable number of asynchronous tasks and executes a continuation when they all complete. Given </span><code>std::barrier</code><span> as specified today, one valid implementation of </span><code>when_all</code><span> simply arrives at the barrier from all asynchronous tasks. After the last task arrives, the continuation runs. No thread calls </span><code>wait</code><span>, and the barrier is never used again.</span></p><p><span>The </span><a href="#largest"><span>largest change</span></a><span> would silently break the semantics of such a </span><code>when_all</code><span> implementation, causing the application to hang on implementations that do not run the phase completion step as part of the last thread that arrives.</span></p><p><span>This implementation of </span><code>when_all</code><span> would be using </span><code>std::barrier</code><span> as a “single-use count-down with effect”: a counter initialized to the number of asynchronous tasks that each task decrements by </span><code>1</code><span>. When the counter reaches 0, it runs some function for its effect.</span></p><p><span>Since the counter is “single-use”, it would make more sense for </span><code>when_all</code><span> to use </span><code>std::latch</code><span> here. However, </span><code>when_all</code><span> cannot do so because </span><code>std::latch</code><span> does not support </span><code>CompletionFunctions</code><span>. 
A way to enable this use case would be to add support for </span><code>CompletionFunctions</code><span> to </span><code>std::latch</code><span>. Furthermore, since </span><code>std::latch</code><span> is single-use, the semantics that make sense for </span><code>std::barrier</code><span> do not necessarily make sense for </span><code>std::latch</code><span>. This is worth exploring.</span></p><p><span>Barriers and latches are synchronization primitives intended to synchronize groups of threads with each other. However, this is not what the </span><code>when_all</code><span> example needs. The </span><code>when_all</code><span> example synchronizes a set of threads with the thread that runs some effect; it does not synchronize the threads themselves with each other. So another alternative that might be worth exploring is to provide such synchronization primitives in the standard library.</span></p><h4 id="Thread-id" data-id="Thread-id"><a class="anchor hidden-xs" href="#Thread-id" title="Thread-id"><span class="octicon octicon-link"></span></a><span>Thread id</span></h4><p><span>Anthony Williams mentioned that they commonly see applications in which a handle, like a database handle, is stored in </span><code>thread_local</code><span> storage (TLS). Such an application could use the knowledge that the </span><code>CompletionFunction</code><span> is only executed by a thread that arrived at the barrier during the phase to just access this handle from the </span><code>CompletionFunction</code><span>.</span></p><p><span>Thomas Rodgers recognized that such an application could, in some cases, store a </span><code>shared_ptr&lt;Handle*&gt;</code><span> in the </span><code>CompletionFunction</code><span> itself and access the database handle through it. However, if the database handle is tied to the thread that created it, this approach would not work. 
He also recognized that on the </span><a href="https://developer.apple.com/documentation/dispatch" target="_blank" rel="noopener"><span>libdispatch</span></a><span> barrier model, such an application could not depend on TLS either.</span></p><p><span>This is an example of a larger class of applications that rely on the guarantee that one of the threads that arrived at the barrier during the phase runs the </span><code>CompletionFunction</code><span>. Thread locals are one example; inspecting the thread id and doing something with it would be another.</span></p><p><span>Any change that relaxes the guarantees about “where” (on which thread) the </span><code>CompletionFunction</code><span> may run will be turning some of these use cases from “well-formed” programs into non-portable or illegal programs. We can imagine these applications but have not found them in the wild.</span></p><h2 id="Suggestion" data-id="Suggestion"><a class="anchor hidden-xs" href="#Suggestion" title="Suggestion"><span class="octicon octicon-link"></span></a><span>Suggestion</span></h2><p><span>Our suggestion is to pursue some flavor of the </span><a href="#largest"><span>largest</span></a><span> semantic change, restricting ourselves to the options that do not impact any existing standard library implementation or application we surveyed in the wild. While we can imagine theoretical applications that would be impacted by any option we take, it is very hard to write applications that do so correctly. Given the novelty and niche of this feature, we expect the impact on such applications to be minimal to non-existent. All experts we polled from the domains of applications using </span><code>std::barrier</code><span>, standard library implementors, and designers of </span><code>std::barrier</code><span> were surprised by the current semantics. These semantics were not intended. The intention was to enable </span><code>std::barrier</code><span> to leverage hardware acceleration. 
If feasible, our recommendation is to backport this change to C++20.</span></p><p><span>That is, our suggestion is to guarantee that:</span></p><ul>
<li><span>The last thread arriving at a barrier phase happens-before the </span><code>CompletionFunction</code><span> runs, which happens-before any thread is unblocked from that phase.</span></li>
<li><span>Cumulativity is established between all threads arriving at the barrier during the phase and the thread running the phase completion step, and between that thread and any thread that observes phase completion via a call to </span><code>wait</code><span>.</span></li>
<li><span>If no thread observes phase completion, whether the phase completion step runs is </span><em><span>unspecified</span></em><span>.</span></li>
<li><span>The phase completion step runs on one of the threads that arrived or waited at the barrier during the phase.</span></li>
</ul><h3 id="Suggested-wording" data-id="Suggested-wording"><a class="anchor hidden-xs" href="#Suggested-wording" title="Suggested-wording"><span class="octicon octicon-link"></span></a><span>Suggested wording</span></h3><p><span>The proposed change modifies </span><a href="http://eel.is/c++draft/thread.barrier#class-1" target="_blank" rel="noopener"><span>thread.barrier.class-1</span></a><span> as follows:</span></p><blockquote>
<p><span>Each barrier phase consists of the following steps:</span></p>
<ul>
<li><span>(1.1) The expected count is decremented by each call to </span><code>arrive</code><span> or </span><code>arrive_and_drop</code><span>.</span></li>
<li><span>(1.2) When the expected count reaches zero, </span><del><span>the phase completion step is run. For the specialization with the default value of the </span><code>CompletionFunction</code><span> template parameter, the completion step is run as part of the call to </span><code>arrive</code><span> or </span><code>arrive_and_drop</code><span> that caused the expected count to reach zero. For other specializations, the completion step is run on one of the threads that arrived at the barrier during the phase.</span></del><ins><span>the phase completion step executes if at least one thread observes phase completion by waiting at the phase synchronization point; otherwise, whether the phase completion step runs is unspecified. The thread on which the phase completion step executes is an unspecified one among the threads that arrived or waited at the barrier during the phase.</span></ins></li>
<li><span>(1.3) When the completion step finishes, the expected count is reset to what was specified by the expected argument to the constructor, possibly adjusted by calls to </span><code>arrive_and_drop</code><span>, and the next phase starts.</span></li>
</ul>
</blockquote><h2 id="Acknowledgements" data-id="Acknowledgements"><a class="anchor hidden-xs" href="#Acknowledgements" title="Acknowledgements"><span class="octicon octicon-link"></span></a><span>Acknowledgements</span></h2><p><span>We thank everyone who helped, in particular Olivier Giroux, Eric Niebler, David Olsen, Anthony Williams, and Thomas Rodgers.</span></p></div>
    <div class="ui-toc dropup unselectable hidden-print" style="display:none;">
        <div class="pull-right dropdown">
            <a id="tocLabel" class="ui-toc-label btn btn-default" data-toggle="dropdown" href="#" role="button" aria-haspopup="true" aria-expanded="false" title="Table of contents">
                <i class="fa fa-bars"></i>
            </a>
            <ul id="ui-toc" class="ui-toc-dropdown dropdown-menu" aria-labelledby="tocLabel">
                <div class="toc"><ul class="nav">
<li><a href="#barrier’s-phase-completion-guarantees" title="barrier’s phase completion guarantees">barrier’s phase completion guarantees</a><ul class="nav">
<li><a href="#Abstract" title="Abstract">Abstract</a></li>
<li><a href="#Introduction" title="Introduction">Introduction</a></li>
<li><a href="#Analysis-of-barrier-semantics" title="Analysis of barrier semantics">Analysis of barrier semantics</a><ul class="nav">
<li><a href="#On-hardware-acceleration" title="On hardware acceleration">On hardware acceleration</a></li>
<li><a href="#Impact-on-existing-implementations" title="Impact on existing implementations">Impact on existing implementations</a></li>
<li><a href="#Largest-possible-semantic-change" title="Largest possible semantic change">Largest possible semantic change</a></li>
<li><a href="#Potential-impact-on-implementations" title="Potential impact on implementations">Potential impact on implementations</a></li>
<li><a href="#Impact-on-existing-applications" title="Impact on existing applications">Impact on existing applications</a></li>
<li><a href="#Potential-impact-on-applications" title="Potential impact on applications">Potential impact on applications</a></li>
</ul>
</li>
<li><a href="#Suggestion" title="Suggestion">Suggestion</a><ul class="nav">
<li><a href="#Suggested-wording" title="Suggested wording">Suggested wording</a></li>
</ul>
</li>
<li><a href="#Acknowledgements" title="Acknowledgements">Acknowledgements</a></li>
</ul>
</li>
</ul>
</div><div class="toc-menu"><a class="expand-toggle" href="#">Expand all</a><a class="back-to-top" href="#">Back to top</a><a class="go-to-bottom" href="#">Go to bottom</a></div>
            </ul>
        </div>
    </div>
    <div id="ui-toc-affix" class="ui-affix-toc ui-toc-dropdown unselectable hidden-print" data-spy="affix" style="top:17px;display:none;">
        <div class="toc"><ul class="nav">
<li><a href="#barrier’s-phase-completion-guarantees" title="barrier’s phase completion guarantees">barrier’s phase completion guarantees</a><ul class="nav">
<li><a href="#Abstract" title="Abstract">Abstract</a></li>
<li><a href="#Introduction" title="Introduction">Introduction</a></li>
<li><a href="#Analysis-of-barrier-semantics" title="Analysis of barrier semantics">Analysis of barrier semantics</a><ul class="nav">
<li><a href="#On-hardware-acceleration" title="On hardware acceleration">On hardware acceleration</a></li>
<li><a href="#Impact-on-existing-implementations" title="Impact on existing implementations">Impact on existing implementations</a></li>
<li><a href="#Largest-possible-semantic-change" title="Largest possible semantic change">Largest possible semantic change</a></li>
<li><a href="#Potential-impact-on-implementations" title="Potential impact on implementations">Potential impact on implementations</a></li>
<li><a href="#Impact-on-existing-applications" title="Impact on existing applications">Impact on existing applications</a></li>
<li><a href="#Potential-impact-on-applications" title="Potential impact on applications">Potential impact on applications</a></li>
</ul>
</li>
<li><a href="#Suggestion" title="Suggestion">Suggestion</a><ul class="nav">
<li><a href="#Suggested-wording" title="Suggested wording">Suggested wording</a></li>
</ul>
</li>
<li><a href="#Acknowledgements" title="Acknowledgements">Acknowledgements</a></li>
</ul>
</li>
</ul>
</div><div class="toc-menu"><a class="expand-toggle" href="#">Expand all</a><a class="back-to-top" href="#">Back to top</a><a class="go-to-bottom" href="#">Go to bottom</a></div>
    </div>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js" integrity="sha256-hVVnYaiADRTO2PzUGmuLJr8BLUSjGIZsDYGmIJLv2b8=" crossorigin="anonymous"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha256-U5ZEeKfGNOja007MMD3YBI0A3OSZOQbeG6z2f2Y0hu8=" crossorigin="anonymous" defer></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/gist-embed/2.6.0/gist-embed.min.js" integrity="sha256-KyF2D6xPIJUW5sUDSs93vWyZm+1RzIpKCexxElmxl8g=" crossorigin="anonymous" defer></script>
    <script>
        // Cached jQuery selection of the .markdown-body element(s); windowResize()
        // measures it to position the table-of-contents widgets.
        var markdown = $(".markdown-body");
        // Smooth-scroll every in-page hash link instead of letting the browser jump.
        //
        // For each <a> whose href starts with '#' and carries a non-empty fragment,
        // a click handler is installed that animates the page to the target element
        // and then writes the fragment to the URL (the default click behaviour).
        // Links whose target does not exist keep their default behaviour.
        function smoothHashScroll() {
            $("a[href^='#']").each(function () {
                // href="#" has an empty fragment: nothing to scroll to.
                if (!this.hash) return;
                $(this).on('click', function (e) {
                    // store hash
                    var hash = this.hash;
                    // `hash` is percent-encoded by the browser (e.g. a typographic
                    // apostrophe becomes %E2%80%99), so it is neither the literal id
                    // nor a valid jQuery selector. Decode it and look the element up
                    // by id, which also avoids misparsing ids containing '.' or ':'.
                    var fragment = hash.slice(1);
                    try {
                        fragment = decodeURIComponent(fragment);
                    } catch (err) {
                        // malformed escape sequence: fall back to the raw fragment
                    }
                    var target = document.getElementById(fragment);
                    if (!target) return;
                    // prevent default anchor click behavior
                    e.preventDefault();
                    // animate
                    $('body, html').stop(true, true).animate({
                        scrollTop: $(target).offset().top
                    }, 100, "linear", function () {
                        // when done, add hash to url
                        // (default click behaviour)
                        window.location.hash = hash;
                    });
                });
            });
        }

        // Install the smooth-scroll click handlers on all in-page links.
        smoothHashScroll();

        // Cached selections of the table-of-contents widgets, shared by the
        // functions below.
        var toc = $('.ui-toc');
        var tocAffix = $('.ui-affix-toc');
        var tocDropdown = $('.ui-toc-dropdown');

        // Clicks inside a TOC dropdown must not bubble to ancestor handlers.
        tocDropdown.on('click', function (event) {
            event.stopPropagation();
        });

        // Tracks which TOC variant is currently shown: true selects the affix
        // TOC, false the dropdown one (see generateScrollspy / windowResize).
        var enoughForAffixToc = true;

        // (Re)initialise scrollspy on <body> and display the TOC variant selected
        // by enoughForAffixToc, then fire a scroll event on the body.
        function generateScrollspy() {
            var $body = $(document.body);
            $body.scrollspy({
                target: ''
            });
            $body.scrollspy('refresh');
            if (!enoughForAffixToc) {
                // dropdown TOC replaces the affix one
                tocAffix.hide();
                toc.show();
            } else {
                // affix TOC replaces the dropdown one
                toc.hide();
                tocAffix.show();
            }
            $body.scroll();
        }

        // Reposition both TOC widgets relative to the markdown column and switch
        // between them when the available side margin crosses the 133px threshold.
        function windowResize() {
            // Dropdown TOC: distance from the window's right edge to the right edge
            // of the markdown content box (outer edge minus its right padding).
            var padRight = parseFloat(markdown.css('padding-right'));
            var contentRightEdge = markdown.offset().left + markdown.outerWidth() - padRight;
            toc.css('right', ($(window).width() - contentRightEdge) + 'px');

            // Affix TOC: shown only when half of the width left over by the
            // markdown column inside its parent is at least 133px.
            var sideMargin = (markdown.parent().outerWidth() - markdown.outerWidth()) / 2;
            var fitsAffix = sideMargin >= 133;
            if (fitsAffix) {
                // place the affix TOC at the column's right edge, pulled back by
                // half of the affix's own padding + border
                var affixInset = (tocAffix.outerWidth() - tocAffix.width()) / 2;
                var affixLeft = markdown.offset().left + markdown.outerWidth() - affixInset;
                tocAffix.css('left', affixLeft + 'px');
            }

            // Only rebuild scrollspy when the chosen variant actually changes.
            if (fitsAffix != enoughForAffixToc) {
                enoughForAffixToc = fitsAffix;
                generateScrollspy();
            }
        }
        // Re-run the TOC layout whenever the window is resized.
        $(window).on('resize', function () {
            windowResize();
        });
        // Initial layout and scrollspy setup once the DOM is ready.
        $(function () {
            windowResize();
            generateScrollspy();
        });

        // Clear the fragment part of the current URL.
        function removeHash() {
            var loc = window.location;
            loc.hash = '';
        }

        // "Back to top" / "Go to bottom" links in the TOC menus.
        var backtotop = $('.back-to-top');
        var gotobottom = $('.go-to-bottom');

        backtotop.on('click', function (event) {
            event.preventDefault();
            event.stopPropagation();
            if (scrollToTop) {
                scrollToTop();
            }
            removeHash();
        });
        gotobottom.on('click', function (event) {
            event.preventDefault();
            event.stopPropagation();
            if (scrollToBottom) {
                scrollToBottom();
            }
            removeHash();
        });

        // "Expand all" / "Collapse all" toggle; tocExpand holds the current state.
        var toggle = $('.expand-toggle');
        var tocExpand = false;

        // Bring the TOC and the toggle label in line with the initial state.
        checkExpandToggle();
        toggle.on('click', function (event) {
            event.preventDefault();
            event.stopPropagation();
            tocExpand = !tocExpand;
            checkExpandToggle();
        });

        // Apply the current tocExpand state: add or remove the 'expand' class on
        // the dropdown TOCs and set the toggle links' label to match.
        function checkExpandToggle () {
            var $tocLists = $('.ui-toc-dropdown .toc');
            var $toggleLinks = $('.expand-toggle');
            if (tocExpand) {
                $tocLists.addClass('expand');
                $toggleLinks.text('Collapse all');
                return;
            }
            $tocLists.removeClass('expand');
            $toggleLinks.text('Expand all');
        }

        // Animate the page to scroll offset 0 over 100ms, finishing any scroll
        // animation already in progress first.
        function scrollToTop() {
            var $page = $('body, html');
            $page.stop(true, true);
            $page.animate({ scrollTop: 0 }, 100, "linear");
        }

        // Animate the page to the body's full scroll height over 100ms, finishing
        // any scroll animation already in progress first.
        function scrollToBottom() {
            var $page = $('body, html');
            $page.stop(true, true);
            var bottom = document.body.scrollHeight;
            $page.animate({ scrollTop: bottom }, 100, "linear");
        }
    </script>
</body>

</html>
