<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<!-- 2024-11-18 Mon 10:03 -->
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>P3409R1: Enabling more efficient stop-token based cancellation of senders</title>
<meta name="author" content="Lewis Baker" />
<meta name="generator" content="Org Mode" />
<style>
  #content { max-width: 60em; margin: auto; }
  .title  { text-align: center;
             margin-bottom: .2em; }
  .subtitle { text-align: center;
              font-size: medium;
              font-weight: bold;
              margin-top:0; }
  .todo   { font-family: monospace; color: red; }
  .done   { font-family: monospace; color: green; }
  .priority { font-family: monospace; color: orange; }
  .tag    { background-color: #eee; font-family: monospace;
            padding: 2px; font-size: 80%; font-weight: normal; }
  .timestamp { color: #bebebe; }
  .timestamp-kwd { color: #5f9ea0; }
  .org-right  { margin-left: auto; margin-right: 0px;  text-align: right; }
  .org-left   { margin-left: 0px;  margin-right: auto; text-align: left; }
  .org-center { margin-left: auto; margin-right: auto; text-align: center; }
  .underline { text-decoration: underline; }
  #postamble p, #preamble p { font-size: 90%; margin: .2em; }
  p.verse { margin-left: 3%; }
  pre {
    border: 1px solid #e6e6e6;
    border-radius: 3px;
    background-color: #f2f2f2;
    padding: 8pt;
    font-family: monospace;
    overflow: auto;
    margin: 1.2em;
  }
  pre.src {
    position: relative;
    overflow: auto;
  }
  pre.src:before {
    display: none;
    position: absolute;
    top: -8px;
    right: 12px;
    padding: 3px;
    color: #555;
    background-color: #f2f2f299;
  }
  pre.src:hover:before { display: inline; margin-top: 14px;}
  /* Languages per Org manual */
  pre.src-asymptote:before { content: 'Asymptote'; }
  pre.src-awk:before { content: 'Awk'; }
  pre.src-authinfo::before { content: 'Authinfo'; }
  pre.src-C:before { content: 'C'; }
  /* pre.src-C++ doesn't work in CSS */
  pre.src-clojure:before { content: 'Clojure'; }
  pre.src-css:before { content: 'CSS'; }
  pre.src-D:before { content: 'D'; }
  pre.src-ditaa:before { content: 'ditaa'; }
  pre.src-dot:before { content: 'Graphviz'; }
  pre.src-calc:before { content: 'Emacs Calc'; }
  pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
  pre.src-fortran:before { content: 'Fortran'; }
  pre.src-gnuplot:before { content: 'gnuplot'; }
  pre.src-haskell:before { content: 'Haskell'; }
  pre.src-hledger:before { content: 'hledger'; }
  pre.src-java:before { content: 'Java'; }
  pre.src-js:before { content: 'Javascript'; }
  pre.src-latex:before { content: 'LaTeX'; }
  pre.src-ledger:before { content: 'Ledger'; }
  pre.src-lisp:before { content: 'Lisp'; }
  pre.src-lilypond:before { content: 'Lilypond'; }
  pre.src-lua:before { content: 'Lua'; }
  pre.src-matlab:before { content: 'MATLAB'; }
  pre.src-mscgen:before { content: 'Mscgen'; }
  pre.src-ocaml:before { content: 'Objective Caml'; }
  pre.src-octave:before { content: 'Octave'; }
  pre.src-org:before { content: 'Org mode'; }
  pre.src-oz:before { content: 'OZ'; }
  pre.src-plantuml:before { content: 'Plantuml'; }
  pre.src-processing:before { content: 'Processing.js'; }
  pre.src-python:before { content: 'Python'; }
  pre.src-R:before { content: 'R'; }
  pre.src-ruby:before { content: 'Ruby'; }
  pre.src-sass:before { content: 'Sass'; }
  pre.src-scheme:before { content: 'Scheme'; }
  pre.src-screen:before { content: 'Gnu Screen'; }
  pre.src-sed:before { content: 'Sed'; }
  pre.src-sh:before { content: 'shell'; }
  pre.src-sql:before { content: 'SQL'; }
  pre.src-sqlite:before { content: 'SQLite'; }
  /* additional languages in org.el's org-babel-load-languages alist */
  pre.src-forth:before { content: 'Forth'; }
  pre.src-io:before { content: 'IO'; }
  pre.src-J:before { content: 'J'; }
  pre.src-makefile:before { content: 'Makefile'; }
  pre.src-maxima:before { content: 'Maxima'; }
  pre.src-perl:before { content: 'Perl'; }
  pre.src-picolisp:before { content: 'Pico Lisp'; }
  pre.src-scala:before { content: 'Scala'; }
  pre.src-shell:before { content: 'Shell Script'; }
  pre.src-ebnf2ps:before { content: 'ebfn2ps'; }
  /* additional language identifiers per "defun org-babel-execute"
       in ob-*.el */
  pre.src-cpp:before  { content: 'C++'; }
  pre.src-abc:before  { content: 'ABC'; }
  pre.src-coq:before  { content: 'Coq'; }
  pre.src-groovy:before  { content: 'Groovy'; }
  /* additional language identifiers from org-babel-shell-names in
     ob-shell.el: ob-shell is the only babel language using a lambda to put
     the execution function name together. */
  pre.src-bash:before  { content: 'bash'; }
  pre.src-csh:before  { content: 'csh'; }
  pre.src-ash:before  { content: 'ash'; }
  pre.src-dash:before  { content: 'dash'; }
  pre.src-ksh:before  { content: 'ksh'; }
  pre.src-mksh:before  { content: 'mksh'; }
  pre.src-posh:before  { content: 'posh'; }
  /* Additional Emacs modes also supported by the LaTeX listings package */
  pre.src-ada:before { content: 'Ada'; }
  pre.src-asm:before { content: 'Assembler'; }
  pre.src-caml:before { content: 'Caml'; }
  pre.src-delphi:before { content: 'Delphi'; }
  pre.src-html:before { content: 'HTML'; }
  pre.src-idl:before { content: 'IDL'; }
  pre.src-mercury:before { content: 'Mercury'; }
  pre.src-metapost:before { content: 'MetaPost'; }
  pre.src-modula-2:before { content: 'Modula-2'; }
  pre.src-pascal:before { content: 'Pascal'; }
  pre.src-ps:before { content: 'PostScript'; }
  pre.src-prolog:before { content: 'Prolog'; }
  pre.src-simula:before { content: 'Simula'; }
  pre.src-tcl:before { content: 'tcl'; }
  pre.src-tex:before { content: 'TeX'; }
  pre.src-plain-tex:before { content: 'Plain TeX'; }
  pre.src-verilog:before { content: 'Verilog'; }
  pre.src-vhdl:before { content: 'VHDL'; }
  pre.src-xml:before { content: 'XML'; }
  pre.src-nxml:before { content: 'XML'; }
  /* add a generic configuration mode; LaTeX export needs an additional
     (add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
  pre.src-conf:before { content: 'Configuration File'; }

  table { border-collapse:collapse; }
  caption.t-above { caption-side: top; }
  caption.t-bottom { caption-side: bottom; }
  td, th { vertical-align:top;  }
  th.org-right  { text-align: center;  }
  th.org-left   { text-align: center;   }
  th.org-center { text-align: center; }
  td.org-right  { text-align: right;  }
  td.org-left   { text-align: left;   }
  td.org-center { text-align: center; }
  dt { font-weight: bold; }
  .footpara { display: inline; }
  .footdef  { margin-bottom: 1em; }
  .figure { padding: 1em; }
  .figure p { text-align: center; }
  .equation-container {
    display: table;
    text-align: center;
    width: 100%;
  }
  .equation {
    vertical-align: middle;
  }
  .equation-label {
    display: table-cell;
    text-align: right;
    vertical-align: middle;
  }
  .inlinetask {
    padding: 10px;
    border: 2px solid gray;
    margin: 10px;
    background: #ffffcc;
  }
  #org-div-home-and-up
   { text-align: right; font-size: 70%; white-space: nowrap; }
  textarea { overflow-x: auto; }
  .linenr { font-size: smaller }
  .code-highlighted { background-color: #ffff00; }
  .org-info-js_info-navigation { border-style: none; }
  #org-info-js_console-label
    { font-size: 10px; font-weight: bold; white-space: nowrap; }
  .org-info-js_search-highlight
    { background-color: #ffff00; color: #000000; font-weight: bold; }
  .org-svg { }
</style>
</head>
<body>
<div id="content" class="content">
<h1 class="title">P3409R1: Enabling more efficient stop-token based cancellation of senders</h1>
<div id="table-of-contents" role="doc-toc">
<h2>Table of Contents</h2>
<div id="text-table-of-contents" role="doc-toc">
<ul>
<li><a href="#org5d83021">1. Abstract</a></li>
<li><a href="#org728d60c">2. Revision History</a></li>
<li><a href="#org79d4a83">3. Overview</a></li>
<li><a href="#org03e40b2">4. Motivation</a>
<ul>
<li><a href="#org17cef3f">4.1. Why does it need to be done now?</a></li>
</ul>
</li>
<li><a href="#orgd369fec">5. Proposal</a>
<ul>
<li><a href="#orge29b3dd">5.1. Allowing <code>stoppable_token</code> that only support a finite number of stop-callbacks</a></li>
<li><a href="#org745d524">5.2. Limiting when stop-callbacks can be constructed by sender algorithms</a></li>
<li><a href="#org794dc5c">5.3. Adding the <code>std::single_inplace_stop_source</code> type</a></li>
<li><a href="#org901398b">5.4. Adding the <code>std::finite_inplace_stop_source&lt;N&gt;</code> class template</a>
<ul>
<li><a href="#org4df5145">5.4.1. Tweaks to the <code>stoppable_token</code> and <code>stoppable-source</code> concepts</a></li>
</ul>
</li>
<li><a href="#org764a865">5.5. Modifications to <code>std::execution</code> sender algorithms</a>
<ul>
<li><a href="#orgb5a09bc">5.5.1. Changes to <code>split</code> algorithm</a></li>
<li><a href="#org274713f">5.5.2. Changes to <code>when_all</code> algorithms</a></li>
<li><a href="#org3467e81">5.5.3. Alternative: Leave the choice of stop-token to be implementation-defined</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#org1bf6085">6. Design Discussion</a>
<ul>
<li><a href="#org581a728">6.1. Performance Benefits</a>
<ul>
<li><a href="#org3c1a2ce">6.1.1. Cost of <code>inplace_stop_token</code></a></li>
<li><a href="#org9023fbb">6.1.2. Cost of <code>single_inplace_stop_token</code></a></li>
<li><a href="#org9721b1f">6.1.3. Cost of <code>finite_inplace_stop_token</code></a></li>
</ul>
</li>
<li><a href="#org67070e4">6.2. Performance vs Safety Tradeoff</a></li>
<li><a href="#orgf6e522e">6.3. Usage in <code>task</code> coroutines</a></li>
<li><a href="#orgb3ffe60">6.4. Do we still need <code>inplace_stop_token</code>?</a></li>
</ul>
</li>
<li><a href="#org74803d2">7. Proposed Wording</a>
<ul>
<li><a href="#org04c6dc7">7.1. Changes to stop token concepts</a></li>
<li><a href="#orgaaccff7">7.2. Single-callback stop-token wording</a></li>
<li><a href="#org9165719">7.3. Finite-callback stop token wording</a></li>
<li><a href="#org8dbcda5">7.4. Changes to <code>receiver</code> concept</a></li>
<li><a href="#orgab1aee9">7.5. Changes to <code>when_all</code></a></li>
<li><a href="#org8b6ce0c">7.6. Changes to <code>&lt;version&gt;</code> header</a></li>
</ul>
</li>
<li><a href="#org37557e8">8. Appendix A - Benchmarks</a>
<ul>
<li><a href="#org74901d4">8.1. Register/unregister stop-callbacks single-threaded</a></li>
<li><a href="#orgd031387">8.2. Call <code>request_stop()</code> with no callbacks</a></li>
<li><a href="#orgb384ecf">8.3. Call <code>request_stop()</code> with x/y callbacks</a></li>
<li><a href="#org0dfd35a">8.4. Register/unregister callbacks from two threads concurrently</a></li>
</ul>
</li>
<li><a href="#org381ebcc">9. Appendix B - Implementation of <code>single_inplace_stop_source</code></a></li>
</ul>
</div>
</div>
<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">


<colgroup>
<col  class="org-left" />

<col  class="org-left" />
</colgroup>
<tbody>
<tr>
<td class="org-left">Document</td>
<td class="org-left"><b>P3409R1</b></td>
</tr>

<tr>
<td class="org-left">Date</td>
<td class="org-left"><b>2024-11-18</b></td>
</tr>

<tr>
<td class="org-left">Reply To</td>
<td class="org-left"><b>Lewis Baker &lt;lewissbaker@gmail.com&gt;</b></td>
</tr>

<tr>
<td class="org-left">Audience</td>
<td class="org-left"><b>SG1</b></td>
</tr>
</tbody>
</table>

<style>
  ins { background-color:#A0FFA0 }
  del { background-color:#FFA0A0 }
  expos { font-style:italic }
  exposonly {
    content: "// exposition only";
    font-style:italic;
  }
  div.section {
    counter-reset: paragraph;
    background-color:#A0FFA0;
  }
  div.wording > div.section {
    padding: 5px;
  }
  div.wording > div.section > h3 {
    margin-top: 5px;
  }
  div.wording {
    counter-reset: paragraph;
    margin-left: 50px
  }
  div.wording p.numbered::before {
    position: absolute;
    margin-left: -20px;
    margin-top: 2px;
    font-size: 0.75em;
    color: #CCC;
    content: counter(paragraph);
    counter-increment: paragraph;
  }
  div.wording ul {
    list-style-type: '\2014   ';
    counter-reset: paragraph;
  }
  div.wording ul li::before {
    position: absolute;
    margin-left: -75px;
    margin-top: 2px;
    font-size: 0.75em;
    color: #CCC;
    content: "(" counters(paragraph, ".") ")";
    counter-increment: paragraph;
  }
  div.wording ul ul li::before {
    margin-left: -125px;
  }
  div.wording ul ul ul li::before {
    margin-left: -175px;
  }
  div.wording ul ul ul ul li::before {
    margin-left: -225px;
  }
</style>

<div id="outline-container-org5d83021" class="outline-2">
<h2 id="org5d83021"><span class="section-number-2">1.</span> Abstract</h2>
<div class="outline-text-2" id="text-1">
<p>
This paper proposes placing semantic constraints on consumers of a stop-token obtained
from the <code>std::get_stop_token</code> query on a receiver to allow more efficient implementations
of the <code>stoppable_token</code> data-structure to be used in sender-algorithms where there are a
finite number of child operations.
</p>

<p>
It also proposes adding a new <code>std::single_inplace_stop_source</code> type that
implements such a data-structure that only allows a single stop-callback to be
registered at a time.
</p>

<p>
It also proposes adding a new <code>std::finite_inplace_stop_source&lt;N&gt;</code> class template that
is similar to an array of N <code>std::single_inplace_stop_source</code> objects in that it can allow
up to N consumers but is more space and cpu-efficient than such an array.
</p>

<p>
Finally, it proposes updating the <code>split()</code> algorithm to use <code>std::single_inplace_stop_source</code>
and the <code>when_all()</code> algorithm to use <code>std::finite_inplace_stop_source</code> instead of
<code>std::inplace_stop_source</code> as currently specified.
</p>

<p>
This paper does not propose removing/replacing <code>std::inplace_stop_token</code> as there are
still use-cases for a stop-token that can support registration of an unbounded number
of stop-callbacks.
</p>

<p>
This paper is one of two alternatives to be considered for improving the performance
of cancellation of sender-based asynchronous operations. The other paper to be considered
along-side this paper is <a href="https://wg21.link/P3410">P3410</a> "Enabling more efficient sender cancellation without stop-tokens",
which explores a design that puts a <code>request_stop()</code> method on the operation-state objects
instead of using stop-tokens to communicate stop-requests.
</p>
</div>
</div>

<div id="outline-container-org728d60c" class="outline-2">
<h2 id="org728d60c"><span class="section-number-2">2.</span> Revision History</h2>
<div class="outline-text-2" id="text-2">
<p>
R1
</p>
<ul class="org-ul">
<li>Added proposed wording.</li>
<li>Changed <code>single_inplace_stop_token</code> and <code>finite_inplace_stop_token</code> equality operator
to be a member instead of a hidden friend to be consistent with <code>inplace_stop_token</code>.</li>
<li>Minor fixes to reference implementation in Appendix B (add missing equality operator
to <code>single_inplace_stop_token</code>, simplify some expressions).</li>
</ul>
</div>
</div>

<div id="outline-container-org79d4a83" class="outline-2">
<h2 id="org79d4a83"><span class="section-number-2">3.</span> Overview</h2>
<div class="outline-text-2" id="text-3">
<p>
The design proposed for sender/receiver-based asynchronous operations in P2300R10
included facilities for cooperatively cancelling in-flight operations and is based
on a generalisation of the <code>std::stop_token</code> design added in C++20.
</p>

<p>
The current design of <code>std::stoppable_token</code> and the <code>std::get_stop_token</code> query on
a receiver's environment permits consumers of the stop-token to attach an arbitrary
number of stop-callback objects to a given stop-token obtained from a receiver's
environment.
</p>

<p>
This means that implementations of the <code>std::stoppable_token</code> concept, such as
<code>std::stop_token</code> and <code>std::inplace_stop_token</code>, are required to use a data-structure
that supports registration of an arbitrary number of stop-callbacks. This usually
involves a doubly-linked intrusive list with some form of mutex (usually a spin-mutex)
to synchronize access to the stop-callback list.
</p>

<p>
However, there are some cases where an algorithm has a finite number of child
operations and where each child operation generally only needs to register a
single stop-callback.
</p>

<p>
Implementations of the <code>stoppable_token</code> concept can make use of simpler and more
efficient data-structures if they can assume that there are a finite number of consumers of
the stop-token, each of which will only ever register a single stop-callback.
</p>

<p>
This paper proposes enabling these simpler data-structures by modifying the semantic
constraints of stop-tokens returned from <code>std::get_stop_token()</code> on a receiver's
environment to limit usage of the stop-token to register at most one stop-callback
unless otherwise specified.
</p>

<p>
This paper also proposes adding two new families of classes implementing the
<code>stoppable_token</code> concept which take advantage of these semantic constraints on usage:
</p>
<ul class="org-ul">
<li><code>std::single_inplace_stop_source</code> - Supports a single associated stop-callback.</li>
<li><code>std::finite_inplace_stop_source&lt;N&gt;</code> - Equivalent to a tuple of N separate <code>std::single_inplace_stop_source</code>
objects, providing N independent stop-tokens, each of which supports a single associated stop-callback.</li>
</ul>
</div>
</div>

<div id="outline-container-org03e40b2" class="outline-2">
<h2 id="org03e40b2"><span class="section-number-2">4.</span> Motivation</h2>
<div class="outline-text-2" id="text-4">
<p>
When the paper P2175R0 "Composable cancellation for sender-based async operations" was
reviewed by SG1, concerns were raised about the potential performance cost of the
stop-token-based design of the cancellation mechanisms due to the amount of synchronization
involved.
</p>

<p>
In order to be able to cancel some kinds of asynchronous operations after they have started,
the operation may need to register a stop-callback that will execute some logic to
interrupt the operation in the event that a stop-request is made before the operation
completes. For example, such logic might remove an entry from a queue of scheduled tasks,
or might call an underlying OS API to cancel a pending I/O operation.
</p>

<p>
Before the operation completes and the operation-state is destroyed, the operation then needs
to unregister the callback.
</p>

<p>
In cases where there are many leaf operations, each of which needs to register and unregister
a stop-callback, and where the operations themselves may complete quickly much of the time,
and where a stop-request being made is relatively rare, the overhead of constantly registering
and deregistering the stop-callback can potentially be significant.
</p>

<p>
For example, when repeatedly reading from a socket where the socket constantly has buffered
data available, we might issue an async <code>recv</code> operation, register a stop-callback just in
case the operation takes a long time and a stop-request were to be issued, only to have the
operation complete almost immediately, then requiring us to deregister the stop-callback
before producing the result for the consumer.
</p>

<p>
With the current design of <code>std::inplace_stop_token</code>, the registration/deregistration of
stop-callbacks typically involves acquiring a spin-lock on the data-structure, updating a
doubly-linked list of registered stop-callbacks and then releasing the spin-lock.
</p>

<p>
Whilst <code>std::inplace_stop_token</code> is still a relatively lightweight data-structure - it doesn't
do any dynamic allocation, like <code>std::stop_token</code> does - we would like to try and minimise the
overhead needed for registering and deregistering stop-callbacks where possible to improve the
performance of situations like the one described above.
</p>

<p>
Allowing the use of a simpler data-structure in these cases would reduce the amount of synchronization
involved (although there is still <i>some</i> synchronization required), reduce the amount of book-keeping
required, and also reduce the size needed within operation-states for storing stop-callbacks.
</p>
</div>

<div id="outline-container-org17cef3f" class="outline-3">
<h3 id="org17cef3f"><span class="section-number-3">4.1.</span> Why does it need to be done now?</h3>
<div class="outline-text-3" id="text-4-1">
<p>
If we do not apply this change before releasing sender/receiver, we will not be able to apply this
optimization later.
</p>

<p>
If the status quo is kept then users may write their own sender algorithm implementations that
take advantage of the fact that they can register multiple stop-callbacks. e.g. by passing an
inherited stop-token to multiple child operations which run concurrently and each register a
stop-callback.
</p>

<p>
Adding a restriction on how stop-tokens can be used later would be a breaking change as trying
to compose such user-defined algorithms with algorithms that wanted to take advantage of
the stop-token restrictions would lead to undefined behaviour due to that user-code potentially
trying to register multiple stop-callbacks associated with the stop-token, resulting in some
pre-condition violations.
</p>

<p>
Also, trying to change an algorithm implementation from using <code>std::inplace_stop_source</code>
to later using <code>std::single_inplace_stop_source</code> is going to change the layout of operation-state
types and would thus be a potential ABI break for that algorithm.
</p>
</div>
</div>
</div>


<div id="outline-container-orgd369fec" class="outline-2">
<h2 id="orgd369fec"><span class="section-number-2">5.</span> Proposal</h2>
<div class="outline-text-2" id="text-5">
<p>
This paper proposes several changes:
</p>
<ul class="org-ul">
<li>Allowing <code>std::get_stop_token()</code> to return a <code>stoppable_token</code> type that only supports one
associated stop-callback to exist at a time.</li>
<li>Defining a <code>std::single_inplace_stop_source</code> type that only supports a single associated
stop-callback to exist at a time.</li>
<li>Defining a <code>std::finite_inplace_stop_source&lt;N&gt;</code> type that provides N separate stop-tokens,
each of which supports a single associated stop-callback, but where sending a stop-request
via the stop-source sends the stop-request to all N stop-tokens.</li>
<li>Modifying the <code>split()</code> algorithm to use <code>std::single_inplace_stop_source</code> instead of
<code>std::inplace_stop_source</code>.</li>
<li>Modifying the <code>when_all()</code> algorithm to use <code>std::finite_inplace_stop_source</code> instead of
<code>std::inplace_stop_source</code>.</li>
</ul>
</div>

<div id="outline-container-orge29b3dd" class="outline-3">
<h3 id="orge29b3dd"><span class="section-number-3">5.1.</span> Allowing <code>stoppable_token</code> that only support a finite number of stop-callbacks</h3>
<div class="outline-text-3" id="text-5-1">
<p>
There are no syntactic changes required to the <code>stoppable_token</code> concept in order to support this.
</p>

<p>
However, to make the intent clear, we need to add a paragraph to the description of stop-tokens
that states that for a given type that models <i>stop-callback-for</i>, its constructor may have a
pre-condition that the number of associated stop-callback objects is less than some positive,
finite number.
</p>

<p>
This explicitly grants permission to implementers of the concept to add such a pre-condition
to its stop-callback constructor. It also means that consumers of a generic stop-token must
assume that the stop-callback constructor may have such a pre-condition, potentially with a
maximum number of existing associated stop-callback objects that is zero, and therefore such
generic consumers should limit themselves to constructing a single stop-callback object
associated with the stop-token.
</p>

<p>
Implementations of stop-callback types are still free to define their constructor without
such a precondition, and it is still valid for consumers of the corresponding stop-token
type to construct multiple associated stop-callback objects.
</p>

<p>
For example, if I write a function that takes a <code>std::inplace_stop_token</code> then I know that
this type allows an unbounded number of associated <code>std::inplace_stop_callback</code> objects
and so, within the function I can safely construct multiple associated stop-callback objects.
</p>

<div class="org-src-container">
<pre class="src src-c++"><span class="org-type">void</span> <span class="org-function-name">func1</span>(<span class="org-constant">std</span>::<span class="org-type">inplace_stop_token</span> <span class="org-variable-name">st</span>) {
  <span class="org-keyword">const</span> <span class="org-keyword">auto</span> <span class="org-variable-name">on_stop</span> = [] { <span class="org-comment-delimiter">/* </span><span class="org-comment">do something</span><span class="org-comment-delimiter"> */</span> };

  <span class="org-comment-delimiter">// </span><span class="org-comment">Constructing multiple stop-callbacks is allowed.</span>
  <span class="org-constant">std</span>::<span class="org-type">inplace_stop_callback</span> <span class="org-variable-name">cb1</span>{st, [&amp;] <span class="org-keyword">noexcept</span> { <span class="org-comment-delimiter">/* </span><span class="org-comment">do something</span><span class="org-comment-delimiter"> */</span> }};
  <span class="org-constant">std</span>::<span class="org-type">inplace_stop_callback</span> <span class="org-variable-name">cb2</span>{st, [&amp;] <span class="org-keyword">noexcept</span> { <span class="org-comment-delimiter">/* </span><span class="org-comment">do something else</span><span class="org-comment-delimiter"> */</span> }};

  <span class="org-comment-delimiter">// </span><span class="org-comment">...</span>
}
</pre>
</div>

<p>
However, if I were to write a function-template that could be instantiated with any type
that satisfied <code>std::stoppable_token</code>, then I would need to limit myself to constructing
at most one associated stop-callback object at a time.
</p>

<div class="org-src-container">
<pre class="src src-c++"><span class="org-keyword">template</span>&lt;<span class="org-constant">std</span>::<span class="org-type">stoppable_token</span> <span class="org-variable-name">StopToken</span>&gt;
<span class="org-type">void</span> <span class="org-function-name">func2</span>(<span class="org-type">StopToken</span> <span class="org-variable-name">st</span>) {
  <span class="org-keyword">const</span> <span class="org-keyword">auto</span> <span class="org-variable-name">on_stop</span> = [] { <span class="org-comment-delimiter">/* </span><span class="org-comment">do something</span><span class="org-comment-delimiter"> */</span> };
  <span class="org-keyword">using</span> <span class="org-type">callback_t</span> = <span class="org-constant">std</span>::<span class="org-type">stop_callback_for_t</span>&lt;<span class="org-type">StopToken</span>, <span class="org-keyword">decltype</span>(on_stop)&gt;;

  <span class="org-comment-delimiter">// </span><span class="org-comment">Constructing a single stop-callback is OK</span>
  <span class="org-type">callback_t</span> <span class="org-variable-name">cb1</span>{st, on_stop};

  <span class="org-comment-delimiter">// </span><span class="org-comment">Constructing a second stop-callback would potentially be a pre-condition</span>
  <span class="org-comment-delimiter">// </span><span class="org-comment">violation if StopToken happens to be e.g. std::single_inplace_stop_token.</span>
  <span class="org-type">callback_t</span> <span class="org-variable-name">cb2</span>{st, on_stop};
}
</pre>
</div>

<p>
Further, I would be implying that my function also has a pre-condition that my caller provide me
with a stop-token that permits me to construct at least one associated stop-callback. This would prevent
them from passing, for example, a <code>std::single_inplace_stop_token</code> that already had an associated
stop-callback object.
</p>

<div class="org-src-container">
<pre class="src src-c++"><span class="org-keyword">template</span>&lt;<span class="org-constant">std</span>::<span class="org-type">stoppable_token</span> <span class="org-variable-name">StopToken</span>&gt;
<span class="org-type">void</span> <span class="org-function-name">func3</span>(<span class="org-type">StopToken</span> <span class="org-variable-name">st</span>) {
  <span class="org-keyword">const</span> <span class="org-keyword">auto</span> <span class="org-variable-name">on_stop</span> = [] { <span class="org-comment-delimiter">/* </span><span class="org-comment">do something</span><span class="org-comment-delimiter"> */</span> };
  <span class="org-keyword">using</span> <span class="org-type">callback_t</span> = <span class="org-constant">std</span>::<span class="org-type">stop_callback_for_t</span>&lt;<span class="org-type">StopToken</span>, <span class="org-keyword">decltype</span>(on_stop)&gt;;

  <span class="org-type">callback_t</span> <span class="org-variable-name">cb</span>{st, on_stop};

  <span class="org-comment-delimiter">// </span><span class="org-comment">...</span>
}

<span class="org-type">void</span> <span class="org-function-name">caller</span>() {
  <span class="org-constant">std</span>::<span class="org-type">single_inplace_stop_source</span> <span class="org-variable-name">ss</span>;

  func3(ss.get_token());  <span class="org-comment-delimiter">// </span><span class="org-comment">OK. stop-token allows constructing a stop-callback</span>

  <span class="org-constant">std</span>::<span class="org-type">single_inplace_stop_callback</span> <span class="org-variable-name">cb</span>{ss.get_token(), [] { <span class="org-comment-delimiter">/* </span><span class="org-comment">do something</span><span class="org-comment-delimiter"> */</span> }};

  func3(ss.get_token());  <span class="org-comment-delimiter">// </span><span class="org-comment">BUG: violates func3's pre-condition.</span>
                          <span class="org-comment-delimiter">// </span><span class="org-comment">stop-token already has an associated stop-callback.</span>
}
</pre>
</div>
</div>
</div>

<div id="outline-container-org745d524" class="outline-3">
<h3 id="org745d524"><span class="section-number-3">5.2.</span> Limiting when stop-callbacks can be constructed by sender algorithms</h3>
<div class="outline-text-3" id="text-5-2">
<p>
One of the use-cases that needs to be carefully considered here is algorithms, like <code>schedule_from</code>, which
are specified to connect multiple child operations ahead of time, but only actually execute one of the
child operations at a time.
</p>

<p>
These are algorithms where <code>connect()</code> on the parent operation calls <code>connect()</code> on two (or more)
child operations, but where <code>start()</code> on the parent operation calls <code>start()</code> on the first child
but <code>start()</code> on the second child is not called until after the completion of the first child.
i.e. where the execution of the child operations does not overlap in time.
</p>

<p>
Consider the case where such a parent operation is provided an environment with a stop-token
that only permits a single stop-callback (such as the proposed <code>std::single_inplace_stop_token</code>).
</p>

<p>
It would be preferable to allow passing through this stop-token to both children rather than
having to construct a separate <code>std::finite_inplace_stop_source&lt;2&gt;</code> and provide different stop-tokens
to each child and then also attach a stop-callback to the provided stop-token which forwards stop-requests
through to a call to <code>request_stop()</code> on the local stop-source.
</p>

<p>
However, in order to guarantee that we do not violate the pre-conditions of the stop-callback constructor,
we need to ensure that the child operations do not both attempt to construct stop-callbacks with overlapping
lifetimes.
</p>

<p>
The current wording for [exec.recv.concepts] p3 states:
</p>

<blockquote>
<p>
Let <code>rcvr</code> be a receiver and let <code>op_state</code> be an operation state associated with an asynchronous operation created by connecting <code>rcvr</code> with a sender.
Let <code>token</code> be a stop token equal to <code>get_stop_token(get_env(rcvr))</code>.
<code>token</code> shall remain valid for the duration of the asynchronous operation's lifetime ([exec.async.ops]).
</p>

<p>
[Note: This means that, unless it knows about further guarantees provided by the type of <code>rcvr</code>, the implementation of <code>op_state</code> cannot use
token after it executes a completion operation.
This also implies that any stop callbacks registered on token must be destroyed before the invocation of the completion operation.
— end note]
</p>
</blockquote>

<p>
This references [exec.async.ops] p7 which defines "asynchronous operation lifetime":
</p>

<blockquote>
<p>
The <i>lifetime of an asynchronous operation</i>, also known as the <i>operation's async lifetime</i>, begins when its start operation begins executing and ends when its completion operation begins executing.
If the lifetime of an asynchronous operation's associated operation state ends before the lifetime of the asynchronous operation, the behavior is undefined.
After an asynchronous operation executes a completion operation, its associated operation state is invalid.
Accessing any part of an invalid operation state is undefined behavior.
</p>
</blockquote>

<p>
The important parts of these two paragraphs are that the stop-token obtained from <code>get_stop_token(get_env(rcvr))</code>
is only required to be valid for the duration of the asynchronous operation's lifetime, and that an asynchronous
operation's lifetime starts at the beginning of the call to <code>start()</code> on the operation-state and ends at the
beginning of the call to a completion-function.
</p>

<p>
This implies that, unless you have additional information about the validity of a stop-token provided in the
environment, you should not assume that it is valid to construct a stop-callback associated with that stop-token
(or indeed do anything else you can't do with an invalid stop-token) until the <code>start()</code> operation on the
operation-state is called.
</p>

<p>
This constraint placed on sender algorithms and their use of stop-tokens should be sufficient to guarantee
that it is safe for the class of algorithms discussed above, for example <code>schedule_from</code>, to pass through
a <code>single_inplace_stop_token</code> from its environment to the environment passed to child operations.
</p>

<p>
The one change I would suggest here is to explicitly call out this restriction in the note, similarly to
how the note calls out that stop-callbacks must be destroyed before the invocation of the completion
operation. In particular it should call out that stop-callbacks should not be constructed until after
the beginning of the invocation of the <code>start()</code> method on the operation-state.
</p>

<p>
A parent operation only needs to introduce a new stop-source and give separate stop-tokens to child operations
if all of the following are true:
</p>
<ul class="org-ul">
<li>we don't know that the stop-token can support multiple stop-callbacks at the same time; and</li>
<li>the child operations have overlapping asynchronous operation lifetimes; and</li>
<li>the parent operation wants to forward stop-requests to child operations</li>
</ul>
</div>
</div>

<div id="outline-container-org794dc5c" class="outline-3">
<h3 id="org794dc5c"><span class="section-number-3">5.3.</span> Adding the <code>std::single_inplace_stop_source</code> type</h3>
<div class="outline-text-3" id="text-5-3">
<p>
This paper proposes adding the following class and class-template definitions to the <code>&lt;stop_token&gt;</code> header:
</p>

<div class="org-src-container">
<pre class="src src-c++"><span class="org-keyword">namespace</span> <span class="org-constant">std</span>
{
  <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_token</span>;
  <span class="org-keyword">template</span> &lt;<span class="org-constant">std</span>::<span class="org-type">invocable</span> <span class="org-variable-name">CB</span>&gt;
  <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_callback</span>;

  <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_source</span> {
  <span class="org-keyword">public</span>:
    <span class="org-function-name">single_inplace_stop_source</span>() <span class="org-keyword">noexcept</span>;
    ~<span class="org-function-name">single_inplace_stop_source</span>();

    <span class="org-function-name">single_inplace_stop_source</span>(<span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_source</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-function-name">single_inplace_stop_source</span>(<span class="org-type">single_inplace_stop_source</span>&amp;&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">single_inplace_stop_source</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_source</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">single_inplace_stop_source</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-type">single_inplace_stop_source</span>&amp;&amp;) = <span class="org-keyword">delete</span>;

    <span class="org-type">bool</span> <span class="org-function-name">stop_possible</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;
    <span class="org-type">bool</span> <span class="org-function-name">stop_requested</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;

    <span class="org-type">bool</span> <span class="org-function-name">request_stop</span>() <span class="org-keyword">noexcept</span>;

    <span class="org-type">single_inplace_stop_token</span> <span class="org-function-name">get_token</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;
  };

  <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_token</span> {
  <span class="org-keyword">public</span>:
    <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
    <span class="org-keyword">using</span> <span class="org-type">callback_type</span> = <span class="org-type">single_inplace_stop_callback</span>&lt;<span class="org-type">CB</span>&gt;;

    <span class="org-function-name">single_inplace_stop_token</span>() <span class="org-keyword">noexcept</span>;
    <span class="org-function-name">single_inplace_stop_token</span>(<span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_token</span>&amp;) <span class="org-keyword">noexcept</span>;
    <span class="org-function-name">single_inplace_stop_token</span>(<span class="org-type">single_inplace_stop_token</span>&amp;&amp;) <span class="org-keyword">noexcept</span>;
    ~<span class="org-function-name">single_inplace_stop_token</span>();

    <span class="org-type">single_inplace_stop_token</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_token</span>&amp;) <span class="org-keyword">noexcept</span>;
    <span class="org-type">single_inplace_stop_token</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-type">single_inplace_stop_token</span>&amp;&amp;) <span class="org-keyword">noexcept</span>;

    <span class="org-type">bool</span> <span class="org-function-name">stop_possible</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;
    <span class="org-type">bool</span> <span class="org-function-name">stop_requested</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;

    <span class="org-type">bool</span> <span class="org-keyword">operator</span><span class="org-function-name">==</span>(<span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_token</span>&amp; <span class="org-variable-name">a</span>) <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> = <span class="org-keyword">default</span>;

  <span class="org-keyword">private</span>:
    <span class="org-type">single_inplace_stop_source</span>* <span class="org-variable-name">source</span>;  <span class="org-comment-delimiter">// </span><span class="org-comment">exposition only</span>
  };

  <span class="org-keyword">template</span> &lt;<span class="org-constant">std</span>::<span class="org-type">invocable</span> <span class="org-type">CB</span>&gt;
  <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_callback</span> {
  <span class="org-keyword">public</span>:
    <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">Initializer</span>&gt;
    <span class="org-keyword">requires</span> <span class="org-constant">std</span>::<span class="org-type">constructible_from</span>&lt;<span class="org-type">CB</span>, <span class="org-type">Initializer</span>&gt;
    <span class="org-function-name">single_inplace_stop_callback</span>(<span class="org-type">single_inplace_stop_token</span> <span class="org-variable-name">st</span>,
                                 <span class="org-type">Initializer</span>&amp;&amp; <span class="org-variable-name">init</span>)
      <span class="org-keyword">noexcept</span>(<span class="org-constant">std</span>::<span class="org-type">is_nothrow_constructible_v</span>&lt;<span class="org-type">CB</span>, <span class="org-type">Initializer</span>&gt;);

    ~<span class="org-function-name">single_inplace_stop_callback</span>();

    <span class="org-function-name">single_inplace_stop_callback</span>(<span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_callback</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-function-name">single_inplace_stop_callback</span>(<span class="org-type">single_inplace_stop_callback</span>&amp;&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">single_inplace_stop_callback</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_callback</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">single_inplace_stop_callback</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-type">single_inplace_stop_callback</span>&amp;&amp;) = <span class="org-keyword">delete</span>;

  <span class="org-keyword">private</span>:
    <span class="org-type">single_inplace_stop_source</span>* <span class="org-variable-name">source</span>;  <span class="org-comment-delimiter">// </span><span class="org-comment">exposition only</span>
    <span class="org-type">CB</span> <span class="org-variable-name">cb</span>;                               <span class="org-comment-delimiter">// </span><span class="org-comment">exposition only</span>
  };

  <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
  <span class="org-function-name">single_inplace_stop_callback</span>(<span class="org-type">single_inplace_stop_token</span>, <span class="org-type">CB</span>)
    -&gt; <span class="org-type">single_inplace_stop_callback</span>&lt;<span class="org-type">CB</span>&gt;;
}
</pre>
</div>

<p>
The semantics of these types are identical to that of the corresponding <code>std::inplace_stop_token</code>,
<code>std::inplace_stop_source</code> and <code>std::inplace_stop_callback&lt;CB&gt;</code> types, with the exception that
the <code>std::single_inplace_stop_callback</code> constructor has a pre-condition that there are no other
stop-callback objects associated with the <code>std::single_inplace_stop_token</code> object passed to
the constructor.
</p>
</div>
</div>

<div id="outline-container-org901398b" class="outline-3">
<h3 id="org901398b"><span class="section-number-3">5.4.</span> Adding the <code>std::finite_inplace_stop_source&lt;N&gt;</code> class template</h3>
<div class="outline-text-3" id="text-5-4">
<p>
In cases where a sender algorithm has multiple child operations, where the number of child operations
is statically known, and where the algorithm wants to be able to communicate a stop-request to all of the
child operations, using an array of <code>std::single_inplace_stop_source</code> objects is, in most cases, still
going to be more efficient than using a <code>std::inplace_stop_source</code>.
</p>

<p>
However, naively storing an array of <code>std::single_inplace_stop_source</code> objects still has some overheads
due to redundancy in the data-structures in the case where a stop-request is communicated to all of
the stop-sources at the same time (sequentially on the same thread).
</p>

<p>
The <code>std::single_inplace_stop_source</code> data-structure contains an atomic pointer and also an atomic
<code>std::thread::id</code> which is used to determine whether stop-callback deregistration is occurring from
inside a call to <code>request_stop()</code>.
</p>

<p>
If we store an array of N <code>std::single_inplace_stop_source</code> objects, then we are storing N copies of
this <code>std::thread::id</code> value, even though in this case, they will all contain the same value.
We can save some storage in this case, by instead defining a data-structure that has N atomic pointers
but only one atomic <code>std::thread::id</code> value.
</p>

<p>
Such a data-structure would have identical performance and layout to <code>std::single_inplace_stop_source</code>
for N=1, but would save (N-1) pointers of storage for N&gt;=2.
</p>

<p>
This paper proposes adding an implementation of such a data-structure, named <code>std::finite_inplace_stop_source</code>,
which is templated on the desired number of independent stop-tokens that need to be supported.
</p>

<p>
The synopsis for this class-template is as follows:
</p>
<div class="org-src-container">
<pre class="src src-c++"><span class="org-keyword">namespace</span> <span class="org-constant">std</span>
{
  <span class="org-keyword">template</span> &lt;<span class="org-type">size_t</span> <span class="org-variable-name">N</span>, <span class="org-type">size_t</span> <span class="org-variable-name">Idx</span>&gt;
  <span class="org-keyword">class</span> <span class="org-type">finite_inplace_stop_token</span>;
  <span class="org-keyword">template</span> &lt;<span class="org-type">size_t</span> <span class="org-variable-name">N</span>, <span class="org-type">size_t</span> <span class="org-variable-name">Idx</span>, <span class="org-constant">std</span>::<span class="org-type">invocable</span> <span class="org-variable-name">CB</span>&gt;
  <span class="org-keyword">class</span> <span class="org-type">finite_inplace_stop_callback</span>;

  <span class="org-keyword">template</span> &lt;<span class="org-type">size_t</span> <span class="org-variable-name">N</span>&gt;
  <span class="org-keyword">class</span> <span class="org-type">finite_inplace_stop_source</span> {
   <span class="org-keyword">public</span>:
    <span class="org-function-name">finite_inplace_stop_source</span>() <span class="org-keyword">noexcept</span>;
    ~<span class="org-function-name">finite_inplace_stop_source</span>();

    <span class="org-function-name">finite_inplace_stop_source</span>(<span class="org-keyword">const</span> <span class="org-type">finite_inplace_stop_source</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-function-name">finite_inplace_stop_source</span>(<span class="org-type">finite_inplace_stop_source</span>&amp;&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">finite_inplace_stop_source</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-keyword">const</span> <span class="org-type">finite_inplace_stop_source</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">finite_inplace_stop_source</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-type">finite_inplace_stop_source</span>&amp;&amp;) = <span class="org-keyword">delete</span>;

    <span class="org-type">bool</span> <span class="org-function-name">stop_possible</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;
    <span class="org-type">bool</span> <span class="org-function-name">stop_requested</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;

    <span class="org-type">bool</span> <span class="org-function-name">request_stop</span>() <span class="org-keyword">noexcept</span>;

    <span class="org-keyword">template</span> &lt;<span class="org-type">size_t</span> <span class="org-variable-name">Idx</span>&gt;
      <span class="org-keyword">requires</span>(Idx &lt; N)
    <span class="org-type">finite_inplace_stop_token</span>&lt;N, Idx&gt; <span class="org-function-name">get_token</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;
  };

  <span class="org-keyword">template</span> &lt;<span class="org-type">size_t</span> <span class="org-variable-name">N</span>, <span class="org-type">size_t</span> <span class="org-variable-name">Idx</span>&gt;
  <span class="org-keyword">class</span> <span class="org-type">finite_inplace_stop_token</span> {
   <span class="org-keyword">public</span>:
    <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
    <span class="org-keyword">using</span> <span class="org-type">callback_type</span> = <span class="org-type">finite_inplace_stop_callback</span>&lt;N, Idx, <span class="org-type">CB</span>&gt;;

    <span class="org-function-name">finite_inplace_stop_token</span>() <span class="org-keyword">noexcept</span>;

    <span class="org-type">bool</span> <span class="org-function-name">stop_possible</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;
    <span class="org-type">bool</span> <span class="org-function-name">stop_requested</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;

    <span class="org-type">bool</span> <span class="org-keyword">operator</span><span class="org-function-name">==</span>(<span class="org-keyword">const</span> <span class="org-type">finite_inplace_stop_token</span>&amp; <span class="org-variable-name">a</span>) <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> = <span class="org-keyword">default</span>;

   <span class="org-keyword">private</span>:
    <span class="org-type">finite_inplace_stop_source</span>&lt;N&gt;* <span class="org-variable-name">source_</span>;  <span class="org-comment-delimiter">// </span><span class="org-comment">exposition-only</span>
  };

  <span class="org-keyword">template</span> &lt;<span class="org-type">size_t</span> <span class="org-variable-name">N</span>, <span class="org-type">size_t</span> <span class="org-variable-name">Idx</span>, <span class="org-constant">std</span>::<span class="org-type">invocable</span> <span class="org-type">CB</span>&gt;
  <span class="org-keyword">class</span> <span class="org-type">finite_inplace_stop_callback</span> {
   <span class="org-keyword">public</span>:
    <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">Init</span>&gt;
      <span class="org-keyword">requires</span> <span class="org-constant">std</span>::<span class="org-type">constructible_from</span>&lt;<span class="org-type">CB</span>, <span class="org-type">Init</span>&gt;
    <span class="org-function-name">finite_inplace_stop_callback</span>(
        <span class="org-type">finite_inplace_stop_token</span>&lt;N, Idx&gt; <span class="org-variable-name">st</span>,
        <span class="org-type">Init</span>&amp;&amp; <span class="org-variable-name">init</span>) <span class="org-keyword">noexcept</span>(<span class="org-constant">std</span>::<span class="org-type">is_nothrow_constructible_v</span>&lt;<span class="org-type">CB</span>, <span class="org-type">Init</span>&gt;);

    ~<span class="org-function-name">finite_inplace_stop_callback</span>();

    <span class="org-function-name">finite_inplace_stop_callback</span>(<span class="org-keyword">const</span> <span class="org-type">finite_inplace_stop_callback</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-function-name">finite_inplace_stop_callback</span>(<span class="org-type">finite_inplace_stop_callback</span>&amp;&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">finite_inplace_stop_callback</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-keyword">const</span> <span class="org-type">finite_inplace_stop_callback</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">finite_inplace_stop_callback</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-type">finite_inplace_stop_callback</span>&amp;&amp;) = <span class="org-keyword">delete</span>;

  <span class="org-keyword">private</span>:
    <span class="org-type">CB</span> <span class="org-variable-name">cb</span>;                                  <span class="org-comment-delimiter">// </span><span class="org-comment">exposition-only</span>
    <span class="org-type">finite_inplace_stop_source</span>&lt;N&gt;* <span class="org-variable-name">source_</span>; <span class="org-comment-delimiter">// </span><span class="org-comment">exposition-only</span>
  };

  <span class="org-keyword">template</span> &lt;<span class="org-type">size_t</span> <span class="org-variable-name">N</span>, <span class="org-type">size_t</span> <span class="org-variable-name">Idx</span>, <span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
  <span class="org-function-name">finite_inplace_stop_callback</span>(<span class="org-type">finite_inplace_stop_token</span>&lt;N, Idx&gt;, <span class="org-type">CB</span>)
    -&gt; <span class="org-type">finite_inplace_stop_callback</span>&lt;N, <span class="org-variable-name">Idx</span>, <span class="org-variable-name">CB</span>&gt;;
}
</pre>
</div>

<p>
An instance of <code>finite_inplace_stop_source&lt;N&gt;</code> has N separate associated <code>finite_inplace_stop_token&lt;N, Idx&gt;</code>
stop-tokens, where <code>Idx</code> is in the range 0 .. N-1.
</p>

<p>
Each <code>finite_inplace_stop_token&lt;N,Idx&gt;</code> from a given stop-source can have at most one
associated <code>finite_inplace_stop_callback&lt;N, Idx, CB&gt;</code> object at a time.
</p>

<p>
When a call to <code>request_stop()</code> is made on the stop-source object, the stop-request is sent to
all of the associated stop-tokens. Further, any stop-callbacks associated with any of the associated
stop-tokens will be invoked.
</p>

<p>
The intent here is that it would be a valid implementation of <code>finite_inplace_stop_source&lt;N&gt;</code> to just
hold an array of <code>single_inplace_stop_source</code> objects and to have <code>request_stop()</code> forward to a call
to <code>request_stop()</code> on all of the <code>single_inplace_stop_source</code> objects. However, a high QoI implementation
may choose to use a more efficient data-structure.
</p>

<p>
There is also the question of whether we should permit this class to be instantiated with a template-parameter
of 0 or not, i.e. is it valid to write <code>finite_inplace_stop_source&lt;0&gt;</code>?
</p>

<p>
Such an object would not have the ability to obtain any associated stop-tokens and therefore would
not have the ability to register any stop-callbacks. Ideally, such a type would compile out to nothing.
</p>

<p>
To enable this optimization, the <code>finite_inplace_stop_source&lt;N&gt;::stop_possible()</code> method returns <code>N &gt;= 1</code>.
This means that it will return <code>false</code> for <code>N == 0</code>, a case when there is no possibility of obtaining
an associated stop-token that could observe a stop-request. Such a stop-source object is already possible
with <code>std::stop_source(std::nostopstate)</code> and is called a <i>disengaged</i> stop-source.
</p>

<p>
This allows implementations to provide a specialization for <code>finite_inplace_stop_source&lt;0&gt;</code> that is an
empty class, rather than having to have an <code>atomic_flag</code> data-member just to make sure that <code>request_stop()</code>
returns <code>true</code> on first invocation and <code>false</code> on subsequent invocations.
</p>

<p>
For example, a possible implementation of this specialization may be:
</p>
<div class="org-src-container">
<pre class="src src-c++"><span class="org-keyword">namespace</span> <span class="org-constant">std</span>
{
  <span class="org-keyword">template</span>&lt;&gt;
  <span class="org-keyword">class</span> <span class="org-type">finite_inplace_stop_source</span>&lt;0&gt; {
  <span class="org-keyword">public</span>:
    finite_inplace_stop_source() <span class="org-keyword">noexcept</span> = <span class="org-keyword">default</span>;

    finite_inplace_stop_source(<span class="org-keyword">const</span> <span class="org-type">finite_inplace_stop_source</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-function-name">finite_inplace_stop_source</span>(<span class="org-type">finite_inplace_stop_source</span>&amp;&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">finite_inplace_stop_source</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-keyword">const</span> <span class="org-type">finite_inplace_stop_source</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">finite_inplace_stop_source</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-type">finite_inplace_stop_source</span>&amp;&amp;) = <span class="org-keyword">delete</span>;

    <span class="org-type">bool</span> <span class="org-function-name">stop_possible</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> { <span class="org-keyword">return</span> <span class="org-constant">false</span>; }
    <span class="org-type">bool</span> <span class="org-function-name">stop_requested</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> { <span class="org-keyword">return</span> <span class="org-constant">false</span>; }
    <span class="org-type">bool</span> <span class="org-function-name">request_stop</span>() <span class="org-keyword">noexcept</span> { <span class="org-keyword">return</span> <span class="org-constant">false</span>; }
  };
}
</pre>
</div>
</div>

<div id="outline-container-org4df5145" class="outline-4">
<h4 id="org4df5145"><span class="section-number-4">5.4.1.</span> Tweaks to the <code>stoppable_token</code> and <code>stoppable-source</code> concepts</h4>
<div class="outline-text-4" id="text-5-4-1">
<p>
The nature of the <code>finite_inplace_stop_source&lt;N&gt;</code> type is such that the existing definitions of <code>stoppable-source</code>
and <code>stoppable_token</code> as described in [thread.stoptoken.intro] and [stoptoken.concepts] do not quite fit the
type, yet the <code>finite_inplace_stop_source</code> family of types is something that I think the concepts should
support.
</p>
</div>

<ol class="org-ol">
<li><a id="org7b05433"></a>Relaxing <code>stoppable-source</code><br />
<div class="outline-text-5" id="text-5-4-1-1">
<p>
The exposition-only <code>stoppable-source</code> concept definition currently requires that the type has a
<code>get_token()</code> member-function that returns a <code>stoppable_token</code>. However, the <code>finite_inplace_stop_source&lt;N&gt;</code>
type has a <code>get_token&lt;Idx&gt;()</code> member-function, and thus the <code>finite_inplace_stop_source&lt;N&gt;</code> type would
not satisfy the <code>stoppable-source</code> concept.
</p>

<p>
This paper therefore proposes to remove this syntactic requirement from <code>stoppable-source</code> and to instead
just provide a semantic requirement that there is <i>some</i> way to obtain a <code>stoppable_token</code> that is associated
with the stop-source.
</p>
</div>
</li>

<li><a id="orgc237eb5"></a>Associated stop-callbacks<br />
<div class="outline-text-5" id="text-5-4-1-2">
<p>
One of the other challenges with the current wording is that it refers to stop-tokens, stop-callbacks and
stop-source objects that share a stop-state being "associated" with each other.
</p>

<p>
We have two choices with regards to how to apply this logic to the <code>finite_inplace_stop_source</code> family of
types.
</p>

<p>
The first is to treat a <code>finite_inplace_stop_source&lt;N&gt;</code> object as having N separate stop-states,
with each <code>finite_inplace_stop_token&lt;N, Idx&gt;</code> referring to a particular stop-state. The <code>request_stop()</code>
method on the <code>finite_inplace_stop_source</code> has the semantics of sending a stop-request to each of the
N stop-states.
</p>

<p>
This would make it easier to define the pre-condition necessary on the constructor of a <code>finite_inplace_stop_callback</code>
object, as a stop-callback constructed using <code>source.get_token&lt;0&gt;()</code> would not be considered associated
with a stop-callback constructed using <code>source.get_token&lt;1&gt;()</code>, since they would refer to different
stop-states. We could just place a pre-condition on the stop-callback constructor that requires that
there are no existing stop-callbacks associated with the stop-token.
</p>

<p>
However, if we take this approach, we might want to modify the definition such that a <code>stoppable-source</code>
could potentially be associated with multiple stop-states. It is currently limited to being associated
with at most one stop-state.
</p>

<p>
The second approach is to treat the <code>finite_inplace_stop_source&lt;N&gt;</code> as having a single stop-state
such that all of the stop-callbacks registered using the different stop-token types are associated
with the <code>finite_inplace_stop_source</code> object.
</p>

<p>
This would then require some other way of describing the pre-condition on the stop-callback construction.
For example, we might need to define a pre-condition like "there are no stop-callbacks associated with
the provided stop-token argument which have the same <code>Idx</code> template argument as the stop-callback
currently being constructed".
</p>

<p>
This paper proposes the latter approach as a less-intrusive modification to the status-quo.
</p>
</div>
</li>
</ol>
</div>
</div>

<div id="outline-container-org764a865" class="outline-3">
<h3 id="org764a865"><span class="section-number-3">5.5.</span> Modifications to <code>std::execution</code> sender algorithms</h3>
<div class="outline-text-3" id="text-5-5">
<p>
Of the initial set of sender/receiver algorithms added in P2300R10, there are two algorithms which
are currently specified to construct their own <code>std::inplace_stop_source</code> object which could be
replaced with <code>std::single_inplace_stop_source</code> - <code>std::execution::when_all()</code> and <code>std::execution::split()</code>.
</p>

<p>
Other than the types of stop-tokens returned from queries on the environments passed to child operations,
there should be no changes in user-visible behaviour of these algorithms.
</p>
</div>

<div id="outline-container-orgb5a09bc" class="outline-4">
<h4 id="orgb5a09bc"><span class="section-number-4">5.5.1.</span> Changes to <code>split</code> algorithm</h4>
<div class="outline-text-4" id="text-5-5-1">
<p>
The <code>split</code> algorithm wraps a single child operation in a copyable sender that has shared ownership
semantics of the wrapped child operation.
</p>

<p>
The shared-state is specified to include a stop-source object of type <code>std::inplace_stop_source</code> and
the environment of the receiver connected to the wrapped sender returns an associated <code>std::inplace_stop_token</code>
from the <code>std::get_stop_token</code> query.
</p>

<p>
This paper proposes to just change the specification for this stop-source object to be of type
<code>std::single_inplace_stop_source</code> and for the environment passed to the child operation to return
a <code>std::single_inplace_stop_token</code> object from its <code>std::get_stop_token</code> query.
</p>
</div>
</div>

<div id="outline-container-org274713f" class="outline-4">
<h4 id="org274713f"><span class="section-number-4">5.5.2.</span> Changes to <code>when_all</code> algorithms</h4>
<div class="outline-text-4" id="text-5-5-2">
<p>
The default implementation of the <code>when_all</code> algorithm, and the <code>when_all_into_variant</code> algorithm
by implication as it is defined in terms of <code>when_all</code>, are specified to have the operation-state
owning a <code>std::inplace_stop_source</code> which is used to communicate a stop-request to all child
operations of the <code>when_all</code> operation.
</p>

<p>
This paper proposes to replace the <code>std::inplace_stop_source</code> with an instance of
<code>std::finite_inplace_stop_source&lt;N&gt;</code> where N is the number of child senders passed to the
<code>when_all()</code> algorithm.
</p>

<p>
The environment of the receiver connected to the I'th child operation would provide an
environment from its <code>get_env()</code> method whose <code>std::get_stop_token</code> query returned
the result of calling the <code>get_token&lt;I&gt;()</code> member-function on the stop-source object.
</p>
</div>
</div>

<div id="outline-container-org3467e81" class="outline-4">
<h4 id="org3467e81"><span class="section-number-4">5.5.3.</span> Alternative: Leave the choice of stop-token to be implementation-defined</h4>
<div class="outline-text-4" id="text-5-5-3">
<p>
An alternative approach that could be considered for these algorithms is to leave the stop-token
type passed via the environment to child operations as unspecified and leave it up to
implementers to choose the most appropriate stop-token/stop-source type.
</p>

<p>
If we decide that we don't want to add the new <code>single_inplace_stop_token</code> and
<code>finite_inplace_stop_token</code> facilities to the standard library, but still want to
apply the semantic constraints on <code>stoppable_token</code> then this approach could let
implementations define their own internal stop-token types equivalent to the
types proposed here and use them instead of <code>inplace_stop_token</code>.
</p>

<p>
However, if we decide to add the <code>single_inplace_stop_token</code> and <code>finite_inplace_stop_token</code>
types into the standard library, then I don't see any significant downsides to specifying
that the <code>split</code> and <code>when_all</code> algorithms are defined in terms of them.
</p>
</div>
</div>
</div>
</div>

<div id="outline-container-org1bf6085" class="outline-2">
<h2 id="org1bf6085"><span class="section-number-2">6.</span> Design Discussion</h2>
<div class="outline-text-2" id="text-6">
</div>
<div id="outline-container-org581a728" class="outline-3">
<h3 id="org581a728"><span class="section-number-3">6.1.</span> Performance Benefits</h3>
<div class="outline-text-3" id="text-6-1">
</div>
<div id="outline-container-org3c1a2ce" class="outline-4">
<h4 id="org3c1a2ce"><span class="section-number-4">6.1.1.</span> Cost of <code>inplace_stop_token</code></h4>
<div class="outline-text-4" id="text-6-1-1">
<p>
The existing <code>inplace_stop_token</code> facility added by P2300R10 allows registering
an arbitrary number of stop-callbacks without requiring any dynamic memory allocations
through the use of an intrusive linked list of stop-callbacks.
</p>

<p>
The maintenance of the linked list of stop-callbacks requires synchronisation to ensure
that multiple threads concurrently trying to register/deregister/invoke callbacks do
not introduce data-races.
</p>

<p>
Here, we explore the cost of certain operations on typical implementations of this
class so we can understand the potential savings by using a simpler data-structure.
</p>

<p>
I will be using the reference implementation from the <a href="https://github.com/NVIDIA/stdexec">stdexec</a> library for this analysis.
</p>
</div>

<ol class="org-ol">
<li><a id="orgaf7594e"></a>Sizes of data-structure<br />
<div class="outline-text-5" id="text-6-1-1-1">
<p>
The <code>inplace_stop_source</code> structure needs to store the following data members:
</p>
<ul class="org-ul">
<li><code>std::atomic&lt;uint8_t&gt;</code> - Synchronization state / flags.</li>
<li><code>stop_callback_base*</code> - A pointer to first item in linked list of registered callbacks.</li>
<li><code>std::thread::id</code> - thread-id of the thread that first called <code>request_stop()</code>.
This is needed to determine whether to block inside stop-callback deregistration
or not.</li>
</ul>

<p>
If these data-members are appropriately laid out, then on 64-bit platforms this structure
will usually be either 16 bytes or 24 bytes in size, depending on the size of your
platform's <code>thread::id</code> type.
</p>

<p>
The <code>inplace_stop_callback</code> data-structure needs to store the following:
</p>
<ul class="org-ul">
<li>pointer to the <code>inplace_stop_source</code> (so it can deregister itself)</li>
<li>pointers to next/prev elements in the intrusive linked list</li>
<li>a function-pointer for the callback to run</li>
<li>synchronization state needed to signal when the callback has finished executing</li>
<li>an additional pointer to a flag that is used to determine whether the stop-callback
has been deregistered during the execution of the stop-callback</li>
<li>The user-provided callback object itself</li>
</ul>

<p>
The net result is that the stop-callback object typically has a size of 6 pointers plus
the size of the user's callback, which often itself contains a single pointer. So a total
size of 7 pointers or 56 bytes.
</p>
</div>
</li>

<li><a id="orgf4bfa82"></a>Cost of operations<br />
<div class="outline-text-5" id="text-6-1-1-2">
<p>
The <code>inplace_stop_token</code> class permits multiple threads to concurrently register/deregister
callbacks. Therefore the registration/deregistration requires synchronization to ensure
there are no data-races. As the synchronization operations are generally the most expensive
part of registering a stop-callback, we will largely focus on the number of synchronization
operations required.
</p>
</div>

<ol class="org-ol">
<li><a id="org7e8c86e"></a>Registering a stop-callback<br />
<div class="outline-text-6" id="text-6-1-1-2-1">
<p>
The set of steps performed during construction of an <code>inplace_stop_callback</code> object is as follows:
</p>

<ul class="org-ul">
<li>Tries to acquire a lock on the stop-source using a spin-lock
<ul class="org-ul">
<li>Enters a loop that performs a weak compare-exchange, trying to set the 'locked' flag from 0-&gt;1.</li>
<li>If it sees the 'locked' flag is already 1 then calls <code>wait()</code> on the atomic to allow the
current thread to block efficiently until the synchronization state changes (hopefully setting
the 'locked' flag to 0 or, alternatively, setting the 'stop-requested' flag to 1).</li>
<li>If it sees that there has already been a stop-request before the lock could be acquired
then it abandons the attempt to lock and just invokes the stop-callback it was trying
to register inline.</li>
</ul></li>
<li>Inserts the stop-callback object into the linked list</li>
<li>Unlocks the spin-lock and notifies any threads that might be waiting to acquire the lock.</li>
</ul>

<p>
If the operation is uncontended then the best-case execution is:
</p>
<ul class="org-ul">
<li>atomic load relaxed</li>
<li>atomic compare-exchange weak acq-rel</li>
<li>insert linked-list node</li>
<li>atomic store release</li>
<li>atomic notify</li>
</ul>

<p>
If the registration is contended and a thread is unable to acquire the lock immediately, then this becomes:
</p>
<ul class="org-ul">
<li>atomic load relaxed</li>
<li>repeat until successful
<ul class="org-ul">
<li>repeat until lock is available
<ul class="org-ul">
<li>atomic wait relaxed</li>
<li>atomic load relaxed</li>
</ul></li>
<li>atomic compare exchange weak acq-rel</li>
</ul></li>
<li>insert linked-list node</li>
<li>atomic store release</li>
<li>atomic notify</li>
</ul>

<p>
It is worth noting that the use of a spin-lock here with a single locked-flag does not guarantee
fairness among threads. A thread may spin in the above loops for an unbounded amount of time waiting
to acquire the lock if the lock is highly contended.
</p>

<p>
Other strategies could potentially be used here (e.g. ticket-based locks) which could improve fairness,
although at the cost of additional synchronization.
</p>
</div>
</li>

<li><a id="org6746dff"></a>Deregistering a stop-callback<br />
<div class="outline-text-6" id="text-6-1-1-2-2">
<p>
When a stop-callback object is destroyed, the stop-callback needs to be removed from the list
of registered callbacks.
</p>

<p>
This requires acquiring a lock on the spin-lock, similar to that required during callback registration
but without the early-out if a stop-request has been made.
</p>

<p>
However, it also needs to handle the case where the stop-callback has been run on another thread,
in which case we need to wait until the other thread indicates the callback has finished executing.
</p>

<p>
In the case where a stop-request has not been made, and there is no contention, this operation performs:
</p>
<ul class="org-ul">
<li>atomic load relaxed</li>
<li>atomic compare-exchange weak acq-rel</li>
<li>remove node from linked-list</li>
<li>atomic store release</li>
<li>atomic notify</li>
</ul>

<p>
In the case that a stop-request has not been made, but there is contention,
the operation may perform:
</p>
<ul class="org-ul">
<li>atomic load relaxed</li>
<li>repeat until successful
<ul class="org-ul">
<li>repeat until 'locked' flag is not set
<ul class="org-ul">
<li>atomic wait relaxed</li>
<li>atomic load relaxed</li>
</ul></li>
<li>atomic compare-exchange weak acq-rel</li>
</ul></li>
<li>remove node from linked-list</li>
<li>atomic store release</li>
<li>atomic notify</li>
</ul>

<p>
In the case that a stop-request has been made on another thread and the callback
invocation has already completed, the operations will include:
</p>
<ul class="org-ul">
<li>atomic load relaxed (read 'locked' flag = 0)</li>
<li>atomic compare-exchange weak acq-rel (set 'locked' flag = 1)</li>
<li>read node state - notice that callback has already been removed</li>
<li>atomic store release (set 'locked' flag = 0)</li>
<li>atomic notify</li>
<li>atomic load acquire (read 'callback-completed' flag = 1)</li>
</ul>

<p>
If there has been a stop-request on another thread and the callback invocation
has not yet returned then the operations will include:
</p>
<ul class="org-ul">
<li>atomic load relaxed (read 'locked' flag = 0)</li>
<li>atomic compare-exchange weak acq-rel (set 'locked' flag = 1)</li>
<li>read node state - notice that callback has already been removed</li>
<li>atomic store release (set 'locked' flag = 0)</li>
<li>atomic notify</li>
<li>atomic load acquire (read 'callback-completed' flag - read 0)</li>
<li>atomic wait acquire (until 'callback-completed' flag is non-zero)</li>
</ul>
</div>
</li>

<li><a id="org08bace9"></a>Calling <code>request_stop()</code><br />
<div class="outline-text-6" id="text-6-1-1-2-3">
<p>
The call to <code>inplace_stop_source::request_stop()</code> needs to first atomically acquire
the lock and set the 'stop-requested' flag. If successful, then the calling thread
is responsible for invoking the registered stop-callbacks.
As the lock must not be held during the invocation of the callbacks to prevent
potential deadlocks, the spin-lock must be released and reacquired for each
registered callback.
</p>

<p>
Further, for each callback, as the deregistration of the callback can potentially
be executing concurrently on another thread and become blocked waiting for the
invocation of the callback to complete, the thread calling <code>request_stop()</code> must
perform an atomic store release and an atomic notify after the callback returns
in order to unblock a concurrent callback deregistration (if one exists).
</p>

<p>
If there are no stop-callbacks registered (the best case scenario) then the
<code>request_stop()</code> operation will perform:
</p>
<ul class="org-ul">
<li>atomic load relaxed (read 'locked' flag = 0, 'stop-requested' flag = 0)</li>
<li>atomic compare-exchange weak acq-rel (set 'locked' flag = 1, 'stop-requested' flag = 1)</li>
<li>read list of callbacks and find empty list</li>
<li>store release (set 'locked' flag = 0)</li>
</ul>

<p>
If there are stop-callbacks registered then the <code>request_stop()</code> operation
will perform the following (assuming no contention):
</p>
<ul class="org-ul">
<li>atomic load relaxed (read 'locked' flag = 0, 'stop-requested' flag = 0)</li>
<li>atomic compare-exchange weak acq-rel (set 'locked' flag = 1, 'stop-requested' flag = 1)</li>
<li>while callback list is non-empty
<ul class="org-ul">
<li>remove next callback</li>
<li>atomic store release (set 'locked' flag = 0)</li>
<li>atomic notify</li>
<li>invoke callback</li>
<li>atomic load relaxed (read 'locked' flag = 0)</li>
<li>atomic compare-exchange weak acq-rel (set 'locked' flag = 1)</li>
</ul></li>
<li>atomic store release (set 'locked' flag = 0)</li>
</ul>

<p>
If there is contention on the stop-source lock, then each of the lock-acquisition
operations will enter a spin-loop with atomic-wait backoff.
</p>
</div>
</li>
</ol>
</li>
</ol>
</div>



<div id="outline-container-org9023fbb" class="outline-4">
<h4 id="org9023fbb"><span class="section-number-4">6.1.2.</span> Cost of <code>single_inplace_stop_token</code></h4>
<div class="outline-text-4" id="text-6-1-2">
</div>
<ol class="org-ol">
<li><a id="orgeb6a8c3"></a>Sizes of data-structure<br />
<div class="outline-text-5" id="text-6-1-2-1">
<p>
The <code>single_inplace_stop_source</code> structure only needs to store a single
atomic pointer plus a thread-id.
</p>

<p>
On 64-bit platforms, this structure will typically be 16 bytes in size.
On some platforms this is the same as <code>inplace_stop_source</code> and on others
is 8-bytes smaller.
</p>

<p>
The <code>single_inplace_stop_callback</code> structure needs to store a pointer to
the stop-source and also a function-pointer to invoke when the stop-callback
is invoked, along with any state that the user's callback object requires,
which is often also a single pointer.
</p>

<p>
On 64-bit platforms this will be 16 bytes plus the size of the user's callback.
So in most cases, 24-bytes per stop-callback. Compare this to 56-bytes per stop-callback
for the reference implementation of <code>std::inplace_stop_callback</code>.
</p>

<p>
It is worth noting, however, that for <code>std::inplace_stop_callback</code> we can have many
stop-callback objects for a single <code>std::inplace_stop_source</code>.
Whereas for <code>std::single_inplace_stop_callback</code>, each callback object needs to be
associated with a different stop-source object.
</p>

<p>
Consider, for example, a <code>when_all()</code> algorithm that has 10 child operations,
where each child registers a single stop-callback in the child operation-state.
</p>

<p>
An implementation that uses a single <code>std::inplace_stop_source</code> in the <code>when_all()</code> operation-state
would use 24 bytes in the parent operation-state and 56 bytes for a stop-callback in each of the
10x child operation-states - a total of 584 bytes.
</p>

<p>
An implementation that uses 10x <code>std::single_inplace_stop_source</code> objects in the <code>when_all()</code> operation
state would use 10x 16 bytes in the parent operation state and 24 bytes in each of the 10x child
operation-states - a total of 400 bytes.
</p>

<p>
The size usage can be further reduced by using the proposed <code>std::finite_inplace_stop_source</code>
which supports N separate stop-tokens, each with their own stop-callback slot in the stop-source.
In this case, we could store the <code>std::thread::id</code> of the thread requesting stop once and then only
require an extra pointer for each callback-slot in the stop-source.
i.e. it would be (N + 1) pointers in size instead of 2 * N pointers in size.
</p>

<p>
In this case, the storage needed for the <code>when_all()</code> algorithm with 10x children could be
further reduced to 8 + 10 * 8 + 10 * 24 = 328 bytes, compared to the current implementation
in terms of <code>std::inplace_stop_source</code> which takes 584 bytes - a saving of 256 bytes.
</p>
</div>
</li>

<li><a id="orgaf27e49"></a>Cost of operations<br />
<ol class="org-ol">
<li><a id="orgf7ddc15"></a>Registering a stop-callback<br />
<div class="outline-text-6" id="text-6-1-2-2-1">
<p>
Registering a stop-callback involves an atomic load-relaxed to see if a stop-request has
already been made and if not then a single compare-exchange strong to try to install the
stop-callback. The compare-exchange will only fail if there has been a stop-request issued
and so it does not need to be performed in a loop.
</p>

<p>
Thus, if a stop-request has been issued already then the operations are:
</p>
<ul class="org-ul">
<li>atomic load acquire (reads a state that indicates stop-requested)</li>
</ul>

<p>
If stop-request has not been issued then the operations will be:
</p>
<ul class="org-ul">
<li>atomic load acquire (reads a state that indicates no stop-request has been made)</li>
<li>atomic compare-exchange strong release (stores the stop-callback address)</li>
</ul>
</div>
</li>

<li><a id="orgc31f7c9"></a>Deregistering a stop-callback<br />
<div class="outline-text-6" id="text-6-1-2-2-2">
<p>
Deregistering the stop-callback involves trying to compare-exchange the atomic pointer from
pointing to the registered callback back to nullptr. If successful, then the deregistration
has won the race and the callback is successfully deregistered. Otherwise, if the compare-exchange
fails then this indicates that there was a stop-request which either has or is in the process
of invoking the callback.
</p>

<p>
In this case, the atomic pointer will have been set to the 'stop-requested' value before
the callback is invoked and is set to the 'stop-requested-callback-done' value after the
callback returns. The deregistration just needs to wait until the atomic pointer value is no
longer equal to the 'stop-requested' value, which can only happen when the thread calling
<code>request_stop()</code> sets it to 'stop-requested-callback-done' value.
</p>

<p>
We also need to handle the case where the callback is being deregistered from within the
stop-callback, as in this case we don't want to block until the stop-callback returns as
this would deadlock. We detect this case by comparing the current thread to the thread-id
of the thread calling <code>request_stop()</code> (which is written to the stop-source's thread-id field
before the stop-callback is invoked).
</p>

<p>
So, in the case that there has not yet been a stop-request, the cost is:
</p>
<ul class="org-ul">
<li>1x successful compare-exchange strong w/ relaxed memory order</li>
</ul>

<p>
In the case that there has been a stop-request and the callback has already finished
executing, we have:
</p>
<ul class="org-ul">
<li>1x unsuccessful compare-exchange strong w/ acquire memory order</li>
</ul>

<p>
In the case that there has been a stop-request and the callback has not yet finished
executing on another thread, we have:
</p>
<ul class="org-ul">
<li>1x unsuccessful compare-exchange strong w/ acquire memory order</li>
<li>relaxed load of thread-id from stop-state</li>
<li>comparison of this thread-id to <code>std::this_thread::get_id()</code></li>
<li><code>atomic::wait()</code> on pointer with acquire memory order to wait for value to change
from the 'stop-requested' value.</li>
</ul>

<p>
If the deregistration occurs on the same thread as the thread calling <code>request_stop()</code>
then the operations are the same, we just skip the <code>atomic::wait()</code> call.
</p>
</div>
</li>

<li><a id="org15f5f11"></a>Calling <code>request_stop()</code><br />
<div class="outline-text-6" id="text-6-1-2-2-3">
<p>
The <code>request_stop()</code> implementation of <code>single_inplace_stop_source</code> tries to atomically
compare-exchange the pointer to be the 'stop-requested' value, as long as it does not
already have a value that indicates a stop-request has been made.
</p>

<p>
Since the current value of the atomic pointer can potentially be changed concurrently
by another thread registering/deregistering a stop-callback, it needs to perform a
compare-exchange in a loop until either the compare-exchange succeeds or it observes
that another thread has made a stop-request.
</p>

<p>
If the compare-exchange is successful, it then inspects the previous value.
If there was a stop-callback registered, then it sets the thread-id field to the current
thread-id and invokes the stop-callback. When the stop-callback invocation returns,
it writes the 'stop-requested-callback-done' value to the atomic pointer and notifies
any waiting threads.
</p>

<p>
So if there is no stop-callback registered (and no contention) then we have:
</p>
<ul class="org-ul">
<li>1x atomic load acquire - reads null pointer</li>
<li>1x successful compare-exchange weak w/ acq-rel memory order - stores 'stop-requested' value</li>
</ul>

<p>
If there is a stop-callback registered (and no contention) then we have:
</p>
<ul class="org-ul">
<li>atomic load acquire - reads non-null pointer</li>
<li>1x successful compare-exchange weak w/ acq-rel memory order - stores 'stop-requested' value</li>
<li>atomic store relaxed of current thread-id</li>
<li>invoke callback</li>
<li>atomic store release - stores 'stop-requested-callback-done' value</li>
<li>atomic notify</li>
</ul>
</div>
</li>

<li><a id="orgf74f13f"></a>A "mostly" lock-free implementation<br />
<div class="outline-text-6" id="text-6-1-2-2-4">
<p>
One important thing to note about the implementation of <code>single_inplace_stop_source</code> compared with
<code>inplace_stop_source</code> is that, for most of the operations, the implementation is now lock-free.
</p>

<p>
This means that one thread will never be waiting on some other thread to make forward progress
in order to complete its operation.
</p>

<p>
The one exception to this is where a call to <code>request_stop()</code> is racing with a concurrent call
to deregister a stop-callback on another thread. In this case, the call to deregister the
stop-callback may need to block until the invocation of the stop-callback returns to avoid
destroying the stop-callback object while it is still being used.
</p>
</div>
</li>
</ol>
</li>
</ol>
</div>

<div id="outline-container-org9721b1f" class="outline-4">
<h4 id="org9721b1f"><span class="section-number-4">6.1.3.</span> Cost of <code>finite_inplace_stop_token</code></h4>
<div class="outline-text-4" id="text-6-1-3">
<p>
The cost of <code>finite_inplace_stop_source&lt;N&gt;</code> is similar to that of an array of N <code>single_inplace_stop_source</code>
objects with a couple of minor differences.
</p>
</div>

<ol class="org-ol">
<li><a id="org737209f"></a>Differences in data-structure size<br />
<div class="outline-text-5" id="text-6-1-3-1">
<p>
The size of a <code>finite_inplace_stop_source&lt;N&gt;</code> can be up to (N-1) pointers smaller than that of an
array of N <code>single_inplace_stop_source</code> objects, reducing the number of cache lines required by
the operation-state.
</p>

<p>
This reduction in storage usage has one potential down-side in that it can potentially increase the
amount of false-sharing involved when multiple threads are each trying to register stop-callbacks
to the different stop-tokens concurrently. With <code>single_inplace_stop_source</code> there would be 4x
stop-states sharing a typical 64-byte cache line. Whereas with <code>finite_inplace_stop_source</code> the
storage for up to 8 stop-states may be grouped together in a single cache-line, increasing the
potential for false-sharing.
</p>

<p>
Whether this is an issue in-practice will depend on the use-case and whether, in practice, there will
be multiple threads concurrently trying to register/unregister stop-callbacks associated with the
same stop-source.
</p>

<p>
In most cases it is expected that using less storage will result in overall better performance.
However, there are pathological cases where overheads due to false-sharing can be significant.
It is worth noting that both approaches can be subject to false-sharing overheads to some degree.
</p>
</div>
</li>

<li><a id="org42e46b4"></a>Difference in operation-cost<br />
<div class="outline-text-5" id="text-6-1-3-2">
<p>
The cost of registering/deregistering stop-callbacks will be largely the same as that of
<code>single_inplace_stop_source</code>.
</p>

<p>
The main performance difference will be in terms of the cost of the <code>request_stop()</code> operation.
</p>

<p>
With <code>finite_inplace_stop_source::request_stop()</code>, the implementation can do two things more
efficiently compared to an array of <code>single_inplace_stop_source</code>:
</p>

<ol class="org-ol">
<li>It only needs to call <code>std::this_thread::get_id()</code> and store in the data-structure once
for all stop-tokens, rather than once for each stop-token.</li>
<li>Once it has decided the race of which thread called <code>request_stop()</code> first using a compare-exchange,
deciding the race between <code>request_stop()</code> and registration/deregistration of a stop-callback
can be done with an unconditional atomic exchange instead of an atomic compare-exchange loop,
which at least on some platforms, is slightly more efficient.</li>
</ol>

<p>
For measurements of the performance differences see <a href="#org37557e8">Appendix A - Benchmarks</a>. 
</p>
</div>
</li>
</ol>
</div>
</div>


<div id="outline-container-org67070e4" class="outline-3">
<h3 id="org67070e4"><span class="section-number-3">6.2.</span> Performance vs Safety Tradeoff</h3>
<div class="outline-text-3" id="text-6-2">
<p>
The proposed <code>std::single_inplace_stop_token</code> type adds extra pre-conditions to the
construction of <code>std::single_inplace_stop_callback</code> objects which are not there for
the <code>std::inplace_stop_callback</code> type.
</p>

<p>
This means that users of this stop-token type need to be more careful about its usage
to ensure that only a single associated stop-callback exists at a time as violating
this pre-condition can lead to undefined behaviour.
</p>

<p>
It also means that sender-algorithm implementers need to be more careful when forwarding
a <code>get_stop_token</code> query to multiple child operations.
</p>

<p>
It is worth noting that in most cases where an algorithm has multiple child
operations with overlapping asynchronous operation lifetimes, it will often want to
explicitly control the cancellation behaviour.
For example, by sending a stop-request to the other child operations when it receives
a particular result from one of the children.
</p>

<p>
This tends to be an inherent part of designing a concurrent algorithm, and implementations
that want to control cancellation will tend to need to introduce a new stop-source and
register a stop-callback with the parent environment's stop-token that forwards to this
new stop-source anyway, thus avoiding the problem of passing a single-callback-stop-token
to multiple, concurrent child operations.
</p>

<p>
Authors of new concurrent sender algorithms tend to need to be aware of lots of the lifetime
constraints anyway and implementing them is an advanced use of the sender/receiver framework.
</p>

<p>
The constraints that this paper proposes to put on usage of stop-tokens in a sender/receiver
context should not affect most users, who we expect to be largely composing existing senders.
As long as sender-algorithms abide by the restrictions, composing those algorithms together
should be transparent.
</p>

<p>
The benefits to users are that when they use sender-algorithms that take advantage of the
single-callback constraints, their code uses less memory (operation-state objects are smaller)
and runs more efficiently (uses less CPU-time).
</p>
</div>
</div>

<div id="outline-container-orgf6e522e" class="outline-3">
<h3 id="orgf6e522e"><span class="section-number-3">6.3.</span> Usage in <code>task</code> coroutines</h3>
<div class="outline-text-3" id="text-6-3">
<p>
While there is not yet a concrete proposal for a <code>task</code> coroutine type that integrates with
sender/receiver, something that will need to be considered in such a proposal is what
stop-token type the coroutine will provide in the environment connected to awaited senders.
</p>

<p>
On the one hand, a coroutine can only await a single child operation at a time, and so
if the stop-token is only ever propagated to child operations by <code>co_await</code> expressions
then it seems reasonable to have the <code>task</code> coroutine provide a <code>std::single_inplace_stop_token</code>
rather than a <code>std::inplace_stop_token</code> so that we can take advantage of the better
performance.
</p>

<p>
However, one of the use-cases that is not uncommon in <code>task</code> coroutines is to use the
<code>read_env()</code> algorithm to obtain the current stop-token from the environment, and then to
construct a stop-callback as a local variable in the coroutine.
</p>

<p>
For example: A coroutine that calls a low-level, cancellable OS API, using the coroutine's stop-token natively
</p>
<div class="org-src-container">
<pre class="src src-c++"><span class="org-type">void</span> <span class="org-function-name">os_operation_start</span>(<span class="org-type">void</span>(*<span class="org-function-name">callback</span>)(<span class="org-type">int</span>, <span class="org-type">void</span>*), <span class="org-type">void</span>* <span class="org-variable-name">data</span>);
<span class="org-type">void</span> <span class="org-function-name">os_operation_cancel</span>(<span class="org-type">void</span>* <span class="org-variable-name">data</span>);

<span class="org-type">task</span>&lt;<span class="org-type">int</span>&gt; <span class="org-function-name">dequeue</span>() {
  <span class="org-keyword">struct</span> <span class="org-type">state_t</span> {
    <span class="org-type">async_manual_reset_event</span> <span class="org-variable-name">event</span>;
    <span class="org-constant">std</span>::<span class="org-type">optional</span>&lt;<span class="org-type">int</span>&gt; <span class="org-variable-name">result</span>;
  };

  <span class="org-type">state_t</span> <span class="org-variable-name">state</span>;

  <span class="org-keyword">auto</span> <span class="org-variable-name">on_stop_request</span> = [&amp;] <span class="org-keyword">noexcept</span> { os_operation_cancel(&amp;state); };

  <span class="org-keyword">auto</span> <span class="org-variable-name">on_complete</span> = [](<span class="org-type">int</span> <span class="org-variable-name">result</span>, <span class="org-type">void</span>* <span class="org-variable-name">data</span>) <span class="org-keyword">noexcept</span> {
    <span class="org-keyword">auto</span>&amp; <span class="org-variable-name">state</span> = *<span class="org-keyword">static_cast</span>&lt;<span class="org-type">state_t</span>*&gt;(data);
    state.result = result;
    state.event.set();
  };

  <span class="org-constant">std</span>::<span class="org-type">stoppable_token</span> <span class="org-keyword">auto</span> <span class="org-variable-name">st</span> =
    <span class="org-keyword">co_await</span> <span class="org-constant">std</span>::<span class="org-constant">execution</span>::read_env(<span class="org-constant">std</span>::<span class="org-constant">execution</span>::get_stop_token);

  os_operation_start(on_complete, &amp;state);

  {
    <span class="org-comment-delimiter">// </span><span class="org-comment">Register a stop-callback that will cancel the os_operation if a</span>
    <span class="org-comment-delimiter">// </span><span class="org-comment">stop-request comes in before the operation completes.</span>
    <span class="org-constant">std</span>::<span class="org-type">stop_callback_for_t</span>&lt;<span class="org-keyword">decltype</span>(st), <span class="org-keyword">decltype</span>(on_stop_request)&gt; <span class="org-variable-name">cb</span>{st, on_stop_request};

    <span class="org-keyword">co_await</span> state.event.wait();
  }

  <span class="org-keyword">if</span> (st.stop_requested()) {
    <span class="org-comment-delimiter">// </span><span class="org-comment">Complete with 'stopped' result</span>
    <span class="org-keyword">co_await</span> <span class="org-constant">std</span>::<span class="org-constant">execution</span>::just_stopped();
  }

  <span class="org-keyword">co_return</span> state.result.value();
}
</pre>
</div>

<p>
In this example, the coroutine obtains the current stop-token using the <code>read_env</code> algorithm
and then constructs a stop-callback associated with that stop-token. Then, while this stop-callback
object is still alive, the coroutine then awaits on the <code>async_manual_reset_event</code> to suspend
the coroutine until the callback passed to <code>os_operation_start()</code> is invoked.
</p>

<p>
However, the coroutine will also need to pass an environment with a stop-token down
to all <code>co_await</code> expressions so that stop-requests can be transparently propagated
through coroutines to child operations. However, the child operation might then go
on to register its own stop-callback to that stop-token.
</p>

<p>
If the <code>task</code> coroutine were to use a <code>single_inplace_stop_token</code> for its stop-token then
this would run into potential problems with trying to attach multiple stop-callbacks.
</p>

<p>
Therefore, it's likely that we either need to ban such usage within a coroutine, or we
need the coroutine's stop-token type to be chosen to allow multiple stop-callbacks to
be attached. e.g. by using <code>inplace_stop_token</code> instead.
</p>

<p>
This will mean that when you try to compose a <code>task</code> object as a sender into algorithms
like <code>when_all</code> there will need to be an adapter that adapts between the
<code>finite_inplace_stop_token</code>, passed by <code>when_all</code> in the environment to the <code>task</code>,
and a new <code>inplace_stop_source</code> that can produce an <code>inplace_stop_token</code> that can propagate
through the chain of coroutines.
</p>

<p>
This is an example of a situation where trying to use a more efficient stop-token type
can actually end up hurting performance in cases where you needed an <code>inplace_stop_token</code>
anyway.
</p>
</div>
</div>

<div id="outline-container-orgb3ffe60" class="outline-3">
<h3 id="orgb3ffe60"><span class="section-number-3">6.4.</span> Do we still need <code>inplace_stop_token</code>?</h3>
<div class="outline-text-3" id="text-6-4">
<p>
Yes, we still need to keep the <code>inplace_stop_token</code> family of types.
</p>

<p>
While it is still a generally useful facility, there are two main use-cases for it in the
facilities targeting C++26.
</p>

<p>
The first is the use by the yet-to-be-proposed <code>task</code> type mentioned above.
</p>

<p>
The second is the use by a cancellable <code>counting_scope</code> type proposed in <a href="https://wg21.link/P3149">P3149</a>.
</p>

<p>
In this case, a cancellable <code>counting_scope</code> may spawn an unbounded number of tasks running
within the async-scope. If you want to cancel all of the operations in that scope then you
need some way to send a stop-request to all of those spawned operations. The easiest way to
do that is to have a single <code>inplace_stop_source</code> and then to just pass an associated
<code>inplace_stop_token</code> in the environment passed to the spawned operation.
</p>
</div>
</div>
</div>

<div id="outline-container-org74803d2" class="outline-2">
<h2 id="org74803d2"><span class="section-number-2">7.</span> Proposed Wording</h2>
<div class="outline-text-2" id="text-7">
</div>
<div id="outline-container-org04c6dc7" class="outline-3">
<h3 id="org04c6dc7"><span class="section-number-3">7.1.</span> Changes to stop token concepts</h3>
<div class="outline-text-3" id="text-7-1">
<p>
Modify [thread.stoptoken.intro] as follows:
</p>
<div class="wording" style="counter-set: paragraph 3">
<p class="numbered">Callbacks registered via an object whose type models <code><expos>stoppable-callback-for</expos></code> are
called when a stop request is first made by any associated <code><expos>stoppable-source</expos></code> object.</p>
<p class="numbered">The types <code>stop_source</code> and <code>stop_token</code> and the class template <code>stop_callback</code>
implement the semantics of shared ownership of a stop state. The last remaining owner of the stop state automatically
releases the resources associated with the stop state.</p>
<p class="numbered">An object of type <code>inplace_stop_source</code><ins>, <code>single_inplace_stop_source</code>, or a specialization
of <code>finite_inplace_stop_source</code></ins> is the sole owner of its stop state.
An object of type <code>inplace_stop_token</code><ins>, <code>single_inplace_stop_token</code>,</ins> or of a specialization
of the class template<ins>s</ins> <code>inplace_stop_callback</code><ins>, <code>single_inplace_stop_callback</code>,
<code>finite_inplace_stop_token</code>, or <code>finite_inplace_stop_callback</code></ins> does not participate in
ownership of its associated stop state.</p>
<p>[<i>Note</i>: They are for use when all uses of the associated token and callback objects are known
to nest within the lifetime of the <code>inplace_stop_source</code> object. — <i>end note</i>]</p>
</div>

<p>
Modify [thread.stoptoken.syn] as follows:
</p>
<div class="wording">
<pre>
namespace std {
  // [stoptoken.concepts], stop token concepts
  template&lt;class CallbackFn, class Token, class Initializer = CallbackFn>
    concept <expos>stoppable-callback-for</expos> = <i>see below</i>;           // <i>exposition only</i>

  template&lt;class Token>
    concept stoppable_token = <i>see below</i>;

  template&lt;class Token>
    concept unstoppable_token = <i>see below</i>;

  template&lt;class Source>
    concept <expos>stoppable-source</expos> = <i>see below</i>;                 // <i>exposition only</i>

  // [stoptoken], class stop_token
  class stop_token;

  // [stopsource], class stop_source
  class stop_source;

  // no-shared-stop-state indicator
  struct nostopstate_t {
    explicit nostopstate_t() = default;
  };
  inline constexpr nostopstate_t nostopstate{};

  // [stopcallback], class template stop_callback
  template&lt;class Callback>
    class stop_callback;

  // [stoptoken.never], class never_stop_token
  class never_stop_token;

  // [stoptoken.inplace], class inplace_stop_token
  class inplace_stop_token;

  // [stopsource.inplace], class inplace_stop_source
  class inplace_stop_source;

  // [stopcallback.inplace], class template inplace_stop_callback
  template&lt;class CallbackFn>
    class inplace_stop_callback;

<ins>  // [stoptoken.single], class single_inplace_stop_token
  class single_inplace_stop_token;

  // [stopsource.single], class single_inplace_stop_source
  class single_inplace_stop_source;

  // [stopcallback.single], class template single_inplace_stop_callback
  template&lt;class Callback>
    class single_inplace_stop_callback;

  // [stoptoken.finite], class template finite_inplace_stop_token
  template&lt;size_t N, size_t Idx>
    class finite_inplace_stop_token;

  // [stopsource.finite], class template finite_inplace_stop_source
  template&lt;size_t N>
    class finite_inplace_stop_source;

  // [stopcallback.finite], class template finite_inplace_stop_callback
  template&lt;size_t N, size_t Idx, class Callback>
    class finite_inplace_stop_callback;</ins>

  template&lt;class T, class CallbackFn>
    using stop_callback_for_t = T::template callback_type&lt;CallbackFn>;
}
</pre>
</div>

<p>
Modify [stoptoken.concepts] as follows:
</p>
<div class="wording">
<p class="numbered">The exposition-only <code><expos>stoppable-callback-for</expos></code> concept checks for a callback
compatible with a given <code>Token</code> type.<br/>
<pre style="margin-left: 20px">
template&lt;class CallbackFn, class Token, class Initializer = CallbackFn>
  concept <expos>stoppable-callback-for</expos> =                               // <i>exposition only</i>
    invocable&lt;CallbackFn> &amp;&amp;
    constructible_from&lt;CallbackFn, Initializer> &amp;&amp;
    requires { typename stop_callback_for_t&lt;Token, CallbackFn>; } &amp;&amp;
    constructible_from&lt;stop_callback_for_t&lt;Token, CallbackFn>, const Token&amp;, Initializer>;
</pre></p>
<p class="numbered">Let <code>t</code> and <code>u</code> be distinct, valid objects of type <code>Token</code> that
reference the same logical stop state; let <code>init</code> be an expression such that <code>same_as&lt;decltype(init), Initializer></code>
is <code>true</code>; and let <code>SCB</code> denote the type <code>stop_callback_for_t&lt;Token, CallbackFn></code>.</p>
<p class="numbered">The concept <code><expos>stoppable-callback-for</expos>&lt;CallbackFn, Token, Initializer></code> is modeled only if:</p>
<ul class="numbered">
<li>The following concepts are modeled:
  <ul>
    <li><code>constructible_from&lt;SCB, Token, Initializer></code></li>
    <li><code>constructible_from&lt;SCB, Token&amp;, Initializer></code></li>
    <li><code>constructible_from&lt;SCB, const Token, Initializer></code></li>
  </ul>
</li>
<li><p>An object of type <code>SCB</code> has an associated callback function of type <code>CallbackFn</code>.
Let <code>scb</code> be an object of type <code>SCB</code> and let <code>callback_fn</code> denote <code>scb</code>'s
associated callback function. Direct-non-list-initializing <code>scb</code> from arguments <code>t</code> and
<code>init</code> shall execute a <i>stoppable callback registration</i> as follows:</p>
<ul>
  <li>If <code>t.stop_possible()</code> is <code>true</code>:
    <ul>
      <li><code>callback_fn</code> shall be direct-initialized with <code>init</code>.</li>
      <li>Construction of <code>scb</code> shall only throw exceptions thrown by the initialization of <code>callback_fn</code> from <code>init</code>.</li>
      <li>The callback invocation <code>std::forward&lt;CallbackFn>(callback_fn)()</code> shall be registered
with <code>t</code>'s associated stop state as follows:
        <ul>
          <li>If <code>t.stop_requested()</code> evaluates to <code>false</code> at the time of registration,
the callback registration is added to the stop state's list of callbacks such that <code>std::forward&lt;CallbackFn>(callback_fn)()</code>
is evaluated if a stop-request is made on the stop state.</li>
          <li>Otherwise, <code>std::forward&lt;CallbackFn>(callback_fn)()</code> shall be immediately evaluated on
the thread executing <code>scb</code>'s constructor, and the callback invocation shall not be added to
the list of callback invocations.</li>
        </ul>
<p>If the callback invocation was added to stop state's list of callbacks, <code>scb</code> shall be associated
with the stop state.</p></li>
    </ul>
  </li>
  <li>[<i>Note</i>: If <code>t.stop_possible()</code> is <code>false</code>, there is no requirement that the initialization
of <code>scb</code> causes the initialization of <code>callback_fn</code>. — <i>end note</i>]</li>
  <li><ins>[<i>Note</i>: Types modelling <code><expos>stoppable-callback-for</expos>&lt;CallbackFn, Token, Initializer></code>
may place preconditions on their stoppable callback registration that limit the number of active stop callback registrations
associated with a stop state ([stopcallback.single], [stopcallback.finite]). — <i>end note</i>]</ins></li>
</ul></li>
<li>Destruction of <code>scb</code> shall execute a <i>stoppable callback deregistration</i> as follows (in order):
<ul>
<li>If the constructor of <code>scb</code> did not register a callback invocation with <code>t</code>'s stop state, then the stoppable
callback deregistration shall have no effect other than destroying <code>callback_fn</code> if it was constructed.</li>
<li>Otherwise, the invocation of <code>callback_fn</code> shall be removed from the associated stop state.</li>
<li>If <code>callback_fn</code> is concurrently executing on another thread, then the stoppable callback deregistration
shall  block ([defns.block]) until the invocation of <code>callback_fn</code> returns such that the return from the
invocation of <code>callback_fn</code> strongly happens before ([intro.races]) the destruction of <code>callback_fn</code>.</li>
<li>If <code>callback_fn</code> is executing on the current thread, then the destructor shall not block waiting
for the return from the invocation of <code>callback_fn</code>.</li>
<li>A stoppable callback deregistration shall not block on the completion of the invocation of some other callback
registered with the same logical stop state.</li>
<li>The stoppable callback deregistration shall destroy <code>callback_fn</code>.</li>
</ul>
</li>
<li><ins>An <i>active stop callback registration</i> is a callback invocation that has been added to a stop state's
list of callbacks by a stoppable callback registration and that has not yet been removed from the stop state's
list of callbacks by a stoppable callback deregistration. An active stop callback registration is associated with
the stop state that it was registered with.</ins></li>
</ul>
</div>

<p>
and as follows:
</p>

<div class="wording" style="counter-set: paragraph 8">
<p class="numbered">An object whose type models the exposition-only <code><expos>stoppable-source</expos></code>
concept can be queried whether stop has been requested (<code>stop_requested</code>) and whether stop is possible (<code>stop_possible</code>).
It is a factory for associated stop tokens<del> (<code>get_token</code>)</del>, and a stop request can be made on it (<code>request_stop</code>).
It maintains a list of registered stop callback invocations that it executes when a stop request is first made.</p>
<pre style="margin-left: 20px">
template&lt;class Source&gt;
  concept <expos>stoppable-source</expos> =                                    <expos>// exposition only</expos>
    requires (Source&amp; src, const Source csrc) {         <expos>// see implicit expression variations ([concepts.equality])</expos>
      <del>{ csrc.get_token() } -> stoppable_token;</del>
      { csrc.stop_possible() } noexcept -> same_as&lt;bool&gt;;
      { csrc.stop_requested() } noexcept -> same_as&lt;bool&gt;;
      { src.request_stop() } -> same_as&lt;bool&gt;;
    };
</pre>
<p class="numbered">
<ins>It is unspecified how to obtain a <code>stoppable_token</code> associated with a given <code><expos>stoppable-source</expos></code> object
or whether it is possible to obtain such a <code>stoppable_token</code>.</ins>
</p>
<p class="numbered">
An object whose type models <code><expos>stoppable-source</expos></code> has at most one associated logical stop state.
If it has no associated stop state, it is said to be disengaged.
Let <code>s</code> be an object whose type models <code><expos>stoppable-source</expos></code> and that is disengaged.
<code>s.stop_possible()</code> and <code>s.stop_requested()</code> shall be <code>false</code>.
</p>
<p class="numbered">
Let <code>t</code> be an object whose type models <code><expos>stoppable-source</expos></code>.
If <code>t</code> is disengaged, <del><code>t.get_token()</code></del><ins>obtaining a stop token from <code>t</code></ins> shall <del>return</del><ins>result in</ins> a disengaged stop token;
otherwise, it shall <del>return</del><ins>result in</ins> a stop token that is associated with the stop state of <code>t</code>.
</p>
</div>
</div>
</div>

<div id="outline-container-orgaaccff7" class="outline-3">
<h3 id="orgaaccff7"><span class="section-number-3">7.2.</span> Single-callback stop-token wording</h3>
<div class="outline-text-3" id="text-7-2">
<p>
Insert the following sections after [stopcallback.inplace]
</p>

<div class="wording">
<div class="section">
<h3>Class <code>single_inplace_stop_token</code> [stoptoken.single]</h3>
<div class="section">
<h4>General [stoptoken.single.general]</h4>
<p class="numbered">The class <code>single_inplace_stop_token</code> models the concept <code>stoppable_token</code>.
It references the stop state of its associated <code>single_inplace_stop_source</code> object ([stopsource.single]), if any.</p>
<pre style="margin-left: 20px">
namespace std {
  class single_inplace_stop_token {
  public:
    template&lt;class Callback&gt;
      using callback_type = single_inplace_stop_callback&lt;Callback&gt;;

    single_inplace_stop_token() = default;
    bool operator==(const single_inplace_stop_token&amp;) const = default;

    // [stoptoken.single.mem], member functions
    bool stop_requested() const noexcept;
    bool stop_possible() const noexcept;
    void swap(single_inplace_stop_token&amp;) noexcept;

  private:
    const single_inplace_stop_source* <expos>stop-source</expos> = nullptr;        <expos>// exposition only</expos>
  };
}
</pre>
</div>
<div class="section">
<h4>Member functions [stoptoken.single.mem]</h4>
<pre>
void swap(single_inplace_stop_token&amp; rhs) noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Exchanges the values of <code><expos>stop-source</expos></code> and <code>rhs.<expos>stop-source</expos></code>.</p>
<pre>
bool stop_requested() const noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Equivalent to:
<pre style="margin-left: 20px">
return <expos>stop-source</expos> != nullptr &amp;&amp; <expos>stop-source</expos>-&gt;stop_requested();
</pre></p>
<p class="numbered">[<i>Note</i>: As specified in [basic.life], the behavior of <code>stop_requested</code> is undefined unless the call strongly happens before the start of the destructor of the associated <code>single_inplace_stop_source</code> object, if any.
— <i>end note</i>]</p>
<pre>
bool stop_possible() const noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Equivalent to:
<pre style="margin-left: 20px">
return <expos>stop-source</expos> != nullptr;
</pre>
</p>
</div>
</div>
<div class="section">
<h3>Class <code>single_inplace_stop_source</code> [stopsource.single]</h3>
<div class="section">
<h4>General [stopsource.single.general]</h4>
<p class="numbered">The class <code>single_inplace_stop_source</code> models <code><expos>stoppable-source</expos></code>.
An object of type <code>single_inplace_stop_source</code> shall have at most one associated stop callback at a time.</p>
<pre style="margin-left: 20px">
namespace std {
  class single_inplace_stop_source {
  public:
    // [stopsource.single.cons], constructors and destructor
    constexpr single_inplace_stop_source() noexcept;
    ~single_inplace_stop_source();

    single_inplace_stop_source(single_inplace_stop_source&amp;&amp;) = delete;
    single_inplace_stop_source(const single_inplace_stop_source&amp;) = delete;
    single_inplace_stop_source&amp; operator=(single_inplace_stop_source&amp;&amp;) = delete;
    single_inplace_stop_source&amp; operator=(const single_inplace_stop_source&amp;) = delete;

    // [stopsource.single.mem], member functions
    constexpr single_inplace_stop_token get_token() const noexcept;
    static constexpr bool stop_possible() noexcept { return true; }
    bool stop_requested() const noexcept;
    bool request_stop() noexcept;
  };
}
</pre>
</div>
<div class="section">
<h4>Constructors [stopsource.single.cons]</h4>
<pre>
constexpr single_inplace_stop_source() noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Initializes a new stop state inside <code>*this</code>.</p>
<p class="numbered"><i>Postconditions</i>: <code>stop_requested()</code> is <code>false</code>.</p>
<pre>
~single_inplace_stop_source();
</pre>
<p class="numbered"><i>Preconditions</i>: There are no active stop callback registrations associated with <code>*this</code>.</p>
</div>
<div class="section">
<h4>Member functions [stopsource.single.mem]</h4>
<pre>
constexpr single_inplace_stop_token get_token() const noexcept;
</pre>
<p class="numbered"><i>Returns</i>: A new associated <code>single_inplace_stop_token</code> object whose <code><expos>stop-source</expos></code> member is equal to <code>this</code>.</p>
<pre>
bool stop_requested() const noexcept;
</pre>
<p class="numbered"><i>Returns</i>: <code>true</code> if the stop state inside <code>*this</code> has received a stop request; otherwise <code>false</code>.</p>
<pre>
bool request_stop() noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Executes a stop request operation ([stoptoken.concepts]).</p>
<p class="numbered"><i>Postconditions</i>: <code>stop_requested()</code> is <code>true</code>.</p>
<p class="numbered"><i>Returns</i>: <code>true</code> if this was the first call to <code>request_stop</code>; otherwise <code>false</code>.</p>
</div>
</div>
<div class="section">
<h3>Class template <code>single_inplace_stop_callback</code> [stopcallback.single]</h3>
<div class="section">
<h4>General [stopcallback.single.general]</h4>
<pre>
namespace std {
  template&lt;class CallbackFn&gt;
    class single_inplace_stop_callback {
    public:
      // [stopcallback.single.cons], constructors and destructor
      template&lt;typename Initializer&gt;
        explicit single_inplace_stop_callback(single_inplace_stop_token st, Initializer&amp;&amp; init)
          noexcept(is_nothrow_constructible_v&lt;CallbackFn, Initializer&gt;);
      ~single_inplace_stop_callback();

      single_inplace_stop_callback(single_inplace_stop_callback&amp;&amp;) = delete;
      single_inplace_stop_callback(const single_inplace_stop_callback&amp;) = delete;
      single_inplace_stop_callback&amp; operator=(single_inplace_stop_callback&amp;&amp;) = delete;
      single_inplace_stop_callback&amp; operator=(const single_inplace_stop_callback&amp;) = delete;

    private:
      CallbackFn <expos>callback-fn</expos>;                  <expos>// exposition only</expos>
    };

  template&lt;class CallbackFn&gt;
    single_inplace_stop_callback(single_inplace_stop_token, CallbackFn)
      -> single_inplace_stop_callback&lt;CallbackFn&gt;;
}
</pre>
<p class="numbered"><i>Mandates</i>: <code>CallbackFn</code> satisfies both <code>invocable</code> and <code>destructible</code>.</p>
<p class="numbered"><i>Remarks</i>: For a type <code>Initializer</code>, if <code><expos>stoppable-callback-for</expos>&lt;CallbackFn, single_inplace_stop_token, Initializer&gt;</code>
is satisfied, then <code><expos>stoppable-callback-for</expos>&lt;CallbackFn, single_inplace_stop_token, Initializer&gt;</code> is modeled.
For a <code>single_inplace_stop_callback&lt;CallbackFn&gt;</code> object, the exposition-only <code><expos>callback-fn</expos></code>
member is its associated callback function ([stoptoken.concepts]).</p>
</div>
<div class="section">
<h4>Constructors and destructor [stopcallback.single.cons]</h4>
<pre>
template&lt;class Initializer&gt;
  explicit single_inplace_stop_callback(single_inplace_stop_token st, Initializer&amp;&amp; init)
    noexcept(is_nothrow_constructible_v&lt;CallbackFn, Initializer&gt;);
</pre>
<p class="numbered"><i>Constraints</i>: <code>constructible_from&lt;CallbackFn, Initializer&gt;</code> is satisfied.</p>
<p class="numbered"><i>Preconditions</i>: There are no active stop callback registrations associated with <code>st</code>.</p>
<p class="numbered"><i>Effects</i>: Initializes <code><expos>callback-fn</expos></code> with <code>std::forward&lt;Initializer&gt;(init)</code>
and executes a stoppable callback registration ([stoptoken.concepts]).</p>
<pre>
~single_inplace_stop_callback();
</pre>
<p class="numbered"><i>Effects</i>: Executes a stoppable callback deregistration ([stoptoken.concepts]) of <code>*this</code>.</p>
</div>
</div>
</div>
</div>
</div>

<div id="outline-container-org9165719" class="outline-3">
<h3 id="org9165719"><span class="section-number-3">7.3.</span> Finite-callback stop token wording</h3>
<div class="outline-text-3" id="text-7-3">
<p>
Insert the following sections after the [stopcallback.single] section above.
</p>

<div class="wording">
<div class="section">
<h3>Class template <code>finite_inplace_stop_token</code> [stoptoken.finite]</h3>
<div class="section">
<h4>General [stoptoken.finite.general]</h4>
<p class="numbered">Specializations of the class template <code>finite_inplace_stop_token</code> model the concept <code>stoppable_token</code>.
Objects of types that are a specialization of <code>finite_inplace_stop_token</code> reference the stop state of its associated <code>finite_inplace_stop_source</code> object ([stopsource.finite]), if any.
An object of type <code>finite_inplace_stop_token&lt;N, Index&gt;</code> can be used to register a stop callback in the <code>Index</code>th stop callback slot
([stopsource.finite.general]) of the associated stop state.</p>
<pre style="margin-left: 20px">
namespace std {
  template&lt;size_t N, size_t Index&gt;
  class finite_inplace_stop_token {
  public:
    template&lt;class Callback&gt;
      using callback_type = finite_inplace_stop_callback&lt;N, Index, Callback&gt;;

    finite_inplace_stop_token() = default;
    bool operator==(const finite_inplace_stop_token&amp;) const = default;

    // [stoptoken.finite.mem], member functions
    bool stop_requested() const noexcept;
    bool stop_possible() const noexcept;
    void swap(finite_inplace_stop_token&amp;) noexcept;

  private:
    const finite_inplace_stop_source&lt;N&gt;* <expos>stop-source</expos> = nullptr;        <expos>// exposition only</expos>
  };
}
</pre>
<p class="numbered"><i>Mandates</i>: <code>Index &lt; N</code> is <code>true</code>.</p>
</div>
<div class="section">
<h4>Member functions [stoptoken.finite.mem]</h4>
<pre>
void swap(finite_inplace_stop_token&amp; rhs) noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Exchanges the values of <code><expos>stop-source</expos></code> and <code>rhs.<expos>stop-source</expos></code>.</p>
<pre>
bool stop_requested() const noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Equivalent to:
<pre style="margin-left: 20px">
return <expos>stop-source</expos> != nullptr &amp;&amp; <expos>stop-source</expos>-&gt;stop_requested();
</pre></p>
<p class="numbered">[<i>Note</i>: As specified in [basic.life], the behavior of <code>stop_requested</code> is undefined unless the call strongly happens before the start of the destructor of the associated <code>finite_inplace_stop_source</code> object, if any.
— end note]</p>
<pre>
bool stop_possible() const noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Equivalent to:
<pre style="margin-left: 20px">
return <expos>stop-source</expos> != nullptr;
</pre>
</p>
</div>
</div>
<div class="section">
<h3>Class template <code>finite_inplace_stop_source</code> [stopsource.finite]</h3>
<div class="section">
<h4>General [stopsource.finite.general]</h4>
<p class="numbered">Specializations of the class template <code>finite_inplace_stop_source</code> model <code><expos>stoppable-source</expos></code>.
The stop state of an object of type <code>finite_inplace_stop_source&lt;N&gt;</code> has <code>N</code> <i>stop callback slots</i>.
Each stop callback slot permits at most one associated active stop callback registration ([stoptoken.concepts]).
A stop callback slot is identified by an index, <code>Index</code>, in the range <code>0, 1, ..., N-1</code>,
and has a corresponding <code>stoppable_token</code> type, <code>finite_inplace_stop_token&lt;N, Index&gt;</code>.
Performing a stoppable callback registration ([stoptoken.concepts]) using a <code>finite_inplace_stop_token&lt;N, Index&gt;</code>,
obtained by calling <code>get_token&lt;Index&gt;()</code>, registers a stop callback in the <code>Index</code>th stop callback slot
of the associated stop state.</p>
<pre style="margin-left: 20px">
namespace std {
  template&lt;size_t N&gt;
    class finite_inplace_stop_source {
    public:
      // [stopsource.finite.cons], constructors and destructor
      constexpr finite_inplace_stop_source() noexcept;
      ~finite_inplace_stop_source();

      finite_inplace_stop_source(finite_inplace_stop_source&amp;&amp;) = delete;
      finite_inplace_stop_source(const finite_inplace_stop_source&amp;) = delete;
      finite_inplace_stop_source&amp; operator=(finite_inplace_stop_source&amp;&amp;) = delete;
      finite_inplace_stop_source&amp; operator=(const finite_inplace_stop_source&amp;) = delete;

      // [stopsource.finite.mem], member functions
      template&lt;size_t Index&gt;
        constexpr finite_inplace_stop_token&lt;N, Index&gt; get_token() const noexcept;
      static constexpr bool stop_possible() noexcept { return N != 0; }
      bool stop_requested() const noexcept;
      bool request_stop() noexcept;
    };
}
</pre>
</div>
<div class="section">
<h4>Constructors [stopsource.finite.cons]</h4>
<pre>
constexpr finite_inplace_stop_source() noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Initializes a new stop state inside <code>*this</code>.</p>
<p class="numbered"><i>Postconditions</i>: <code>stop_requested()</code> is <code>false</code>.</p>
<pre>
~finite_inplace_stop_source();
</pre>
<p class="numbered"><i>Preconditions</i>: There are no active associated stop callback registrations.</p>
</div>
<div class="section">
<h4>Member functions [stopsource.finite.mem]</h4>
<pre>
template&lt;size_t Index&gt;
constexpr finite_inplace_stop_token&lt;N, Index&gt; get_token() const noexcept;
</pre>
<p class="numbered"><i>Returns</i>: A new associated <code>finite_inplace_stop_token&lt;N, Index&gt;</code> object whose <code><expos>stop-source</expos></code> member is equal to <code>this</code>.</p>
<pre>
bool stop_requested() const noexcept;
</pre>
<p class="numbered"><i>Returns</i>: <code>true</code> if the stop state inside <code>*this</code> has received a stop request; otherwise <code>false</code>.</p>
<pre>
bool request_stop() noexcept;
</pre>
<p class="numbered"><i>Effects</i>: Executes a stop request operation ([stoptoken.concepts]).</p>
<p class="numbered"><i>Postconditions</i>: <code>stop_requested()</code> is <code>true</code>.</p>
<p class="numbered"><i>Returns</i>: <code>true</code> if this was the first call to <code>request_stop</code>; otherwise <code>false</code>.</p>
</div>
</div>
<div class="section">
<h3>Class template <code>finite_inplace_stop_callback</code> [stopcallback.finite]</h3>
<div class="section">
<h4>General [stopcallback.finite.general]</h4>
<pre>
namespace std {
  template&lt;size_t N, size_t Index, class CallbackFn&gt;
    class finite_inplace_stop_callback {
    public:
      // [stopcallback.finite.cons], constructors and destructor
      template&lt;typename Initializer&gt;
        explicit finite_inplace_stop_callback(finite_inplace_stop_token&lt;N, Index&gt; st, Initializer&amp;&amp; init)
          noexcept(is_nothrow_constructible_v&lt;CallbackFn, Initializer&gt;);
      ~finite_inplace_stop_callback();

      finite_inplace_stop_callback(finite_inplace_stop_callback&amp;&amp;) = delete;
      finite_inplace_stop_callback(const finite_inplace_stop_callback&amp;) = delete;
      finite_inplace_stop_callback&amp; operator=(finite_inplace_stop_callback&amp;&amp;) = delete;
      finite_inplace_stop_callback&amp; operator=(const finite_inplace_stop_callback&amp;) = delete;

    private:
      CallbackFn <expos>callback-fn</expos>;                  <expos>// exposition only</expos>
    };

  template&lt;size_t N, size_t Index, class CallbackFn&gt;
    finite_inplace_stop_callback(finite_inplace_stop_token&lt;N, Index&gt;, CallbackFn)
      -> finite_inplace_stop_callback&lt;N, Index, CallbackFn&gt;;
}
</pre>
<p class="numbered"><i>Mandates</i>: <code>Index &lt; N</code> is <code>true</code> and <code>CallbackFn</code> satisfies both <code>invocable</code> and <code>destructible</code>.</p>
<p class="numbered"><i>Remarks</i>: For type <code>Initializer</code>, if <code><expos>stoppable-callback-for</expos>&lt;CallbackFn, finite_inplace_stop_token&lt;N, Index&gt;, Initializer&gt;</code>
is satisfied, then <code><expos>stoppable-callback-for</expos>&lt;CallbackFn, finite_inplace_stop_token&lt;N, Index&gt;, Initializer&gt;</code> is modeled.
For a <code>finite_inplace_stop_callback&lt;N, Index, CallbackFn&gt;</code> object, the exposition-only <code><expos>callback-fn</expos></code>
member is its associated callback function ([stoptoken.concepts]).</p>
</div>
<div class="section">
<h4>Constructors and destructor [stopcallback.finite.cons]</h4>
<pre>
template&lt;class Initializer&gt;
  explicit finite_inplace_stop_callback(finite_inplace_stop_token&lt;N, Index&gt; st, Initializer&amp;&amp; init)
    noexcept(is_nothrow_constructible_v&lt;CallbackFn, Initializer&gt;);
</pre>
<p class="numbered"><i>Constraints</i>: <code>constructible_from&lt;CallbackFn, Initializer&gt;</code> is satisfied.</p>
<p class="numbered"><i>Preconditions</i>: Either <code>st</code> is disengaged ([stoptoken.concepts]),
or there are no active stop callback registrations associated with <code>st</code> for the <code>Index</code>th stop callback slot of <code>st</code>'s associated stop state.</p>
<p class="numbered"><i>Effects</i>: Initializes <code><expos>callback-fn</expos></code> with <code>std::forward&lt;Initializer&gt;(init)</code>
and executes a stoppable callback registration ([stoptoken.concepts]).
If a stop callback is added to <code>st</code>'s associated stop state, it is added to the <code>Index</code>th stop callback slot of that stop state.</p>
<pre>
~finite_inplace_stop_callback();
</pre>
<p class="numbered"><i>Effects</i>: Executes a stoppable callback deregistration ([stoptoken.concepts]) of <code>*this</code>.</p>
</div>
</div>
</div>
</div>
</div>

<div id="outline-container-org8dbcda5" class="outline-3">
<h3 id="org8dbcda5"><span class="section-number-3">7.4.</span> Changes to <code>receiver</code> concept</h3>
<div class="outline-text-3" id="text-7-4">
<p>
Modify [exec.recv.concepts] p3 as follows:
</p>

<div class="wording" style="counter-set: paragraph 2">
<h4>Receiver concepts [exec.recv.concepts]</h4>
<p>...</p>
<p class="numbered">Let <code>rcvr</code> be a receiver and let <code>op_state</code> be an operation state associated with
an asynchronous operation created by connecting <code>rcvr</code> with a sender. Let <code>token</code> be a stop token
equal to <code>get_stop_token(get_env(rcvr))</code>. <code>token</code> shall remain valid for the duration of the asynchronous
operation's lifetime ([exec.async.ops]). <ins><code>token</code> shall allow creating at least one associated active stop callback
registration ([stoptoken.concepts]) at a time during <code>op_state</code>'s asynchronous operation's lifetime.</ins></p>
<p>[<i>Note</i>: This means that, unless it knows about further guarantees provided by the type of <code>rcvr</code>, the
implementation of <code>op_state</code> cannot use <code>token</code> after it executes a completion operation. This also implies
that<del> any stop callbacks registered on a token must be destroyed before the invocation of the completion operation.</del><ins>:</ins>
<ul class="numbered">
<li><ins>any stop callbacks registered on a token by <code>op_state</code> must be created after the beginning of the invocation of <code>start(op_state)</code>
and destroyed before the invocation of the completion operation; and</ins></li>
<li><ins>unless it knows about further guarantees provided by the type of <code>token</code>, the implementation of <code>op_state</code>
must limit the maximum number of active stop callback registrations associated with <code>token</code> that it creates at any point
in time to at most one.</ins></li>
</ul>
— <i>end note</i>]</p>
</div>
</div>
</div>

<div id="outline-container-orgab1aee9" class="outline-3">
<h3 id="orgab1aee9"><span class="section-number-3">7.5.</span> Changes to <code>when_all</code></h3>
<div class="outline-text-3" id="text-7-5">
<p>
Modify [exec.when.all] p6 as follows:
</p>

<div class="wording" style="counter-set: paragraph 5">
<p class="numbered">
The member <code><expos>impls-for</expos>&lt;when_all_t&gt;::<expos>get-env</expos></code> is initialized with
a callable object equivalent to the following lambda expression:
<pre>
[]&lt;class State, class Rcvr<ins>, size_t Idx</ins>&gt;(<del>auto&amp;&amp;</del><ins>integral_constant&lt;size_t, Idx&gt;</ins>, State&amp; state, const Receiver&amp; rcvr) noexcept {
  return <expos>JOIN-ENV</expos>(
    <expos>MAKE-ENV</expos>(get_stop_token, state.<expos>stop_src</expos>.<ins>template </ins>get_token<ins>&lt;Idx&gt;</ins>()), get_env(rcvr));
}
</pre>
</p>
</div>

<p>
Modify [exec.when.all] p7 as follows:
</p>

<div class="wording" style="counter-set: paragraph 6">
<p class="numbered">The member <code><expos>impls-for</expos>&lt;when_all_t&gt;::<expos>get-state</expos></code>
is initialized with a callable object equivalent to the following lambda expression:
<pre style="margin-left: 20px">
[]&lt;class Sndr, class Rcvr&gt;(Sndr&amp;&amp; sndr, Rcvr&amp; rcvr) noexcept(<i>e</i>) -&gt; decltype(<i>e</i>) {
  return <i>e</i>;
}
</pre>
where <code><i>e</i></code> is the expression
<pre style="margin-left: 20px">
std::forward&lt;Sndr&gt;(sndr).apply(<expos>make-state</expos>&lt;Rcvr&gt;())
</pre>
and where <code><expos>make-state</expos></code> is the following exposition-only class template:
<pre style="margin-left: 20px">
template&lt;class Sndr, class Env&gt;
concept <expos>max-1-sender-in</expos> = sender_in&lt;Sndr, Env&gt; &amp;&amp;                <expos>// exposition only</expos>
  (tuple_size_v&lt;value_types_of_t&lt;Sndr, Env, tuple, tuple&gt;&gt; &lt;= 1);

enum class <expos>disposition</expos> { <expos>started</expos>, <expos>error</expos>, <expos>stopped</expos> };             <expos>// exposition only</expos>

template&lt;class Rcvr&gt;
struct <expos>make-state</expos> {
  template&lt;<expos>max-1-sender-in</expos>&lt;env_of_t&lt;Rcvr&gt;&gt;... Sndrs&gt;
  auto operator()(auto, auto, Sndrs&amp;&amp;... sndrs) const {
    using values_tuple = <i>see below</i>;
    using errors_variant = <i>see below</i>;
    <ins>using stop_source = finite_inplace_stop_source&lt;sizeof...(sndrs)&gt;;
    struct <expos>forward-stop-request</expos> {
      stop_source&amp; <expos>stop-source</expos>;
      void operator()() noexcept { <expos>stop-source</expos>.request_stop(); }
    };</ins>
    using stop_callback = stop_callback_for_t&lt;stop_token_of_t&lt;env_of_t&lt;Rcvr&gt;&gt;, <expos><del>on</del><ins>forward</ins>-stop-request</expos>&gt;;

    struct <expos>state-type</expos> {
      void <expos>arrive</expos>(Rcvr&amp; rcvr) noexcept {                        <expos>// exposition only</expos>
        if (0 == --<expos>count</expos>) {
          <expos>complete</expos>(rcvr);
        }
      }

      <ins>void <expos>register-stop-callback</expos>(stop_token_of_t&lt;env_of_t&lt;Rcvr&gt;&gt; st) noexcept { <expos>// exposition only</expos>
        <expos>on_stop</expos>.emplace(std::move(st), <expos>forward-stop-request</expos>{<expos>stop_src</expos>});
      }</ins>

      void <expos>complete</expos>(Rcvr&amp; rcvr) noexcept;                       <expos>// exposition only</expos>

      atomic&lt;size_t&gt; <expos>count</expos>{sizeof...(sndrs)};                   <expos>// exposition only</expos>
      <del>inplace_</del>stop_source <expos>stop_src</expos>{};                           <expos>// exposition only</expos>
      atomic&lt;<expos>disposition</expos>&gt; <expos>disp</expos>{<expos>disposition</expos>::<expos>started</expos>};           <expos>// exposition only</expos>
      errors_variant <expos>errors</expos>{};                                  <expos>// exposition only</expos>
      values_tuple <expos>values</expos>{};                                    <expos>// exposition only</expos>
      optional&lt;stop_callback&gt; <expos>on_stop</expos>{nullopt};                 <expos>// exposition only</expos>
    };

    return <expos>state-type</expos>{};
  }
};
</pre></p>
</div>

<p>
Modify [exec.when.all] p12 as follows:
</p>

<div class="wording" style="counter-set: paragraph 11">
<p class="numbered">
The member <code><expos>impls-for</expos>&lt;when_all_t&gt;::<expos>start</expos></code> is initialized with a callable
object equivalent to the following lambda expression:
<pre style="margin-left: 20px">
[]&lt;class State, class Rcvr, class... Ops&gt;(
    State&amp; state, Rcvr&amp; rcvr, Ops&amp;... ops) noexcept -&gt; void {
  <del>state.<expos>on_stop</expos>.emplace(
  </del><ins>state.<expos>register-stop-callback</expos>(</ins>get_stop_token(get_env(rcvr))<ins>);</ins><del>,
    <expos>on-stop-request</expos>{state.<expos>stop_src</expos>});</del>
  if (state.<expos>stop_src</expos>.stop_requested()) {
    state.<expos>on_stop</expos>.reset();
    set_stopped(std::move(rcvr));
  } else {
    (start(ops), ...);
  }
}
</pre></p>
</div>
</div>
</div>


<div id="outline-container-org8b6ce0c" class="outline-3">
<h3 id="org8b6ce0c"><span class="section-number-3">7.6.</span> Changes to <code>&lt;version&gt;</code> header</h3>
<div class="outline-text-3" id="text-7-6">
<p>
Modify [version.syn] as follows, replacing <code>XXXXXX</code> with the date of acceptance of this paper.
</p>

<div class="wording">
<h3>Header <code>&lt;version&gt;</code> synopsis [version.syn]</h3>
<p>
<pre>
...
#define __cpp_lib_semaphore                         201907L // also in &lt;semaphore&gt;
#define __cpp_lib_senders                           <del>202406L</del><ins>XXXXXXL</ins> // also in &lt;execution&gt;
#define __cpp_lib_shared_mutex                      201505L // also in &lt;shared_mutex&gt;
...
</pre></p>
</div>
</div>
</div>
</div>

<div id="outline-container-org37557e8" class="outline-2">
<h2 id="org37557e8"><span class="section-number-2">8.</span> Appendix A - Benchmarks</h2>
<div class="outline-text-2" id="text-8">
<p>
This section contains some micro-benchmarks that look at relative performance of different
kinds of stop-source data-structures proposed in this paper compared to the existing
<code>inplace_stop_source</code> type.
</p>

<p>
As with all micro-benchmarks, the results should be taken with a large grain of salt.
I have added some interpretation comments where I thought appropriate.
</p>

<p>
The source and raw output of the benchmarks can be found at <a href="https://gist.github.com/lewissbaker/d95b3a001650c509570af4968b0d00c5">https://gist.github.com/lewissbaker/d95b3a001650c509570af4968b0d00c5</a>
</p>

<p>
Benchmarks were evaluated on an AMD Ryzen 5950X using Clang 19 with compile-flags <code>-std=c++2c -O2 -stdlib=libc++ -DNDEBUG=1</code>.
</p>

<p>
With all of these benchmarks, the operation is performed 100k times per run.
e.g. registering + unregistering a single callback 100k times.
So, divide the time for the benchmark by 100k to get the per-operation time.
</p>

<p>
And then the run is performed multiple times and statistics gathered on the different runs.
For single-threaded benchmarks, the shortest time is reported.
For multi-threaded benchmarks, where the results are more variable, the min/max/p50/avg values are all reported to give a better picture of the distribution.
</p>
</div>

<div id="outline-container-org74901d4" class="outline-3">
<h3 id="org74901d4"><span class="section-number-3">8.1.</span> Register/unregister stop-callbacks single-threaded</h3>
<div class="outline-text-3" id="text-8-1">
<p>
This benchmark tests the performance of registering and unregistering a single callback onto a single stop-source 100k times.
</p>

<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">


<colgroup>
<col  class="org-left" />

<col  class="org-left" />
</colgroup>
<thead>
<tr>
<th scope="col" class="org-left">Data-structure</th>
<th scope="col" class="org-left">Elapsed Time</th>
</tr>
</thead>
<tbody>
<tr>
<td class="org-left"><code>inplace_stop_source</code></td>
<td class="org-left">788us</td>
</tr>

<tr>
<td class="org-left"><code>single_inplace_stop_source</code></td>
<td class="org-left">533us</td>
</tr>
</tbody>
</table>
</div>
</div>

<div id="outline-container-orgd031387" class="outline-3">
<h3 id="orgd031387"><span class="section-number-3">8.2.</span> Call <code>request_stop()</code> with no callbacks</h3>
<div class="outline-text-3" id="text-8-2">
<p>
This benchmark tests the performance of calling <code>request_stop()</code> on various stop-source configurations when there are
no associated stop-callbacks. This looks at situations where you might have a <code>when_all()</code> of multiple child operations,
none of which are actually cancellable and thus none of them register any stop-callbacks, so that we can see the relative
performance of different strategies.
</p>

<p>
For the <code>inplace_stop_source</code> this only needs to be run once as it has the same data-structure regardless of what
the maximum number of children is.
</p>

<p>
For both <code>single_inplace_stop_source</code> and <code>finite_inplace_stop_source</code> we run the test in multiple configurations,
evaluating for situations where different numbers of stop-callbacks are supported.
</p>

<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">


<colgroup>
<col  class="org-left" />

<col  class="org-right" />
</colgroup>
<thead>
<tr>
<th scope="col" class="org-left">Data-structure</th>
<th scope="col" class="org-right">Elapsed Time (us)</th>
</tr>
</thead>
<tbody>
<tr>
<td class="org-left"><code>inplace_stop_source</code></td>
<td class="org-right">445</td>
</tr>

<tr>
<td class="org-left">1x <code>single_inplace_stop_source</code></td>
<td class="org-right">306</td>
</tr>

<tr>
<td class="org-left">2x <code>single_inplace_stop_source</code></td>
<td class="org-right">588</td>
</tr>

<tr>
<td class="org-left"><code>finite_inplace_stop_source&lt;2&gt;</code></td>
<td class="org-right">528</td>
</tr>

<tr>
<td class="org-left">3x <code>single_inplace_stop_source</code></td>
<td class="org-right">974</td>
</tr>

<tr>
<td class="org-left"><code>finite_inplace_stop_source&lt;3&gt;</code></td>
<td class="org-right">661</td>
</tr>

<tr>
<td class="org-left">10x <code>single_inplace_stop_source</code></td>
<td class="org-right">3228</td>
</tr>

<tr>
<td class="org-left"><code>finite_inplace_stop_source&lt;10&gt;</code></td>
<td class="org-right">2215</td>
</tr>
</tbody>
</table>

<p>
It is worth noting here that, for the case where there are no stop-callbacks registered, the <code>inplace_stop_source</code>
is actually more efficient for all but the case where there is at most a single stop-callback. In this case, the
<code>request_stop()</code> method only needs to acquire and release the lock once, so the overhead is fairly low.
</p>

<p>
In the case where we have a maximum callback count of N &gt; 1, we end up needing to perform N separate atomic
compare-exchange operations to check each potential stop-callback slot, and so the cost of this operation
rises linearly with the maximum callback count.
</p>

<p>
In all cases for N &gt; 1, the <code>finite_inplace_stop_source</code> has a slight performance advantage over multiple
<code>single_inplace_stop_source</code> objects, which I mainly attribute to only having to call <code>std::this_thread::get_id()</code>
once instead of N times and the fact it can use atomic-exchange operations for the second and subsequent slots
instead of compare-exchange.
</p>
</div>
</div>

<div id="outline-container-orgb384ecf" class="outline-3">
<h3 id="orgb384ecf"><span class="section-number-3">8.3.</span> Call <code>request_stop()</code> with x/y callbacks</h3>
<div class="outline-text-3" id="text-8-3">
<p>
With the following benchmarks, we have a stop-source configuration that has a particular maximum number
of callbacks that can be registered, and then some number of stop-callbacks registered when a call
to <code>request_stop()</code> is made.
</p>

<p>
Here we measure the performance of registering the stop-callbacks, calling <code>request_stop()</code> and then
deregistering the stop-callbacks 100k times, all from a single thread.
</p>

<p>
The first group looks at the case where there is only a single stop-callback registered.
</p>

<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">


<colgroup>
<col  class="org-left" />

<col  class="org-left" />

<col  class="org-right" />
</colgroup>
<thead>
<tr>
<th scope="col" class="org-left">Data-structure</th>
<th scope="col" class="org-left"># / max</th>
<th scope="col" class="org-right">Elapsed Time (us)</th>
</tr>
</thead>
<tbody>
<tr>
<td class="org-left"><code>inplace_stop_source</code></td>
<td class="org-left">1 / *</td>
<td class="org-right">1353</td>
</tr>

<tr>
<td class="org-left"><code>single_inplace_stop_source</code></td>
<td class="org-left">1 / 1</td>
<td class="org-right">939</td>
</tr>

<tr>
<td class="org-left">2x <code>single_inplace_stop_source</code></td>
<td class="org-left">1 / 2</td>
<td class="org-right">1211</td>
</tr>

<tr>
<td class="org-left"><code>finite_inplace_stop_source&lt;2&gt;</code></td>
<td class="org-left">1 / 2</td>
<td class="org-right">1120</td>
</tr>

<tr>
<td class="org-left">3x <code>single_inplace_stop_source</code></td>
<td class="org-left">1 / 3</td>
<td class="org-right">1402</td>
</tr>

<tr>
<td class="org-left"><code>finite_inplace_stop_source&lt;3&gt;</code></td>
<td class="org-left">1 / 3</td>
<td class="org-right">1325</td>
</tr>
</tbody>
</table>

<p>
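In the case where there are potentially multiple children but only a single stop-callback
has been registered, the cost of <code>request_stop()</code> still grows with the maximum number
of stop-callback slots, since every slot must be checked. The <code>finite_inplace_stop_source</code>
is slightly faster than the equivalent number of <code>single_inplace_stop_source</code> objects, and with
a maximum of three slots both are roughly on par with <code>inplace_stop_source</code>.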
</p>

<p>
The next group of results looks at the case where we register more than one stop-callback.
In this case we are registering the same lambda multiple times.
</p>


<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">


<colgroup>
<col  class="org-left" />

<col  class="org-left" />

<col  class="org-right" />
</colgroup>
<thead>
<tr>
<th scope="col" class="org-left">Data-structure</th>
<th scope="col" class="org-left"># / max</th>
<th scope="col" class="org-right">Elapsed Time (us)</th>
</tr>
</thead>
<tbody>
<tr>
<td class="org-left"><code>inplace_stop_source</code></td>
<td class="org-left">2 / *</td>
<td class="org-right">2765</td>
</tr>

<tr>
<td class="org-left">2x <code>single_inplace_stop_source</code></td>
<td class="org-left">2 / 2</td>
<td class="org-right">1845</td>
</tr>

<tr>
<td class="org-left"><code>finite_inplace_stop_source&lt;2&gt;</code></td>
<td class="org-left">2 / 2</td>
<td class="org-right">1780</td>
</tr>
</tbody>
<tbody>
<tr>
<td class="org-left"><code>inplace_stop_source</code></td>
<td class="org-left">3 / *</td>
<td class="org-right">4044</td>
</tr>

<tr>
<td class="org-left">3x <code>single_inplace_stop_source</code></td>
<td class="org-left">3 / 3</td>
<td class="org-right">2697</td>
</tr>

<tr>
<td class="org-left"><code>finite_inplace_stop_source&lt;3&gt;</code></td>
<td class="org-left">3 / 3</td>
<td class="org-right">2642</td>
</tr>
</tbody>
<tbody>
<tr>
<td class="org-left"><code>inplace_stop_source</code></td>
<td class="org-left">10 / *</td>
<td class="org-right">12929</td>
</tr>

<tr>
<td class="org-left">10x <code>single_inplace_stop_source</code></td>
<td class="org-left">10 / 10</td>
<td class="org-right">9893</td>
</tr>

<tr>
<td class="org-left"><code>finite_inplace_stop_source&lt;10&gt;</code></td>
<td class="org-left">10 / 10</td>
<td class="org-right">8825</td>
</tr>
</tbody>
</table>

<p>
Here we can see that as the number of registered stop-callbacks goes up,
the benefit of the <code>single_</code> and <code>finite_</code> stop-source data-structures
widens.
</p>

<p>
This is largely attributed to the relatively high cost of synchronization needed
for each stop-callback with the <code>inplace_stop_source</code> data-structure - requiring
to lock/unlock the structure and maintain the next/prev pointers of the doubly-linked list.
</p>

<p>
It is worth noting that the CPU branch predictor can have a large impact on
the performance of these benchmarks. For example, if, instead of registering
the same lambda 10x in the last test, we instead register different lambdas
such that when invoking each of the registered stop-callbacks in turn it
dispatches to a different function-pointer, the performance can be up to
3x slower (e.g. ~24000us for the <code>finite_inplace_stop_source&lt;10&gt;</code> benchmark).
</p>
</div>
</div>

<div id="outline-container-org0dfd35a" class="outline-3">
<h3 id="org0dfd35a"><span class="section-number-3">8.4.</span> Register/unregister callbacks from two threads concurrently</h3>
<div class="outline-text-3" id="text-8-4">
<p>
In this test, we spin up two threads and have each thread simultaneously try to
register and unregister a single stop-callback to a stop-source 100k times.
</p>

<p>
Each run is synchronized by a spin-barrier that tries to have each thread
actively running rather than blocked in an OS synchronization call so that
we can better evaluate the effect of contention on the stop-source data-structure
from concurrent threads. In this sense, this is trying to evaluate the worst-case
scenario of multiple threads continually registering/deregistering callbacks and
conflicting with each other.
</p>

<p>
The times from both threads are added to the set of run-times and then
the overall results are compared, i.e. each run contributes two time
samples.
</p>

<p>
In the case of <code>inplace_stop_source</code>, both threads try to register stop-callbacks
to the same stop-source object.
</p>

<p>
In the case of <code>single_inplace_stop_source</code>, each thread tries to register its
callback to a separate stop-source object. I've split this out into two variants
to try to highlight the impact of false-sharing in this scenario. The first variant
(the second row of the table) shows the performance if both <code>single_inplace_stop_source</code> objects
live in the same cache-line. The only difference in the code between the second
and third rows of the table is that the third row has aligned the stop-source
objects sufficiently to ensure that they are placed in different cache-lines.
</p>

<p>
In the case of <code>finite_inplace_stop_source&lt;2&gt;</code>, both threads are given a reference
to the same stop-source object, but each thread registers its stop-callback using
a different stop-token: <code>get_token&lt;0&gt;()</code> for the first thread, and <code>get_token&lt;1&gt;()</code>
for the second thread.
</p>

<table border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">


<colgroup>
<col  class="org-left" />

<col  class="org-right" />

<col  class="org-right" />

<col  class="org-right" />

<col  class="org-right" />
</colgroup>
<thead>
<tr>
<th scope="col" class="org-left">Data-structure</th>
<th scope="col" class="org-right">Min Time (us)</th>
<th scope="col" class="org-right">P50 Time</th>
<th scope="col" class="org-right">Avg Time</th>
<th scope="col" class="org-right">Max Time</th>
</tr>
</thead>
<tbody>
<tr>
<td class="org-left"><code>inplace_stop_source</code></td>
<td class="org-right">2820</td>
<td class="org-right">7430</td>
<td class="org-right">6633</td>
<td class="org-right">7871</td>
</tr>

<tr>
<td class="org-left">2x <code>single_inplace_stop_source</code></td>
<td class="org-right">984</td>
<td class="org-right">5705</td>
<td class="org-right">4796</td>
<td class="org-right">6264</td>
</tr>

<tr>
<td class="org-left">2x <code>single_inplace_stop_source</code> (no false sharing)</td>
<td class="org-right">533</td>
<td class="org-right">556</td>
<td class="org-right">569</td>
<td class="org-right">956</td>
</tr>

<tr>
<td class="org-left"><code>finite_inplace_stop_source&lt;2&gt;</code></td>
<td class="org-right">1000</td>
<td class="org-right">5257</td>
<td class="org-right">4778</td>
<td class="org-right">6163</td>
</tr>
</tbody>
</table>

<p>
The results here are a lot more variable than the single-threaded benchmarks and so
I have included the minimum, maximum, 50th percentile (median) and average measurements
to get a better idea of the overall distribution of times.
</p>

<p>
In all of the results except the "no false sharing" result, the mean is skewed lower
than the p50 results by some outliers which happened to get lucky and run fast because
the threads happened to be scheduled in such a way as to contend far less. So the minimum
run-times are perhaps less useful to look at.
</p>

<p>
If we look, instead, at the p50 and average times then we see that all of the runs
except "no false sharing" are typically running much slower than we'd expect from
the single-threaded runs.
</p>

<p>
Much of this slow-down can be attributed to the impact of multiple threads conflicting
with each other, trying to atomically modify the same cache line. In the case of
<code>inplace_stop_source</code> this is a true conflict on the shared state, and in the case of
the other data-structures, the conflicts are the result of false-sharing (accessing
different atomic objects that live in the same cache-line).
</p>

<p>
This is evidenced by the fact that the "no false sharing" benchmark exhibits much
less variability and times that are much closer to those of the single-threaded
runs.
</p>
</div>
</div>
</div>

<div id="outline-container-org381ebcc" class="outline-2">
<h2 id="org381ebcc"><span class="section-number-2">9.</span> Appendix B - Implementation of <code>single_inplace_stop_source</code></h2>
<div class="outline-text-2" id="text-9">
<p>
The following code shows a reference-implementation of the classes
<code>single_inplace_stop_source</code>, <code>single_inplace_stop_token</code> and the class template
<code>single_inplace_stop_callback</code>.
</p>

<p>
For a full implementation of <code>finite_inplace_stop_source</code>, see the source code for the above benchmark.
</p>

<div class="org-src-container">
<pre class="src src-c++"><span class="org-preprocessor">#include</span> <span class="org-string">&lt;atomic&gt;</span>
<span class="org-preprocessor">#include</span> <span class="org-string">&lt;cassert&gt;</span>
<span class="org-preprocessor">#include</span> <span class="org-string">&lt;concepts&gt;</span>
<span class="org-preprocessor">#include</span> <span class="org-string">&lt;thread&gt;</span>
<span class="org-preprocessor">#include</span> <span class="org-string">&lt;utility&gt;</span>

<span class="org-keyword">namespace</span> <span class="org-constant">std</span>
{
  <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_token</span>;
  <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
  <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_callback</span>;

  <span class="org-comment-delimiter">//////////////////////////////////////////////////////////////</span>
  <span class="org-comment-delimiter">// </span><span class="org-comment">single_inplace_stop_source</span>
  <span class="org-comment-delimiter">//</span>

  <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_source</span> {
   <span class="org-keyword">public</span>:
    <span class="org-function-name">single_inplace_stop_source</span>() <span class="org-keyword">noexcept</span> : state_(no_callback_state()) {}

    <span class="org-type">bool</span> <span class="org-function-name">request_stop</span>() <span class="org-keyword">noexcept</span>;
    <span class="org-type">bool</span> <span class="org-function-name">stop_requested</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;

    <span class="org-type">single_inplace_stop_token</span> <span class="org-function-name">get_token</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;

   <span class="org-keyword">private</span>:
    <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
    <span class="org-keyword">friend</span> <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_callback</span>;

    <span class="org-keyword">struct</span> <span class="org-type">callback_base</span> {
      <span class="org-type">void</span> (*<span class="org-function-name">execute</span>)(<span class="org-type">callback_base</span>* <span class="org-variable-name">self</span>) <span class="org-keyword">noexcept</span>;
    };

    <span class="org-type">bool</span> <span class="org-function-name">try_register_callback</span>(<span class="org-type">callback_base</span>* <span class="org-variable-name">cb</span>) <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;
    <span class="org-type">void</span> <span class="org-function-name">deregister_callback</span>(<span class="org-type">callback_base</span>* <span class="org-variable-name">cb</span>) <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span>;

    <span class="org-type">void</span>* <span class="org-function-name">stop_requested_state</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> { <span class="org-keyword">return</span> &amp;state_; }
    <span class="org-type">void</span>* <span class="org-function-name">stop_requested_callback_done_state</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> {
      <span class="org-keyword">return</span> &amp;thread_requesting_stop_;
    }
    <span class="org-keyword">static</span> <span class="org-type">void</span>* <span class="org-function-name">no_callback_state</span>() <span class="org-keyword">noexcept</span> { <span class="org-keyword">return</span> <span class="org-constant">nullptr</span>; }
    <span class="org-type">bool</span> <span class="org-function-name">is_stop_requested_state</span>(<span class="org-type">void</span>* <span class="org-variable-name">state</span>) <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> {
      <span class="org-keyword">return</span> (state == stop_requested_state()) ||
             (state == stop_requested_callback_done_state());
    }

    <span class="org-comment-delimiter">// </span><span class="org-comment">nullptr                  - no stop-request or stop-callback</span>
    <span class="org-comment-delimiter">// </span><span class="org-comment">&amp;state_                  - stop-requested</span>
    <span class="org-comment-delimiter">// </span><span class="org-comment">&amp;thread_requesting_stop_ - stop-requested, callback-done</span>
    <span class="org-comment-delimiter">// </span><span class="org-comment">other                    - pointer to callback_base</span>
    <span class="org-keyword">mutable</span> <span class="org-type">atomic</span>&lt;<span class="org-type">void</span>*&gt; <span class="org-variable-name">state_</span>;
    <span class="org-keyword">mutable</span> <span class="org-type">atomic</span>&lt;<span class="org-constant">thread</span>::id&gt; <span class="org-variable-name">thread_requesting_stop_</span>;
  };

  <span class="org-keyword">inline</span> <span class="org-type">bool</span> <span class="org-constant">single_inplace_stop_source</span>::<span class="org-function-name">stop_requested</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> {
    <span class="org-type">void</span>* <span class="org-variable-name">state</span> = state_.load(<span class="org-constant">std</span>::memory_order_acquire);
    <span class="org-keyword">return</span> is_stop_requested_state(state);
  }

  <span class="org-keyword">inline</span> <span class="org-type">bool</span> <span class="org-constant">single_inplace_stop_source</span>::<span class="org-function-name">request_stop</span>() <span class="org-keyword">noexcept</span> {
    <span class="org-type">void</span>* <span class="org-variable-name">old_state</span> = state_.load(<span class="org-constant">std</span>::memory_order_relaxed);
    <span class="org-keyword">do</span> {
      <span class="org-keyword">if</span> (is_stop_requested_state(old_state)) {
        <span class="org-keyword">return</span> <span class="org-constant">false</span>;
      }
    } <span class="org-keyword">while</span> (<span class="org-negation-char">!</span>state_.compare_exchange_weak(old_state, stop_requested_state(),
                                           memory_order_acq_rel,
                                           memory_order_relaxed));

    <span class="org-keyword">if</span> (old_state != no_callback_state()) {
      <span class="org-keyword">auto</span>* <span class="org-variable-name">callback</span> = <span class="org-keyword">static_cast</span>&lt;<span class="org-type">callback_base</span>*&gt;(old_state);
      thread_requesting_stop_.store(<span class="org-constant">this_thread</span>::get_id(),
                                    memory_order_relaxed);

      callback-&gt;execute(callback);

      state_.store(stop_requested_callback_done_state(), memory_order_release);
      state_.notify_one();
    }

    <span class="org-keyword">return</span> <span class="org-constant">true</span>;
  }

  <span class="org-keyword">inline</span> <span class="org-type">bool</span> <span class="org-constant">single_inplace_stop_source</span>::<span class="org-function-name">try_register_callback</span>(
      <span class="org-type">callback_base</span> * <span class="org-variable-name">base</span>) <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> {
    <span class="org-type">void</span>* <span class="org-variable-name">old_state</span> = state_.load(memory_order_acquire);
    <span class="org-keyword">if</span> (is_stop_requested_state(old_state)) {
      <span class="org-keyword">return</span> <span class="org-constant">false</span>;
    }

    assert(old_state == no_callback_state());

    <span class="org-keyword">if</span> (state_.compare_exchange_strong(old_state, <span class="org-keyword">static_cast</span>&lt;<span class="org-type">void</span>*&gt;(base),
                                       memory_order_release,
                                       memory_order_acquire)) {
      <span class="org-comment-delimiter">// </span><span class="org-comment">Successfully registered callback.</span>
      <span class="org-keyword">return</span> <span class="org-constant">true</span>;
    }

    <span class="org-comment-delimiter">// </span><span class="org-comment">Stop request arrived while we were trying to register</span>
    assert(old_state == stop_requested_state());

    <span class="org-keyword">return</span> <span class="org-constant">false</span>;
  }

  <span class="org-keyword">inline</span> <span class="org-type">void</span> <span class="org-constant">single_inplace_stop_source</span>::<span class="org-function-name">deregister_callback</span>(
      <span class="org-type">callback_base</span> * <span class="org-variable-name">base</span>) <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> {
    <span class="org-comment-delimiter">// </span><span class="org-comment">Initially assume that the callback has not been invoked and that the</span>
    <span class="org-comment-delimiter">// </span><span class="org-comment">state still points to the registered callback_base structure.</span>
    <span class="org-type">void</span>* <span class="org-variable-name">old_state</span> = <span class="org-keyword">static_cast</span>&lt;<span class="org-type">void</span>*&gt;(base);
    <span class="org-keyword">if</span> (state_.compare_exchange_strong(old_state, no_callback_state(),
                                       memory_order_relaxed,
                                       memory_order_acquire)) {
      <span class="org-comment-delimiter">// </span><span class="org-comment">Successfully deregistered the callback before it could be invoked.</span>
      <span class="org-keyword">return</span>;
    }

    <span class="org-comment-delimiter">// </span><span class="org-comment">Otherwise, a call to request_stop() is invoking the callback.</span>
    <span class="org-keyword">if</span> (old_state == stop_requested_state()) {
      <span class="org-comment-delimiter">// </span><span class="org-comment">Callback not finished executing yet.</span>
      <span class="org-keyword">if</span> (thread_requesting_stop_.load(<span class="org-constant">std</span>::memory_order_relaxed) ==
          <span class="org-constant">std</span>::<span class="org-constant">this_thread</span>::get_id()) {
        <span class="org-comment-delimiter">// </span><span class="org-comment">Deregistering from the same thread that is invoking the callback.</span>
        <span class="org-comment-delimiter">// </span><span class="org-comment">Either the invocation of the callback has completed and the thread</span>
        <span class="org-comment-delimiter">// </span><span class="org-comment">has gone on to do other things (in which case it's safe to destroy)</span>
        <span class="org-comment-delimiter">// </span><span class="org-comment">or we are still in the middle of executing the callback (in which</span>
        <span class="org-comment-delimiter">// </span><span class="org-comment">case we can't block as it would cause a deadlock).</span>
        <span class="org-keyword">return</span>;
      }

      <span class="org-comment-delimiter">// </span><span class="org-comment">Otherwise, callback is being called from another thread.</span>
      <span class="org-comment-delimiter">// </span><span class="org-comment">Wait for callback to finish (state changes from stop_requested_state()</span>
      <span class="org-comment-delimiter">// </span><span class="org-comment">to stop_requested_callback_done_state()).</span>
      state_.wait(old_state, memory_order_acquire);
    }
  }

  <span class="org-comment-delimiter">//////////////////////////////////////////////////////////////</span>
  <span class="org-comment-delimiter">// </span><span class="org-comment">single_inplace_stop_token</span>
  <span class="org-comment-delimiter">//</span>

  <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_token</span> {
   <span class="org-keyword">public</span>:
    <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
    <span class="org-keyword">using</span> <span class="org-type">callback_type</span> = <span class="org-type">single_inplace_stop_callback</span>&lt;<span class="org-type">CB</span>&gt;;

    <span class="org-function-name">single_inplace_stop_token</span>() <span class="org-keyword">noexcept</span> : source_(<span class="org-constant">nullptr</span>) {}

    <span class="org-type">bool</span> <span class="org-function-name">stop_possible</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> { <span class="org-keyword">return</span> source_ != <span class="org-constant">nullptr</span>; }

    <span class="org-type">bool</span> <span class="org-function-name">stop_requested</span>() <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> {
      <span class="org-keyword">return</span> source_ != <span class="org-constant">nullptr</span> &amp;&amp; source_-&gt;stop_requested();
    }

    <span class="org-type">bool</span> <span class="org-keyword">operator</span><span class="org-function-name">==</span>(<span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_token</span>&amp; <span class="org-variable-name">a</span>) <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> = <span class="org-keyword">default</span>;

   <span class="org-keyword">private</span>:
    <span class="org-keyword">friend</span> <span class="org-type">single_inplace_stop_source</span>;
    <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
    <span class="org-keyword">friend</span> <span class="org-keyword">class</span> <span class="org-type">single_inplace_stop_callback</span>;

    <span class="org-keyword">explicit</span> <span class="org-function-name">single_inplace_stop_token</span>(
        <span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_source</span>* <span class="org-variable-name">source</span>) <span class="org-keyword">noexcept</span>
        : source_(source) {}

    <span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_source</span>* <span class="org-variable-name">source_</span>;
  };

  <span class="org-keyword">inline</span> <span class="org-type">single_inplace_stop_token</span> <span class="org-constant">single_inplace_stop_source</span>::<span class="org-function-name">get_token</span>()
      <span class="org-keyword">const</span> <span class="org-keyword">noexcept</span> {
    <span class="org-keyword">return</span> single_inplace_stop_token{<span class="org-keyword">this</span>};
  }

  <span class="org-comment-delimiter">//////////////////////////////////////////////////////////////</span>
  <span class="org-comment-delimiter">// </span><span class="org-comment">single_inplace_stop_callback</span>
  <span class="org-comment-delimiter">//</span>

  <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
  <span class="org-keyword">struct</span> <span class="org-type">single_inplace_stop_callback</span>
      : <span class="org-keyword">private</span> <span class="org-constant">single_inplace_stop_source</span>::<span class="org-type">callback_base</span> {
   <span class="org-keyword">public</span>:
    <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">Init</span>&gt;
      <span class="org-keyword">requires</span> <span class="org-constant">std</span>::<span class="org-type">constructible_from</span>&lt;<span class="org-type">CB</span>, <span class="org-type">Init</span>&gt;
    <span class="org-function-name">single_inplace_stop_callback</span>(
        <span class="org-type">single_inplace_stop_token</span> <span class="org-variable-name">st</span>,
        <span class="org-type">Init</span>&amp;&amp; <span class="org-variable-name">init</span>) <span class="org-keyword">noexcept</span>(<span class="org-type">is_nothrow_constructible_v</span>&lt;<span class="org-type">CB</span>, <span class="org-type">Init</span>&gt;)
        : source_(st.source_), callback_(<span class="org-constant">std</span>::forward&lt;<span class="org-type">Init</span>&gt;(init)) {
      <span class="org-keyword">this</span>-&gt;execute = &amp;execute_impl;
      <span class="org-keyword">if</span> (source_ != <span class="org-constant">nullptr</span>) {
        <span class="org-keyword">if</span> (<span class="org-negation-char">!</span>source_-&gt;try_register_callback(<span class="org-keyword">this</span>)) {
          source_ = <span class="org-constant">nullptr</span>;
          execute_impl(<span class="org-keyword">this</span>);
        }
      }
    }

    ~<span class="org-function-name">single_inplace_stop_callback</span>() {
      <span class="org-keyword">if</span> (source_ != <span class="org-constant">nullptr</span>) {
        source_-&gt;deregister_callback(<span class="org-keyword">this</span>);
      }
    }

    <span class="org-function-name">single_inplace_stop_callback</span>(<span class="org-type">single_inplace_stop_callback</span>&amp;&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-function-name">single_inplace_stop_callback</span>(<span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_callback</span>&amp;) = <span class="org-keyword">delete</span>;
    <span class="org-type">single_inplace_stop_callback</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(<span class="org-type">single_inplace_stop_callback</span>&amp;&amp;) =
        <span class="org-keyword">delete</span>;
    <span class="org-type">single_inplace_stop_callback</span>&amp; <span class="org-keyword">operator</span><span class="org-function-name">=</span>(
        <span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_callback</span>&amp;) = <span class="org-keyword">delete</span>;

   <span class="org-keyword">private</span>:
    <span class="org-keyword">static</span> <span class="org-type">void</span> <span class="org-function-name">execute_impl</span>(
        <span class="org-constant">single_inplace_stop_source</span>::<span class="org-type">callback_base</span>* <span class="org-variable-name">base</span>) <span class="org-keyword">noexcept</span> {
      <span class="org-keyword">auto</span>&amp; <span class="org-variable-name">self</span> = *<span class="org-keyword">static_cast</span>&lt;<span class="org-type">single_inplace_stop_callback</span>*&gt;(base);
      <span class="org-constant">std</span>::forward&lt;<span class="org-type">CB</span>&gt;(self.callback_)();
    }

    <span class="org-keyword">const</span> <span class="org-type">single_inplace_stop_source</span>* <span class="org-variable-name">source_</span>;
    [[no_unique_address]] <span class="org-type">CB</span> <span class="org-variable-name">callback_</span>;
  };

  <span class="org-keyword">template</span> &lt;<span class="org-keyword">typename</span> <span class="org-type">CB</span>&gt;
  <span class="org-function-name">single_inplace_stop_callback</span>(<span class="org-type">single_inplace_stop_token</span>, <span class="org-type">CB</span>)
    -&gt; <span class="org-type">single_inplace_stop_callback</span>&lt;<span class="org-type">CB</span>&gt;;
}
</pre>
</div>
</div>
</div>
</div>
</body>
</html>