来学习一下.NET Framework的源代码（2）——线程安全的循环队列

发表于2013年3月29日2020年7月7日作者 daozhihun

　　M$源代码学习计划歇菜了一段时间，那么现在继续。

　　M$在System.Collections命名空间提供了一个名为Queue的类，顾名思义是实现队列的数据结构，让我们来看看它长神马样子。

一、System.Collections.Queue

　　（这个类不是泛型类，但代码应该和泛型类类似。）

　　这个类实现了ICollection和IClonable接口，这很正常，直接忽略。

　　再看它定义的私有成员：

private Object[] _array;
private int _head;       // First valid element in the queue
private int _tail;       // Last valid element in the queue 
private int _size;       // Number of elements.
private int _growFactor; // 100 == 1.0, 130 == 1.3, 200 == 2.0 
private int _version; 
[NonSerialized]
private Object _syncRoot; 
                                               
private const int _MinimumGrow = 4;
private const int _ShrinkThreshold = 32;

　　理所当然，队列是顺序存储比较合适，所以直接定义成一个数组。设置_growFactor是M$的一贯作风，个人认为每次容量不足翻倍比较合适，M$也这么认为，所以默认的构造方法就是将这个其设置为200。

　　可以目测是一个循环队列，看看入队的方法就知道了：

// Adds obj to the tail of the queue.
// 
public virtual void Enqueue(Object obj) { 
    if (_size == _array.Length) {
        int newcapacity = (int)((long)_array.Length * (long)_growFactor / 100); 
        if (newcapacity < _array.Length + _MinimumGrow) {
            newcapacity = _array.Length + _MinimumGrow;
        }
        SetCapacity(newcapacity); 
    }
                                            
    _array[_tail] = obj; 
    _tail = (_tail + 1) % _array.Length;
    _size++; 
    _version++;
}

果然是这样。那么是如何实现扩容的呢？M$是不是用了什么比较好的方法呢？我们看看SetCapacity方法：

// PRIVATE Grows or shrinks the buffer to hold capacity objects. Capacity
// must be >= _size. 
private void SetCapacity(int capacity) {
    Object[] newarray = new Object[capacity];
    if (_size > 0) {
        if (_head < _tail) { 
            Array.Copy(_array, _head, newarray, 0, _size);
        } else { 
            Array.Copy(_array, _head, newarray, 0, _array.Length - _head); 
            Array.Copy(_array, 0, newarray, _array.Length - _head, _tail);
        } 
    }
                                            
    _array = newarray;
    _head = 0; 
    _tail = (_size == capacity) ? 0 : _size;
    _version++; 
}

　　所以M$也很偷懒，直接生成一个新的数组，然后把原来的元素复制过去就完事了。

　　貌似没有什么有价值的代码。所以这个类就学习完了(^_^)。

二、System.Collections.Concurrent.ConcurrentQueue类

　　众所周知，上面那个类没有实现任何多线程的保护，所以如果在多线程中要使用队列显然不能用Queue类。我们来看看这个线程安全的队列是如何实现的。首先我们要知道这个类的存储方式和Queue类不同，它是使用链式存储：

　　链表中的元素称为Segment（段），它的私有成员定义如下：

internal volatile T[] m_array;
            
internal volatile VolatileBool[] m_state;
            
//pointer to the next segment. null if the current segment is the last segment 
private volatile Segment m_next;
internal readonly long m_index; 
private volatile int m_low; 
private volatile int m_high;
            
private volatile ConcurrentQueue<T> m_source;

　　它包含了如下内容：段内数据T[]（M$目前设定每段最多存32个元素）、m_state数组记录段内每个位置是否含有有效元素（以后会说明）、队首和队尾指针、当前是第几段（从0开始编号）。

　　那么，ConcurrentQueue的私有成员就好理解了：

//fields of ConcurrentQueue
[NonSerialized]
private volatile Segment m_head; 
      
[NonSerialized] 
private volatile Segment m_tail; 
      
private T[] m_serializationArray; // Used for custom serialization. 
      
private const int SEGMENT_SIZE = 32;
      
//number of snapshot takers, GetEnumerator(), ToList() and ToArray() operations take snapshot. 
[NonSerialized]
internal volatile int m_numSnapshotTakers = 0;

　　首先我们来看看入队的方法：

public void Enqueue(T item) 
{
    SpinWait spin = new SpinWait();
    while (true)
    { 
        Segment tail = m_tail;
        if (tail.TryAppend(item)) 
            return; 
        spin.SpinOnce();
    } 
}

　　貌似还看不出什么，继续看Segment的TryAppend方法：

internal bool TryAppend(T value) 
{
    //quickly check if m_high is already over the boundary, if so, bail out
    if (m_high >= SEGMENT_SIZE - 1)
    { 
        return false;
    } 
  
    //Now we will use a CAS to increment m_high, and store the result in newhigh.
    //Depending on how many free spots left in this segment and how many threads are doing this Increment 
    //at this time, the returning "newhigh" can be
    // 1) < SEGMENT_SIZE - 1 : we took a spot in this segment, and not the last one, just insert the value
    // 2) == SEGMENT_SIZE - 1 : we took the last spot, insert the value AND grow the segment
    // 3) > SEGMENT_SIZE - 1 : we failed to reserve a spot in this segment, we return false to 
    //    Queue.Enqueue method, telling it to try again in the next segment.
  
    int newhigh = SEGMENT_SIZE; //initial value set to be over the boundary 
  
    //We need do Interlocked.Increment and value/state update in a finally block to ensure that they run 
    //without interuption. This is to prevent anything from happening between them, and another dequeue
    //thread maybe spinning forever to wait for m_state[] to be true;
    try
    { } 
    finally
    { 
        newhigh = Interlocked.Increment(ref m_high); 
        if (newhigh <= SEGMENT_SIZE - 1)
        { 
            m_array[newhigh] = value;
            m_state[newhigh].m_value = true;
        }
  
        //if this thread takes up the last slot in the segment, then this thread is responsible
        //to grow a new segment. Calling Grow must be in the finally block too for reliability reason: 
        //if thread abort during Grow, other threads will be left busy spinning forever. 
        if (newhigh == SEGMENT_SIZE - 1)
        { 
            Grow();
        }
    }
  
    //if newhigh <= SEGMENT_SIZE-1, it means the current thread successfully takes up a spot
    return newhigh <= SEGMENT_SIZE - 1; 
}

　　第一个if判断当前Segment是不是满了。为什么要这样判断呢？假设现在队尾的Segment已经有31个元素了，而此时有两个线程同时访问Enqueue方法。那么可能发生如下情况：第一个线程插入了一个元素，这样当前Segment已经满了，进行扩容（参见try-finally块里的判断，至于为什么要写在finnally可以看注释，主要是保证这些操作一次性执行完，不会被打断或中止）；第二个线程可能得到的tail仍然是这个满的段。所以如果发生这种情况，return false，这样Enqueue方法就可以尝试获得新的队尾的段。

　　Grow方法直接生成一个新的段，并将队尾的m_next指针指向它，代码省略。

　　出队的时候也要访问多线程的情况：

public bool TryDequeue(out T result) 
{ 
    while (!IsEmpty)
    { 
        Segment head = m_head;
        if (head.TryRemove(out result))
            return true;
        //since method IsEmpty spins, we don't need to spin in the while loop 
    }
    result = default(T); 
    return false; 
}

internal bool TryRemove(out T result) 
{
    SpinWait spin = new SpinWait(); 
    int lowLocal = Low, highLocal = High;
    while (lowLocal <= highLocal)
    {
        //try to update m_low 
        if (Interlocked.CompareExchange(ref m_low, lowLocal + 1, lowLocal) == lowLocal)
        { 
            //if the specified value is not available (this spot is taken by a push operation, 
            // but the value is not written into yet), then spin
            SpinWait spinLocal = new SpinWait(); 
            while (!m_state[lowLocal].m_value)
            {
                spinLocal.SpinOnce();
            } 
            result = m_array[lowLocal];
  
            // If there is no other thread taking snapshot (GetEnumerator(), ToList(), etc), reset the deleted entry to null. 
            // It is ok if after this conditional check m_numSnapshotTakers becomes > 0, because new snapshots won't include
            // the deleted entry at m_array[lowLocal]. 
            if (m_source.m_numSnapshotTakers <= 0)
            {
                m_array[lowLocal] = default(T); //release the reference to the object.
            } 
  
            //if the current thread sets m_low to SEGMENT_SIZE, which means the current segment becomes 
            //disposable, then this thread is responsible to dispose this segment, and reset m_head 
            if (lowLocal + 1 >= SEGMENT_SIZE)
            { 
                //  Invariant: we only dispose the current m_head, not any other segment
                //  In usual situation, disposing a segment is simply seting m_head to m_head.m_next
                //  But there is one special case, where m_head and m_tail points to the same and ONLY
                //segment of the queue: Another thread A is doing Enqueue and finds that it needs to grow, 
                //while the *current* thread is doing *this* Dequeue operation, and finds that it needs to
                //dispose the current (and ONLY) segment. Then we need to wait till thread A finishes its 
                //Grow operation, this is the reason of having the following while loop 
                spinLocal = new SpinWait();
                while (m_next == null) 
                {
                    spinLocal.SpinOnce();
                }
                Contract.Assert(m_source.m_head == this); 
                m_source.m_head = m_next;
            } 
            return true; 
        }
        else
        {
            //CAS failed due to contention: spin briefly and retry
            spin.SpinOnce();
            lowLocal = Low; highLocal = High; 
        }
    }//end of while 
    result = default(T); 
    return false;
}

　　此时要考虑一种情况：当前位置被入队操作“加锁”（值还没有写入，不是真正的锁），那么要等入队操作执行完（极端情况是空队列刚入队一个元素，在未完成前调用了出队方法，当然多线程时可能会发生更加复杂的情况）。考虑与ToList等方法进行并发，在ToList等方法未执行完时先不立即释放队尾元素（这很自然，因为ToList先调用嘛，所以当然要包含这个等待出队的元素），在最后进行释放。

　　还有一点要说明的时，假设当前的段元素都出队了，那么就要将这个段释放，此时只要把m_next指向自己就行了，因为这样操作之后就没有其他对象引用这个段，这个段就可以被GC回收。

　　自然，即将出队的元素是head所指的段的第一个元素，我们来看看Segment的TryPeek方法（获取即将出队的元素）：

internal bool TryPeek(out T result)
{ 
    result = default(T);
    int lowLocal = Low;
    if (lowLocal > High)
        return false; 
    SpinWait spin = new SpinWait();
    while (!m_state[lowLocal].m_value) 
    { 
        spin.SpinOnce();
    } 
    result = m_array[lowLocal];
    return true;
}

　　lowLocal = Low语句将此时的队首指针记录（因为别的线程可能在进行出队操作，Low可能会发生改变）。

　　最后来看看怎样计算队列中元素的数量。我们看一下Count属性的代码：

public int Count 
{ 
    get
    { 
        //store head and tail positions in buffer,
        Segment head, tail;
        int headLow, tailHigh;
        GetHeadTailPositions(out head, out tail, out headLow, out tailHigh); 
  
        if (head == tail) 
        { 
            return tailHigh - headLow + 1;
        } 
  
        //head segment
        int count = SEGMENT_SIZE - headLow;
  
        //middle segment(s), if any, are full.
        //We don't deal with overflow to be consistent with the behavior of generic types in CLR. 
        count += SEGMENT_SIZE * ((int)(tail.m_index - head.m_index - 1)); 
  
        //tail segment 
        count += tailHigh + 1;
  
        return count;
    } 
}

　　我们先不管是怎么取得队首和队尾指针的，先看后面的代码。很好理解，如果只有一个段（首=尾），那么元素数量就是队首和队尾位置只差加一。如果有多个段，就是首段元素数量+尾段元素数量+段数*每个段的容量。我们看一下获取队首和队尾指针的代码：

private void GetHeadTailPositions(out Segment head, out Segment tail,
    out int headLow, out int tailHigh) 
{ 
    head = m_head;
    tail = m_tail; 
    headLow = head.Low;
    tailHigh = tail.High;
    SpinWait spin = new SpinWait();
  
    //we loop until the observed values are stable and sensible.
    //This ensures that any update order by other methods can be tolerated. 
    while ( 
        //if head and tail changed, retry
        head != m_head || tail != m_tail 
        //if low and high pointers, retry
        || headLow != head.Low || tailHigh != tail.High
        //if head jumps ahead of tail because of concurrent grow and dequeue, retry
        || head.m_index > tail.m_index) 
    {
        spin.SpinOnce(); 
        head = m_head; 
        tail = m_tail;
        headLow = head.Low; 
        tailHigh = tail.High;
    }
}

　　这个方法非常有意思，它是等到队列元素稳定后才进行确定（有点怀疑这个做法，毕竟SpinOnce的时间片是很少的）。

　　现在主要的代码已经看完了，对如何保证线程安全又多了一定的了解。最后有一个问题是为什么要进行分段（为什么不像链表那样每个元素都有m_next），这个很显然，除了节省存储空间外，还有提升程序的运行效率（包括减少GC的次数）。

　　如果你对SpinWait这个类不熟悉，请参考：http://msdn.microsoft.com/zh-cn/library/vstudio/system.threading.thread.spinwait.aspx，如果你对CompareExcange不熟悉，请参考：http://msdn.microsoft.com/zh-cn/library/801kt583(v=VS.80).aspx。

参考资料：http://www.codethinked.com/net-40-and-system_collections_concurrent_concurrentqueue