I have the following class.
class Test
{
    public HashSet<string> Data = new HashSet<string>();
}
I need to modify the field "Data" from different threads, so I would like some opinions on my current thread-safe implementation:
class Test
{
    public HashSet<string> Data = new HashSet<string>();

    public void Add(string Val)
    {
        lock (Data) Data.Add(Val);
    }

    public void Remove(string Val)
    {
        lock (Data) Data.Remove(Val);
    }
}
Is there a better solution, going directly to the field and protecting it from concurrent access by multiple threads?
I found that neither simply locking the Add and Remove methods of a System.Collections.Generic.HashSet, nor wrapping the framework's ConcurrentDictionary, is sufficient in "high-throughput" scenarios that require good performance.
Considerably better performance can be achieved by using this simple idea:
public class ExampleHashSet<T>
{
    // Number of independent HashSets; each one acts as its own lock "stripe".
    const int ConcurrencyLevel = 124;
    const int Lower31BitMask = 0x7FFFFFFF;

    HashSet<T>[] sets = new HashSet<T>[ConcurrencyLevel];
    IEqualityComparer<T> comparer;

    public ExampleHashSet()
    {
        comparer = EqualityComparer<T>.Default;

        for (int i = 0; i < ConcurrencyLevel; i++)
            sets[i] = new HashSet<T>();
    }

    public bool Add(T item)
    {
        // Route the item to one of the sets by its hash code and lock only that set.
        int hash = (comparer.GetHashCode(item) & Lower31BitMask) % ConcurrencyLevel;

        lock (sets[hash])
        {
            return sets[hash].Add(item);
        }
    }

    public bool Remove(T item)
    {
        int hash = (comparer.GetHashCode(item) & Lower31BitMask) % ConcurrencyLevel;

        lock (sets[hash])
        {
            return sets[hash].Remove(item);
        }
    }

    // further methods ...
}
The System.Collections.Generic.HashSet is wrapped here as well, but in contrast to the other answers we hold locks on multiple HashSets. Different threads are able to "work" on different HashSets, lowering the overall wait time.
This idea can be generalized and implemented directly inside HashSet itself (holding locks on the buckets, instead of locking the whole set). An example can be found here.
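For illustration, one of the "further methods" could be a Contains lookup following the same striping pattern; this is a sketch of mine, not part of the original snippet:

public bool Contains(T item)
{
    // Same routing as Add/Remove: only the stripe that could hold the item is locked.
    int hash = (comparer.GetHashCode(item) & Lower31BitMask) % ConcurrencyLevel;

    lock (sets[hash])
    {
        return sets[hash].Contains(item);
    }
}

Operations that need a consistent view of the whole set (Count, enumeration, the set algebra methods) would have to take all ConcurrencyLevel locks, which is where this approach stops being cheap.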
The tricky part about making ISet<T> concurrent is that the set methods (union, intersection, difference) are iterative in nature. At the very least you have to iterate over all n members of one of the sets involved in the operation, while locking both sets.
You lose the advantages of a ConcurrentDictionary<T, byte> when you have to lock the entire set during iteration. Without locking, these operations are not thread-safe.
Given the added overhead of ConcurrentDictionary<T, byte>, it's probably wiser just to use the lighter-weight HashSet<T> and surround everything in locks.
If you don't need the set operations, use ConcurrentDictionary<T, byte> and just use default(byte) as the value when you are adding keys.
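As a rough sketch of that last suggestion (my own illustration, not code from this answer), a ConcurrentDictionary<string, byte> can be used as a set like this:

using System;
using System.Collections.Concurrent;

// The byte values are never used; default(byte) is just a placeholder.
var set = new ConcurrentDictionary<string, byte>();

bool added   = set.TryAdd("hamster", default(byte)); // true if "hamster" was not already present
bool exists  = set.ContainsKey("hamster");           // thread-safe membership test
bool removed = set.TryRemove("hamster", out _);      // true if the key was removed

Console.WriteLine($"{added}, {exists}, {removed}");  // True, True, True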
I prefer complete solutions, so I did this. Mind you, my Count is implemented in a different way, since I don't see why reading the hashset should be forbidden while trying to count its values.
@Zen, thanks for getting it started.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.Serialization;
using System.Threading;

[DebuggerDisplay("Count = {Count}")]
[Serializable]
public class ConcurrentHashSet<T> : IDisposable, ICollection<T>, ISet<T>, ISerializable, IDeserializationCallback
{
    private readonly ReaderWriterLockSlim _lock = new ReaderWriterLockSlim(LockRecursionPolicy.SupportsRecursion);
    private readonly HashSet<T> _hashSet = new HashSet<T>();

    public ConcurrentHashSet()
    {
    }

    public ConcurrentHashSet(IEqualityComparer<T> comparer)
    {
        _hashSet = new HashSet<T>(comparer);
    }

    public ConcurrentHashSet(IEnumerable<T> collection)
    {
        _hashSet = new HashSet<T>(collection);
    }

    public ConcurrentHashSet(IEnumerable<T> collection, IEqualityComparer<T> comparer)
    {
        _hashSet = new HashSet<T>(collection, comparer);
    }

    protected ConcurrentHashSet(SerializationInfo info, StreamingContext context)
    {
        _hashSet = new HashSet<T>();

        // not sure about this one really... (note: GetObjectData serializes the empty
        // set into 'info' instead of reading from it, so no items are restored here)
        var iSerializable = _hashSet as ISerializable;
        iSerializable.GetObjectData(info, context);
    }

    #region Dispose

    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    protected virtual void Dispose(bool disposing)
    {
        if (disposing)
            if (_lock != null)
                _lock.Dispose();
    }

    public IEnumerator<T> GetEnumerator()
    {
        // Note: enumeration is not protected by the lock; do not mutate the set from
        // other threads while enumerating.
        return _hashSet.GetEnumerator();
    }

    ~ConcurrentHashSet()
    {
        Dispose(false);
    }

    public void OnDeserialization(object sender)
    {
        _hashSet.OnDeserialization(sender);
    }

    public void GetObjectData(SerializationInfo info, StreamingContext context)
    {
        _hashSet.GetObjectData(info, context);
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    #endregion

    public void Add(T item)
    {
        _lock.EnterWriteLock();
        try
        {
            _hashSet.Add(item);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public void UnionWith(IEnumerable<T> other)
    {
        // The write lock already gives exclusive access; the additional read lock is
        // redundant (it only works because the lock supports recursion).
        _lock.EnterWriteLock();
        _lock.EnterReadLock();
        try
        {
            _hashSet.UnionWith(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
            if (_lock.IsReadLockHeld) _lock.ExitReadLock();
        }
    }

    public void IntersectWith(IEnumerable<T> other)
    {
        _lock.EnterWriteLock();
        _lock.EnterReadLock();
        try
        {
            _hashSet.IntersectWith(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
            if (_lock.IsReadLockHeld) _lock.ExitReadLock();
        }
    }

    public void ExceptWith(IEnumerable<T> other)
    {
        _lock.EnterWriteLock();
        _lock.EnterReadLock();
        try
        {
            _hashSet.ExceptWith(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
            if (_lock.IsReadLockHeld) _lock.ExitReadLock();
        }
    }

    public void SymmetricExceptWith(IEnumerable<T> other)
    {
        _lock.EnterWriteLock();
        try
        {
            _hashSet.SymmetricExceptWith(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public bool IsSubsetOf(IEnumerable<T> other)
    {
        _lock.EnterWriteLock();
        try
        {
            return _hashSet.IsSubsetOf(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public bool IsSupersetOf(IEnumerable<T> other)
    {
        _lock.EnterWriteLock();
        try
        {
            return _hashSet.IsSupersetOf(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public bool IsProperSupersetOf(IEnumerable<T> other)
    {
        _lock.EnterWriteLock();
        try
        {
            return _hashSet.IsProperSupersetOf(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public bool IsProperSubsetOf(IEnumerable<T> other)
    {
        _lock.EnterWriteLock();
        try
        {
            return _hashSet.IsProperSubsetOf(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public bool Overlaps(IEnumerable<T> other)
    {
        _lock.EnterWriteLock();
        try
        {
            return _hashSet.Overlaps(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public bool SetEquals(IEnumerable<T> other)
    {
        _lock.EnterWriteLock();
        try
        {
            return _hashSet.SetEquals(other);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    bool ISet<T>.Add(T item)
    {
        _lock.EnterWriteLock();
        try
        {
            return _hashSet.Add(item);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public void Clear()
    {
        _lock.EnterWriteLock();
        try
        {
            _hashSet.Clear();
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public bool Contains(T item)
    {
        _lock.EnterWriteLock();
        try
        {
            return _hashSet.Contains(item);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public void CopyTo(T[] array, int arrayIndex)
    {
        _lock.EnterWriteLock();
        try
        {
            _hashSet.CopyTo(array, arrayIndex);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public bool Remove(T item)
    {
        _lock.EnterWriteLock();
        try
        {
            return _hashSet.Remove(item);
        }
        finally
        {
            if (_lock.IsWriteLockHeld) _lock.ExitWriteLock();
        }
    }

    public int Count
    {
        get
        {
            // Counting only reads the set, so a read lock is enough here (this is the
            // "different" Count implementation mentioned above).
            _lock.EnterReadLock();
            try
            {
                return _hashSet.Count;
            }
            finally
            {
                if (_lock.IsReadLockHeld) _lock.ExitReadLock();
            }
        }
    }

    public bool IsReadOnly
    {
        get { return false; }
    }
}
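A quick usage sketch (my own illustration, assuming the class above is compiled into your project):

using System;
using System.Threading.Tasks;

var tags = new ConcurrentHashSet<string>(StringComparer.OrdinalIgnoreCase);

Parallel.For(0, 1000, i =>
{
    tags.Add("tag" + (i % 10));           // concurrent writers each take the write lock
});

Console.WriteLine(tags.Count);            // 10 (Count only takes a read lock)
Console.WriteLine(tags.Contains("TAG3")); // True, thanks to the case-insensitive comparer

tags.Dispose();                           // releases the ReaderWriterLockSlim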
Since no one has mentioned it, I'll offer an alternative approach that may or may not be appropriate for your particular purpose:
Microsoft Immutable Collections
From a blog post by the Microsoft team behind it:
While creating and running concurrently is easier than ever, one of the fundamental problems still exists: mutable shared state. Reading from multiple threads is typically very easy, but once the state needs to be updated, it gets a lot harder, especially in designs that require locking.
An alternative to locking is making use of immutable state. Immutable data structures are guaranteed to never change and can thus be passed freely between different threads without worrying about stepping on somebody else’s toes.
This design creates a new problem though: How do you manage changes in state without copying the entire state each time? This is especially tricky when collections are involved.
This is where immutable collections come in.
These collections include ImmutableHashSet<T> and ImmutableList<T>.
Performance
Since the immutable collections use tree data structures underneath to enable structural sharing, their performance characteristics differ from mutable collections. When comparing against a locking mutable collection, the results will depend on lock contention and access patterns. However, taken from another blog post on the immutable collections:
Q: I’ve heard that immutable collections are slow. Are these any different? Can I use them when performance or memory is important?
A: These immutable collections have been highly tuned to have competitive performance characteristics to the mutable collections while balancing memory sharing. In some cases they are very nearly as fast as the mutable collections both algorithmically and in actual time, sometimes even faster, while in other cases they are algorithmically more complex. In many cases however the difference will be negligible. Generally you should use the simplest code to get the job done and then tune for performance as necessary. The immutable collections help you write simple code, especially when thread-safety has to be considered.
In other words, in many cases the difference won't be noticeable, and you should go with the simpler choice - which for concurrent sets would mean using ImmutableHashSet<T>, since you don't have an existing locking mutable implementation! :-)
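As a rough sketch (mine, not from the blog posts) of how the original Test class could use it, a shared ImmutableHashSet<string> field can be updated lock-free by atomically swapping in new versions, for example with ImmutableInterlocked.Update:

using System.Collections.Immutable;

class Test
{
    // Readers see one consistent snapshot; writers swap in a new set atomically.
    private ImmutableHashSet<string> _data = ImmutableHashSet<string>.Empty;

    public void Add(string val)
    {
        // Retries the transform until the compare-and-swap succeeds.
        ImmutableInterlocked.Update(ref _data, set => set.Add(val));
    }

    public void Remove(string val)
    {
        ImmutableInterlocked.Update(ref _data, set => set.Remove(val));
    }

    public bool Contains(string val)
    {
        return _data.Contains(val);
    }
}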
I've created an actual ConcurrentHashSet based on ConcurrentDictionary, rather than wrapping a ConcurrentDictionary or locking over a HashSet.
This implementation supports the basic per-item operations without HashSet's set operations, as those make less sense in concurrent scenarios:
var concurrentHashSet = new ConcurrentHashSet<string>(
    new[]
    {
        "hamster",
        "HAMster",
        "bar",
    },
    StringComparer.OrdinalIgnoreCase);

concurrentHashSet.TryRemove("foo");

if (concurrentHashSet.Contains("BAR"))
{
    Console.WriteLine(concurrentHashSet.Count);
}
Output: 2
You can get it from NuGet here and see the source on GitHub here.