The fastest way is to use a HashSet
.
The Contains
for a HashSet
is O(1).
I took your code and added a benchmark for HashSet<int>
The performance cost of HashSet<int> set = new HashSet<int>(list);
is nearly zero.
Code
void Main()
{
ContainsExistsAnyVeryShort();
ContainsExistsAnyShort();
ContainsExistsAny();
}
private static void ContainsExistsAny()
{
Console.WriteLine("***************************************");
Console.WriteLine("********* ContainsExistsAny ***********");
Console.WriteLine("***************************************");
List<int> list = new List<int>(6000000);
Random random = new Random();
for (int i = 0; i < 6000000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
HashSet<int> set = new HashSet<int>(list);
find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");
}
private static void ContainsExistsAnyShort()
{
Console.WriteLine("***************************************");
Console.WriteLine("***** ContainsExistsAnyShortRange *****");
Console.WriteLine("***************************************");
List<int> list = new List<int>(2000);
Random random = new Random();
for (int i = 0; i < 2000; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
HashSet<int> set = new HashSet<int>(list);
find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedMilliseconds}ms");
}
private static void ContainsExistsAnyVeryShort()
{
Console.WriteLine("*******************************************");
Console.WriteLine("***** ContainsExistsAnyVeryShortRange *****");
Console.WriteLine("*******************************************");
List<int> list = new List<int>(10);
Random random = new Random();
for (int i = 0; i < 10; i++)
{
list.Add(random.Next(6000000));
}
int[] arr = list.ToArray();
HashSet<int> set = new HashSet<int>(list);
find(list, arr, set, (method, stopwatch) => $"{method}: {stopwatch.ElapsedTicks} ticks");
}
private static void find(List<int> list, int[] arr, HashSet<int> set, Func<string, Stopwatch, string> format)
{
Random random = new Random();
int[] find = new int[10000];
for (int i = 0; i < 10000; i++)
{
find[i] = random.Next(6000000);
}
Stopwatch watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine(format("List/Contains", watch));
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Exists(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine(format("List/Exists", watch));
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
list.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine(format("List/Any", watch));
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine(format("Array/Contains", watch));
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
Array.Exists(arr, a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine(format("Array/Exists", watch));
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
Array.IndexOf(arr, find[rpt]);
}
watch.Stop();
Console.WriteLine(format("Array/IndexOf", watch));
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
arr.Any(a => a == find[rpt]);
}
watch.Stop();
Console.WriteLine(format("Array/Any", watch));
watch = Stopwatch.StartNew();
for (int rpt = 0; rpt < 10000; rpt++)
{
set.Contains(find[rpt]);
}
watch.Stop();
Console.WriteLine(format("HashSet/Contains", watch));
}
RESULTS
*******************************************
***** ContainsExistsAnyVeryShortRange *****
*******************************************
List/Contains: 1067 ticks
List/Exists: 2884 ticks
List/Any: 10520 ticks
Array/Contains: 1880 ticks
Array/Exists: 5526 ticks
Array/IndexOf: 601 ticks
Array/Any: 13295 ticks
HashSet/Contains: 6629 ticks
***************************************
***** ContainsExistsAnyShortRange *****
***************************************
List/Contains: 4ms
List/Exists: 28ms
List/Any: 138ms
Array/Contains: 6ms
Array/Exists: 34ms
Array/IndexOf: 3ms
Array/Any: 96ms
HashSet/Contains: 0ms
***************************************
********* ContainsExistsAny ***********
***************************************
List/Contains: 11504ms
List/Exists: 57084ms
List/Any: 257659ms
Array/Contains: 11643ms
Array/Exists: 52477ms
Array/IndexOf: 11741ms
Array/Any: 194184ms
HashSet/Contains: 3ms
Edit (2021-08-25)
I added a comparison for very short collections (10 items) and also added Array.Contains
and Array.IndexOf
. You can see that Array.IndexOf
is the fastest for such small ranges. That is, because as @lucky-brian said n is so small here, that a for
-loop performs better than a somewhat complex search algorithm. However I still advice to use HashSet<T>
whenever possible, as it better reflects the intend of only having unique values and the difference for small collections is below 1ms.
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…