I am extracting data from excel that is in this format
product1 | unnamedcol2 | product2 | unnamedcol4 | product3 | unnamedcol6 |
-------------------------------------------------------------------------------
@1foo | 1.10 | @1foo | 0.3 | @1foo | 0.3
@2foo | 1.00 | @2foo | 2 | @2foo |
@3foo | 1.52 | @3foo | 2.53 | @3foo |
@4foo | 1.47 | | | @4foo | 1.31
@5foo | 1.49 | | | @5foo | 1.31
The file uses all 255 fields. Using dapper-dot-net i get the data through this code
IEnumerable<IDictionary<string, object>> excelDataRaw =
conn.Query(string.Format("select * from {0}", table)).Cast<IDictionary<string, object>>();
I pass this data to these test methods. The data is returned as an IEnumerable of IDictionaries where each key is a product and each value is an IDictionary where each key is a value from the product column and the corresponding value is a value from unnamedcol that is to the right of the product column.
var excelDataRefined = new List<IDictionary<string, IDictionary<string, decimal>>>();
excelDataRefined.Add(new Dictionary<string, IDictionary<string, decimal>>());
excelDataRefined[0].Add( "product", new Dictionary<string, decimal>());
excelDataRefined[0]["product"].Add("@1foo", 1.1m);
The methods:
private static Dictionary<string, IDictionary<string, decimal>> Benchmark_foreach(IEnumerable<IDictionary<string, object>> excelDataRaw)
{
Console.WriteLine("1. Using foreach");
var watch = new Stopwatch();
watch.Start();
List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
bool isEven = false;
List<string> products = headers.Where(h => isEven = !isEven).ToList();
var dates = new List<IEnumerable<object>>();
var prices = new List<IEnumerable<object>>();
foreach (string field in headers)
{
string product1 = field;
if (headers.IndexOf(field) % 2 == 0)
{
dates.Add(
excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1]).Where(row => row != null));
}
if (headers.IndexOf(field) % 2 == 1)
{
prices.Add(
excelDataRaw.AsParallel().AsOrdered().Select(col => col[product1] ?? 0m).Take(dates.Last().Count()));
}
}
watch.Stop();
Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds);
watch.Restart();
var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>();
foreach (IEnumerable<object> datelist in dates)
{
decimal num;
IEnumerable<object> datelist1 = datelist;
IEnumerable<object> pricelist =
prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
content => decimal.TryParse(content.ToString(), out num));
Dictionary<string, decimal> dict =
datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary(
x => (string)x.k, x => decimal.Parse(x.v.ToString()));
if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
{
excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict);
}
}
watch.Stop();
Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds);
return excelDataRefined;
}
private static Dictionary<string, IDictionary<string, decimal>> Benchmark_AsParallel(IEnumerable<IDictionary<string, object>> excelDataRaw)
{
Console.WriteLine("2. Using AsParallel().AsOrdered().ForAll");
var watch = new Stopwatch();
watch.Start();
List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
bool isEven = false;
List<string> products = headers.Where(h => isEven = !isEven).ToList();
var dates = new List<IEnumerable<object>>();
var prices = new List<IEnumerable<object>>();
headers.AsParallel().AsOrdered().ForAll(
field =>
dates.Add(
excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select(
col => col[field]).Where(row => row != null).ToList()));
headers.AsParallel().AsOrdered().ForAll(
field =>
prices.Add(
excelDataRaw.AsParallel().AsOrdered().TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select(
col => col[field] ?? 0m).Take(256).ToList()));
dates.RemoveAll(x => x.Count() == 0);
prices.RemoveAll(x => x.Count() == 0);
watch.Stop();
Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds);
watch.Restart();
var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>();
foreach (IEnumerable<object> datelist in dates)
{
decimal num;
IEnumerable<object> datelist1 = datelist;
IEnumerable<object> pricelist =
prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
content => decimal.TryParse(content.ToString(), out num));
Dictionary<string, decimal> dict =
datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary(
x => (string)x.k, x => decimal.Parse(x.v.ToString()));
if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
{
excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict);
}
}
watch.Stop();
Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds);
return excelDataRefined;
}
private static Dictionary<string, IDictionary<string, decimal>> Benchmark_ForEach(IEnumerable<IDictionary<string, object>> excelDataRaw)
{
Console.WriteLine("3. Using ForEach");
var watch = new Stopwatch();
watch.Start();
List<string> headers = excelDataRaw.Select(dictionary => dictionary.Keys).First().ToList();
bool isEven = false;
List<string> products = headers.Where(h => isEven = !isEven).ToList();
var dates = new List<IEnumerable<object>>();
var prices = new List<IEnumerable<object>>();
headers.ForEach(
field =>
dates.Add(
excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 0).Select(col => col[field]).Where(
row => row != null).ToList()));
headers.ForEach(
field =>
prices.Add(
excelDataRaw.TakeWhile(x => headers.IndexOf(field) % 2 == 1).Select(col => col[field] ?? 0m).
Take(256).ToList()));
dates.RemoveAll(x => x.Count() == 0);
prices.RemoveAll(x => x.Count() == 0);
watch.Stop();
Console.WriteLine("Rearange the data in: {0}s", watch.Elapsed.TotalSeconds);
watch.Restart();
var excelDataRefined = new Dictionary<string, IDictionary<string, decimal>>();
foreach (IEnumerable<object> datelist in dates)
{
decimal num;
IEnumerable<object> datelist1 = datelist;
IEnumerable<object> pricelist =
prices[dates.IndexOf(datelist1)].Select(value => value ?? 0m).Where(
content => decimal.TryParse(content.ToString(), out num));
Dictionary<string, decimal> dict =
datelist1.Zip(pricelist, (k, v) => new { k, v }).ToDictionary(
x => (string)x.k, x => decimal.Parse(x.v.ToString()));
if (!excelDataRefined.ContainsKey(products[dates.IndexOf(datelist1)]))
{
excelDataRefined.Add(products[dates.IndexOf(datelist1)], dict);
}
}
watch.Stop();
Console.WriteLine("Zipped the data in: {0}s", watch.Elapsed.TotalSeconds);
return excelDataRefined;
}
- Benchmark_foreach needs app. 3,5s to rearrange and 3s to zip the data.
- Benchmark_AsParallel needs app. 12s to rearrange and 0,005s to zip the data.
- Benchmark_ForEach needs app. 16s to rearrange and 0,005s to zip the data.
Why does it behave like this? I expected AsParallel to be the fastest because it executes in parallel instead of sequential. Ho do i optimize this?
See Question&Answers more detail:
os