When I used OLEDB, it takes only 2 - 3 seconds to read 3200 rows from an Excel Sheet. I changed to the OpenXML format and now it takes more than 1 minute to read 3200 rows from an Excel Sheet.
Below is my code:
public static DataTable ReadExcelFileDOM(string filename)
{
DataTable table;
using (SpreadsheetDocument myDoc = SpreadsheetDocument.Open(filename, true))
{
WorkbookPart workbookPart = myDoc.WorkbookPart;
Sheet worksheet = workbookPart.Workbook.Descendants<Sheet>().First();
WorksheetPart worksheetPart =
(WorksheetPart)(workbookPart.GetPartById(worksheet.Id));
SheetData sheetData =
worksheetPart.Worksheet.Elements<SheetData>().First();
List<List<string>> totalRows = new List<List<string>>();
int maxCol = 0;
foreach (Row r in sheetData.Elements<Row>())
{
// Add the empty row.
string value = null;
while (totalRows.Count < r.RowIndex - 1)
{
List<string> emptyRowValues = new List<string>();
for (int i = 0; i < maxCol; i++)
{
emptyRowValues.Add("");
}
totalRows.Add(emptyRowValues);
}
List<string> tempRowValues = new List<string>();
foreach (Cell c in r.Elements<Cell>())
{
#region get the cell value of c.
if (c != null)
{
value = c.InnerText;
// If the cell represents a numeric value, you are done.
// For dates, this code returns the serialized value that
// represents the date. The code handles strings and Booleans
// individually. For shared strings, the code looks up the
// corresponding value in the shared string table. For Booleans,
// the code converts the value into the words TRUE or FALSE.
if (c.DataType != null)
{
switch (c.DataType.Value)
{
case CellValues.SharedString:
// For shared strings, look up the value in the shared
// strings table.
var stringTable = workbookPart.
GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
// If the shared string table is missing, something is
// wrong. Return the index that you found in the cell.
// Otherwise, look up the correct text in the table.
if (stringTable != null)
{
value = stringTable.SharedStringTable.
ElementAt(int.Parse(value)).InnerText;
}
break;
case CellValues.Boolean:
switch (value)
{
case "0":
value = "FALSE";
break;
default:
value = "TRUE";
break;
}
break;
}
}
Console.Write(value + " ");
}
#endregion
// Add the cell to the row list.
int i = Convert.ToInt32(c.CellReference.ToString().ToCharArray().First() - 'A');
// Add the blank cell in the row.
while (tempRowValues.Count < i)
{
tempRowValues.Add("");
}
tempRowValues.Add(value);
}
// add the row to the totalRows.
maxCol = processList(tempRowValues, totalRows, maxCol);
Console.WriteLine();
}
table = ConvertListListStringToDataTable(totalRows, maxCol);
}
return table;
}
/// <summary>
/// Add each row to the totalRows.
/// </summary>
/// <param name="tempRows"></param>
/// <param name="totalRows"></param>
/// <param name="MaxCol">the max column number in rows of the totalRows</param>
/// <returns></returns>
private static int processList(List<string> tempRows, List<List<string>> totalRows, int MaxCol)
{
if (tempRows.Count > MaxCol)
{
MaxCol = tempRows.Count;
}
totalRows.Add(tempRows);
return MaxCol;
}
private static DataTable ConvertListListStringToDataTable(List<List<string>> totalRows, int maxCol)
{
DataTable table = new DataTable();
for (int i = 0; i < maxCol; i++)
{
table.Columns.Add();
}
foreach (List<string> row in totalRows)
{
while (row.Count < maxCol)
{
row.Add("");
}
table.Rows.Add(row.ToArray());
}
return table;
}
Is there an efficient way to change this code somewhere so that the read process can be a little faster? How I can change this to code to read faster?
See Question&Answers more detail:
os 与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…