This code is for reading a table content. all the values are enclosed by ()Tj, so we look for all the values, you can do anything then with the string resulst.
string _filePath = @"~MyPDF.pdf";
public List<String> Read()
{
var pdfReader = new PdfReader(_filePath);
var pages = new List<String>();
for (int i = 0; i < pdfReader.NumberOfPages; i++)
{
string textFromPage = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, pdfReader.GetPageContent(i + 1)));
pages.Add(GetDataConvertedData(textFromPage));
}
return pages;
}
string GetDataConvertedData(string textFromPage)
{
var texts = textFromPage.Split(new[] { "
" }, StringSplitOptions.None)
.Where(text => text.Contains("Tj")).ToList();
return texts.Aggregate(string.Empty, (current, t) => current +
t.TrimStart('(')
.TrimEnd('j')
.TrimEnd('T')
.TrimEnd(')'));
}
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…