You could use IHTMLDocument2
DOM to parse whatever elements you need from the HTML:
uses ActiveX, MSHTML;
const
HTML =
'<div class="tvRow tvFirst hasLabel tvFirst" title="example1">' +
'<label class="tvLabel">Name:</label>' +
'<span class="tvValue">Value</span>' +
'<div class="clear"></div>' +
'</div>';
procedure TForm1.Button1Click(Sender: TObject);
var
doc: OleVariant;
el: OleVariant;
i: Integer;
begin
doc := coHTMLDocument.Create as IHTMLDocument2;
doc.write(HTML);
doc.close;
ShowMessage(doc.body.innerHTML);
for i := 0 to doc.body.all.length - 1 do
begin
el := doc.body.all.item(i);
if (el.tagName = 'LABEL') and (el.className = 'tvLabel') then
ShowMessage(el.innerText);
if (el.tagName = 'SPAN') and (el.className = 'tvValue') then
ShowMessage(el.innerText);
end;
end;
I wanted to mention another very nice HTML parser I found today: htmlp
(Delphi Dom HTML Parser and Converter). It's not as flexible as the IHTMLDocument2
obviously, but it's very easy to work with, fast, free, and supports Unicode for older Delphi versions.
Sample usage:
uses HtmlParser, DomCore;
function GetDocBody(HtmlDoc: TDocument): TElement;
var
i: integer;
node: TNode;
begin
Result := nil;
for i := 0 to HtmlDoc.documentElement.childNodes.length - 1 do
begin
node := HtmlDoc.documentElement.childNodes.item(i);
if node.nodeName = 'body' then
begin
Result := node as TElement;
Break;
end;
end;
end;
procedure THTMLForm.Button2Click(Sender: TObject);
var
HtmlParser: THtmlParser;
HtmlDoc: TDocument;
i: Integer;
body, el: TElement;
node: TNode;
begin
HtmlParser := THtmlParser.Create;
try
HtmlDoc := HtmlParser.parseString(HTML);
try
body := GetDocBody(HtmlDoc);
if Assigned(body) then
for i := 0 to body.childNodes.length - 1 do
begin
node := body.childNodes.item(i);
if (node is TElement) then
begin
el := node as TElement;
if (el.tagName = 'div') and (el.GetAttribute('class') = 'tvRow tvFirst hasLabel tvFirst') then
begin
// iterate el.childNodes here...
ShowMessage(IntToStr(el.childNodes.length));
end;
end;
end;
finally
HtmlDoc.Free;
end;
finally
HtmlParser.Free
end;
end;
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…