Based on this page: page
/// <summary>
/// Helper that loads an HTML page (from a URL or from a file stream) into an
/// HtmlDocument and lets callers query it with XPath expressions.
/// </summary>
public sealed class UtilParserHTML
{
// Private fields
// Address of the last page requested through LoadHTMLPage(string).
private Uri Uri;
// Response stream of the last HTTP download.
private Stream StreamPage;
// Request object created for the last HTTP download.
private HttpWebRequest HttpRequest;
// Response object received for the last HTTP download.
private HttpWebResponse HttpResponse;
// Public properties
/// <summary>
/// Document that the LoadHTMLPage overloads load content into and that
/// GetNodesByExpression queries. Created in the constructor; only this class can replace it.
/// </summary>
public HtmlDocument HtmlDocument { private set; get; }
/// <summary>
/// Creates the parser together with the empty document that later loads fill.
/// </summary>
public UtilParserHTML()
{
    // Nothing can have assigned the auto-property before the constructor body runs,
    // so a fresh document is always created here.
    this.HtmlDocument = new HtmlDocument();
}
/// <summary>
/// Downloads the page at <paramref name="UrlPage"/> with an HTTP GET request and
/// loads the response body into <see cref="HtmlDocument"/>.
/// </summary>
/// <param name="UrlPage">URL of the page to download.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="UrlPage"/> is null or empty.
/// </exception>
public void LoadHTMLPage(string UrlPage)
{
    if (string.IsNullOrEmpty(UrlPage))
    {
        // Name the offending parameter so the exception message is actionable.
        throw new ArgumentNullException("UrlPage");
    }

    CookieContainer cookieContainer = new CookieContainer();
    this.Uri = new Uri(UrlPage);
    this.HttpRequest = (HttpWebRequest)WebRequest.Create(UrlPage);
    this.HttpRequest.Method = WebRequestMethods.Http.Get;
    this.HttpRequest.CookieContainer = cookieContainer;

    // Dispose the response and its stream once the document has been loaded;
    // otherwise the underlying connection stays open until finalization.
    using (HttpWebResponse response = (HttpWebResponse)this.HttpRequest.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    {
        // The fields are still assigned as before, but after this method returns
        // they refer to disposed objects and must not be read from.
        this.HttpResponse = response;
        this.StreamPage = responseStream;
        this.HtmlDocument.Load(responseStream);
    }
}
/// <summary>
/// Loads HTML from an already opened file stream into <see cref="HtmlDocument"/>.
/// The stream is not closed by this method.
/// </summary>
/// <param name="StreamPage">Open file stream containing the HTML to load.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="StreamPage"/> is null.
/// </exception>
public void LoadHTMLPage(FileStream StreamPage)
{
    if (StreamPage == null)
    {
        // Name the offending parameter so the exception message is actionable.
        throw new ArgumentNullException("StreamPage");
    }

    this.HtmlDocument.Load(StreamPage);
}
/// <summary>
/// Runs an XPath query against the root node of the loaded document.
/// </summary>
/// <param name="XPathExpression">XPath expression to evaluate.</param>
/// <returns>
/// Whatever <c>SelectNodes</c> returns for the expression. Callers should check the
/// result for null before iterating (presumably returned when nothing matches;
/// confirm against the HtmlAgilityPack documentation).
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="XPathExpression"/> is null or empty.
/// </exception>
public HtmlNodeCollection GetNodesByExpression(string XPathExpression)
{
    if (string.IsNullOrEmpty(XPathExpression))
    {
        // Name the offending parameter so the exception message is actionable.
        throw new ArgumentNullException("XPathExpression");
    }

    return this.HtmlDocument.DocumentNode.SelectNodes(XPathExpression);
}
Use XPath to navigate the HTML.
In this case I used this XPath expression: //div[@class='arrowRibbon'] //img
For example:
...
// Download the page, select the matching <img> nodes and read their src attribute.
this.ParserHTML.LoadHTMLPage("http://www.img.com.br/default.aspx");
HtmlNodeCollection HtmlNodeCollectionResult = this.ParserHTML.GetNodesByExpression(Page.XPathExpression);
// The query result can be null, so guard before iterating.
if (HtmlNodeCollectionResult != null)
{
    foreach (HtmlNode NodeResult in HtmlNodeCollectionResult)
    {
        // GetAttributeValue falls back to the default instead of throwing a
        // NullReferenceException when the <img> has no src attribute.
        var src = NodeResult.GetAttributeValue("src", string.Empty);
    }
}
...
EDIT
Here is a complete example that also reads the width and height:
// Download the page, select the matching <img> nodes and read src, width and height.
this.ParserHTML.LoadHTMLPage("http://www.w3schools.com/tags/tag_img.asp");
HtmlNodeCollection HtmlNodeCollectionResult = this.ParserHTML.GetNodesByExpression("//div[@class='tryit_ex'] //img");
// The query result can be null, so guard before iterating.
if (HtmlNodeCollectionResult != null)
{
    foreach (HtmlNode NodeResult in HtmlNodeCollectionResult)
    {
        // Any of these attributes may be absent from an <img>; GetAttributeValue
        // returns the default instead of throwing a NullReferenceException.
        var src = NodeResult.GetAttributeValue("src", string.Empty);
        var w = NodeResult.GetAttributeValue("width", string.Empty);
        var h = NodeResult.GetAttributeValue("height", string.Empty);
    }
}
Hope this helps.
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…