File size: 5,149 Bytes
e26fba6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using HtmlAgilityPack;
using Fizzler.Systems.HtmlAgilityPack;
using System.Linq;
using System.Collections.Specialized;
namespace MovieTube.Client.Scraper
{
/// <summary>
/// Base class for site scrapers. Provides helpers for downloading pages, parsing
/// them into HtmlAgilityPack nodes, querying them with CSS selectors and cutting
/// fragments out of raw text.
/// </summary>
public abstract class ScraperBase
{
    #region Fields
    // A single client per scraper instance: cookies and headers registered on it
    // are carried over to later requests made through the same scraper.
    private readonly WebClientEx client = new WebClientEx();
    #endregion

    /// <summary>
    /// Returns the part of <paramref name="text"/> located after the first occurrence of
    /// <paramref name="startText"/> and, when <paramref name="endText"/> is given, before
    /// the first occurrence of <paramref name="endText"/> that follows it.
    /// </summary>
    /// <param name="text">Text to search.</param>
    /// <param name="startText">Marker preceding the wanted fragment.</param>
    /// <param name="endText">Marker following the wanted fragment; when <c>null</c>, the rest of the text is returned.</param>
    /// <returns>
    /// The extracted fragment, or <c>null</c> when an argument is <c>null</c> or a marker is not found.
    /// </returns>
    public static string Substring(string text, string startText, string endText = null)
    {
        if (text == null || startText == null)
        {
            return null;
        }

        // Ordinal search: markers are matched character-for-character. A culture-sensitive
        // search may ignore some characters or treat "\r\n" as one unit, which yields a
        // wrong index or a false "not found" for markup delimiters.
        int markerIndex = text.IndexOf(startText, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return null;
        }

        int start = markerIndex + startText.Length;
        if (endText == null)
        {
            return text.Substring(start);
        }

        int end = text.IndexOf(endText, start, StringComparison.Ordinal);
        if (end < 0)
        {
            return null;
        }

        return text.Substring(start, end - start);
    }

    /// <summary>
    /// Instance-level shortcut for <see cref="Substring"/>.
    /// </summary>
    /// <param name="text">Text to search.</param>
    /// <param name="startText">Marker preceding the wanted fragment.</param>
    /// <param name="endText">Optional marker following the wanted fragment.</param>
    /// <returns>The extracted fragment, or <c>null</c> when it cannot be located.</returns>
    protected string SubstringBetween(string text, string startText, string endText = null)
    {
        return ScraperBase.Substring(text, startText, endText);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> and parses the response as HTML.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The document node of the parsed page.</returns>
    protected HtmlNode Get(string url)
    {
        return ParseHtml(client.DownloadString(url));
    }

    /// <summary>
    /// Parses an HTML string that has already been obtained.
    /// </summary>
    /// <param name="html">HTML markup to parse.</param>
    /// <returns>The document node of the parsed markup.</returns>
    protected HtmlNode GetFromHtml(string html)
    {
        return ParseHtml(html);
    }

    /// <summary>
    /// Posts form values to <paramref name="url"/> and parses the response as HTML.
    /// </summary>
    /// <param name="url">Address to post to.</param>
    /// <param name="data">Form fields to upload.</param>
    /// <param name="cookies">Optional cookies to register on the shared client before the request.</param>
    /// <param name="headers">Optional headers to register on the shared client before the request.</param>
    /// <returns>The document node of the parsed response.</returns>
    protected HtmlNode Post(string url, NameValueCollection data, CookieCollection cookies = null, NameValueCollection headers = null)
    {
        // Static setting on ServicePointManager, i.e. not scoped to this client:
        // turns off the "Expect: 100-continue" handshake.
        System.Net.ServicePointManager.Expect100Continue = false;

        if (cookies != null)
        {
            client.AddCookies(cookies);
        }

        if (headers != null)
        {
            client.AddHeaders(headers);
        }

        byte[] responseBytes = client.UploadValues(url, data);

        // Decode as UTF-8 rather than ASCII: ASCII decoding replaces every byte above 0x7F
        // with '?', which corrupts any non-English text in the response. Pure ASCII
        // responses decode identically under UTF-8.
        return ParseHtml(Encoding.UTF8.GetString(responseBytes));
    }

    /// <summary>
    /// Returns every node under <paramref name="cq"/> matching the CSS selector.
    /// </summary>
    /// <param name="cq">Node to search from.</param>
    /// <param name="css">CSS selector.</param>
    /// <returns>The matching nodes.</returns>
    protected IEnumerable<HtmlNode> SelectItems(HtmlNode cq, string css)
    {
        return cq.QuerySelectorAll(css);
    }

    /// <summary>
    /// Returns the result of a single-node CSS query under <paramref name="cq"/>.
    /// </summary>
    /// <param name="cq">Node to search from.</param>
    /// <param name="css">CSS selector.</param>
    /// <returns>The node returned by the selector query.</returns>
    protected HtmlNode SelectItem(HtmlNode cq, string css)
    {
        return cq.QuerySelector(css);
    }

    /// <summary>
    /// Reads the value of an attribute of <paramref name="element"/>.
    /// </summary>
    /// <param name="element">Node whose attribute is read.</param>
    /// <param name="attribute">Attribute name.</param>
    /// <returns>The attribute value, or an empty string when the attribute is absent.</returns>
    protected string ReadAttribute(HtmlNode element, string attribute)
    {
        // Look the attribute up once instead of twice.
        HtmlAttribute found = element.Attributes[attribute];
        return found != null ? found.Value : String.Empty;
    }

    /// <summary>
    /// Returns the inner text of <paramref name="element"/> exactly as the parser exposes it;
    /// no HTML decoding is applied here.
    /// </summary>
    /// <param name="element">Node whose text is read.</param>
    /// <returns>The node's <c>InnerText</c>.</returns>
    protected string ReadText(HtmlNode element)
    {
        return element.InnerText;
    }

    /// <summary>Root URL of the site handled by this scraper; used by <see cref="CanProcess"/>.</summary>
    public abstract string RootUrl { get; }

    /// <summary>Identifier of this scraper.</summary>
    public abstract string ID { get; }

    /// <summary>Title of this scraper.</summary>
    public abstract string Title { get; }

    /// <summary>
    /// Tells whether <paramref name="url"/> belongs to the site handled by this scraper,
    /// i.e. whether it contains <see cref="RootUrl"/> (case-insensitive).
    /// </summary>
    /// <param name="url">URL to test.</param>
    /// <returns><c>true</c> when the URL contains the root URL; <c>false</c> otherwise, including for a <c>null</c> URL.</returns>
    public virtual bool CanProcess(string url)
    {
        string root = RootUrl;
        if (url == null || root == null)
        {
            return false;
        }

        // Ordinal, case-insensitive match: URLs are identifiers, so the result must not
        // depend on the current culture's casing rules (as ToLower() does).
        return url.IndexOf(root, StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>Whether this scraper is supported on the web client. Defaults to <c>true</c>.</summary>
    public virtual bool IsWebSupported
    {
        get
        {
            return true;
        }
    }

    /// <summary>Whether this scraper is supported on the desktop client. Defaults to <c>true</c>.</summary>
    public virtual bool IsDesktopSupported
    {
        get
        {
            return true;
        }
    }

    // Builds an HtmlAgilityPack document from markup and returns its document node.
    private static HtmlNode ParseHtml(string html)
    {
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(html);
        return doc.DocumentNode;
    }
}
/// <summary>
/// <see cref="WebClient"/> that keeps a cookie container across requests and offers
/// helpers to register extra cookies and headers.
/// </summary>
public class WebClientEx : WebClient
{
    // Shared by every request issued through this client.
    private readonly CookieContainer container = new CookieContainer();

    /// <summary>
    /// Creates the request and, for HTTP requests, attaches the shared cookie container.
    /// </summary>
    /// <param name="address">Address the request is created for.</param>
    /// <returns>The request created by the base implementation.</returns>
    protected override WebRequest GetWebRequest(Uri address)
    {
        WebRequest r = base.GetWebRequest(address);
        var request = r as HttpWebRequest;
        if (request != null)
        {
            request.CookieContainer = container;
        }
        return r;
    }

    /// <summary>
    /// Obtains the response of an asynchronous request and stores its cookies.
    /// </summary>
    /// <param name="request">Request the response belongs to.</param>
    /// <param name="result">Asynchronous operation handle.</param>
    /// <returns>The response returned by the base implementation.</returns>
    protected override WebResponse GetWebResponse(WebRequest request, IAsyncResult result)
    {
        WebResponse response = base.GetWebResponse(request, result);
        ReadCookies(response);
        return response;
    }

    /// <summary>
    /// Obtains the response of a synchronous request and stores its cookies.
    /// </summary>
    /// <param name="request">Request the response belongs to.</param>
    /// <returns>The response returned by the base implementation.</returns>
    protected override WebResponse GetWebResponse(WebRequest request)
    {
        WebResponse response = base.GetWebResponse(request);
        ReadCookies(response);
        return response;
    }

    // Copies the cookies of an HTTP response into the shared container;
    // non-HTTP responses are ignored.
    private void ReadCookies(WebResponse r)
    {
        var response = r as HttpWebResponse;
        if (response != null)
        {
            CookieCollection cookies = response.Cookies;
            container.Add(cookies);
        }
    }

    /// <summary>
    /// Adds each cookie of <paramref name="cookies"/> to the shared container.
    /// </summary>
    /// <param name="cookies">Cookies to register.</param>
    /// <exception cref="ArgumentNullException"><paramref name="cookies"/> is <c>null</c>.</exception>
    public void AddCookies(CookieCollection cookies)
    {
        if (cookies == null)
        {
            throw new ArgumentNullException("cookies");
        }

        foreach (Cookie c in cookies)
        {
            container.Add(c);
        }
    }

    /// <summary>
    /// Sets each header of <paramref name="headers"/> on this client, replacing any value
    /// already registered under the same name.
    /// </summary>
    /// <param name="headers">Header names and values to register.</param>
    /// <exception cref="ArgumentNullException"><paramref name="headers"/> is <c>null</c>.</exception>
    public void AddHeaders(NameValueCollection headers)
    {
        if (headers == null)
        {
            throw new ArgumentNullException("headers");
        }

        foreach (string key in headers)
        {
            // Assign through the indexer instead of Headers.Add: Add appends to an existing
            // header, so registering the same headers before every request on a reused
            // client would send "value,value,value...".
            this.Headers[key] = headers[key];
        }
    }
}
}
|