niobures's picture
VideoSearch
e26fba6 verified
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using HtmlAgilityPack;
using Fizzler.Systems.HtmlAgilityPack;
using System.Linq;
using System.Collections.Specialized;
namespace MovieTube.Client.Scraper
{
public abstract class ScraperBase
{
#region Fields
private WebClientEx client = new WebClientEx();
#endregion
public static string Substring(string text, string startText, string endText = null)
{
try
{
var start = text.IndexOf(startText) + startText.Length;
if (start < startText.Length)
return null;
if (endText == null)
return text.Substring(start, text.Length - start);
var end = text.IndexOf(endText, start);
if (end == -1)
return null;
return text.Substring(start, end - start);
}
catch (Exception)
{
return null;
}
}
protected string SubstringBetween(string text, string startText, string endText = null)
{
return ScraperBase.Substring(text, startText, endText);
}
protected HtmlNode Get(string url)
{
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(client.DownloadString(url));
return doc.DocumentNode;
}
protected HtmlNode GetFromHtml(string html)
{
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(html);
return doc.DocumentNode;
}
protected HtmlNode Post(string url, NameValueCollection data, CookieCollection cookies = null, NameValueCollection headers = null)
{
System.Net.ServicePointManager.Expect100Continue = false;
if (cookies != null)
client.AddCookies(cookies);
if (headers != null)
client.AddHeaders(headers);
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(Encoding.ASCII.GetString(client.UploadValues(url, data)));
return doc.DocumentNode;
}
protected IEnumerable<HtmlNode> SelectItems(HtmlNode cq, string css)
{
return cq.QuerySelectorAll(css);
}
protected HtmlNode SelectItem(HtmlNode cq, string css)
{
return cq.QuerySelector(css);
}
protected string ReadAttribute(HtmlNode element, string attribute)
{
return element.Attributes[attribute] != null ? element.Attributes[attribute].Value : String.Empty;
}
protected string ReadText(HtmlNode element)
{
return element.InnerText;// WebUtility.HtmlDecode(StepReadText.Process(element).ToString());
}
public abstract string RootUrl { get;}
public abstract string ID { get; }
public abstract string Title { get; }
public virtual bool CanProcess(string url)
{
return url.ToLower().Contains(RootUrl.ToLower());
}
public virtual bool IsWebSupported
{
get
{
return true;
}
}
public virtual bool IsDesktopSupported
{
get
{
return true;
}
}
}
public class WebClientEx : WebClient
{
private readonly CookieContainer container = new CookieContainer();
protected override WebRequest GetWebRequest(Uri address)
{
WebRequest r = base.GetWebRequest(address);
var request = r as HttpWebRequest;
if (request != null)
{
request.CookieContainer = container;
}
return r;
}
protected override WebResponse GetWebResponse(WebRequest request, IAsyncResult result)
{
WebResponse response = base.GetWebResponse(request, result);
ReadCookies(response);
return response;
}
protected override WebResponse GetWebResponse(WebRequest request)
{
WebResponse response = base.GetWebResponse(request);
ReadCookies(response);
return response;
}
private void ReadCookies(WebResponse r)
{
var response = r as HttpWebResponse;
if (response != null)
{
CookieCollection cookies = response.Cookies;
container.Add(cookies);
}
}
public void AddCookies(CookieCollection cookies)
{
foreach(Cookie c in cookies)
container.Add(c);
}
public void AddHeaders(NameValueCollection headers)
{
foreach (string key in headers)
this.Headers.Add(key,headers[key]);
}
}
}