File size: 4,770 Bytes
e26fba6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using CsQuery;
using System.Net;
using CsQuery.Web;
namespace MovieFinder.Scraper
{
/// <summary>
/// Base class for site scrapers. Wraps the shared scraping steps (load a URL,
/// run a CSS selector, read an attribute, read text) in typed helper methods
/// and declares the metadata every concrete scraper must provide.
/// </summary>
public abstract class ScraperBase
{
    #region Fields
    // One shared instance of each step, reused by every scraper.
    private static readonly GotoUrlStep StepGotoUrl = new GotoUrlStep();
    private static readonly SelectStep StepSelect = new SelectStep();
    private static readonly ReadAttributeStep StepReadAttribute = new ReadAttributeStep();
    private static readonly ReadTextStep StepReadText = new ReadTextStep();
    #endregion

    /// <summary>
    /// Returns the part of <paramref name="text"/> that lies between the first
    /// occurrence of <paramref name="startText"/> and the next occurrence of
    /// <paramref name="endText"/> after it. Markers are matched ordinally.
    /// </summary>
    /// <param name="text">Text to search.</param>
    /// <param name="startText">Marker that precedes the wanted text.</param>
    /// <param name="endText">
    /// Marker that follows the wanted text. When null or empty, everything
    /// after <paramref name="startText"/> is returned.
    /// </param>
    /// <returns>The text between the two markers, excluding the markers.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="startText"/> does not occur in <paramref name="text"/>, or
    /// <paramref name="endText"/> does not occur after it.
    /// </exception>
    protected string SubstringBetween(string text, string startText, string endText = "")
    {
        var markerIndex = text.IndexOf(startText, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            // Without this check the -1 result would be added to the marker
            // length and an unrelated slice of the text would be returned.
            throw new ArgumentOutOfRangeException("startText", "Start marker was not found in the text.");
        }

        var start = markerIndex + startText.Length;
        if (String.IsNullOrEmpty(endText))
        {
            return text.Substring(start);
        }

        var end = text.IndexOf(endText, start, StringComparison.Ordinal);
        if (end < 0)
        {
            // Same exception type Substring would raise for the negative
            // length, but with a message that names the real problem.
            throw new ArgumentOutOfRangeException("endText", "End marker was not found after the start marker.");
        }

        return text.Substring(start, end - start);
    }

    /// <summary>
    /// Loads the document at <paramref name="url"/>, retrying when the load throws.
    /// </summary>
    /// <param name="url">Address to load.</param>
    /// <param name="attempt">Maximum number of tries.</param>
    /// <returns>The loaded document.</returns>
    /// <exception cref="Exception">
    /// The exception of the last failed try is rethrown. When
    /// <paramref name="attempt"/> is less than 1 no load is tried and a generic
    /// "Network error" exception is thrown instead.
    /// </exception>
    protected CQ GotoUrl(string url, int attempt = 1)
    {
        for (var remaining = attempt; remaining > 0; remaining--)
        {
            try
            {
                return (CQ)StepGotoUrl.Process(url);
            }
            catch
            {
                // Swallow the failure only while another try is left.
                if (remaining == 1)
                {
                    throw;
                }
            }
        }

        throw new Exception("Network error");
    }

    /// <summary>
    /// Runs the CSS selector <paramref name="css"/> against <paramref name="cq"/>.
    /// </summary>
    /// <param name="cq">Document or selection to search in.</param>
    /// <param name="css">CSS selector.</param>
    /// <returns>The elements produced by the select step.</returns>
    protected List<IDomElement> SelectItems(CQ cq, string css)
    {
        return (List<IDomElement>)StepSelect.Process(cq, css);
    }

    /// <summary>
    /// Runs the CSS selector <paramref name="css"/> against <paramref name="cq"/>
    /// and returns the first element.
    /// </summary>
    /// <param name="cq">Document or selection to search in.</param>
    /// <param name="css">CSS selector.</param>
    /// <returns>
    /// The first selected element, or null when nothing was selected or the
    /// selection threw.
    /// </returns>
    protected IDomElement SelectItem(CQ cq, string css)
    {
        try
        {
            var matches = (List<IDomElement>)StepSelect.Process(cq, css);
            return matches.Count > 0 ? matches[0] : null;
        }
        catch
        {
            // Best-effort lookup: any failure is reported as "no element".
            return null;
        }
    }

    // Disabled overloads that would select relative to an element.
    //protected List<IDomElement> SelectItems(IDomElement elem, string css)
    //{
    //    return SelectItems(elem.Cq(), css);
    //}
    //protected IDomElement SelectItem(IDomElement elem, string css)
    //{
    //    return SelectItem(elem.Cq(), css);
    //}

    /// <summary>
    /// Reads the attribute named <paramref name="attribute"/> from
    /// <paramref name="element"/> and returns it as a string.
    /// </summary>
    /// <param name="element">Element to read from.</param>
    /// <param name="attribute">Attribute name.</param>
    /// <returns>The attribute value converted with ToString().</returns>
    /// <remarks>
    /// NOTE(review): if the attribute lookup yields null, the ToString() call
    /// throws NullReferenceException; confirm how CsQuery reports a missing
    /// attribute.
    /// </remarks>
    protected string ReadAttribute(IDomElement element, string attribute)
    {
        return StepReadAttribute.Process(element, attribute).ToString();
    }

    /// <summary>
    /// Reads the inner text of <paramref name="element"/> and HTML-decodes it.
    /// </summary>
    /// <param name="element">Element to read from.</param>
    /// <returns>The decoded text.</returns>
    protected string ReadText(IDomElement element)
    {
        var rawText = StepReadText.Process(element).ToString();
        return WebUtility.HtmlDecode(rawText);
    }

    /// <summary>
    /// Supplied by each concrete scraper.
    /// NOTE(review): presumably the base address of the scraped site; confirm.
    /// </summary>
    public abstract string RootUrl { get; }

    /// <summary>
    /// Supplied by each concrete scraper.
    /// NOTE(review): presumably a unique identifier of the scraper; confirm.
    /// </summary>
    public abstract string ID { get; }

    /// <summary>
    /// Supplied by each concrete scraper.
    /// NOTE(review): presumably a display name; confirm.
    /// </summary>
    public abstract string Title { get; }

    /// <summary>
    /// Supplied by each concrete scraper. The meaning of the rank is defined by
    /// ImagePriorityRank, which is declared outside this file.
    /// </summary>
    public abstract ImagePriorityRank ImagePriority { get; }

    /// <summary>Returns true unless a subclass overrides it.</summary>
    public virtual bool IsWebSupported
    {
        get
        {
            return true;
        }
    }

    /// <summary>Returns true unless a subclass overrides it.</summary>
    public virtual bool IsDesktopSupported
    {
        get
        {
            return true;
        }
    }
}
/// <summary>
/// A single scraping operation that is invoked with a loosely typed argument
/// list. The implementations in this file check the argument count themselves
/// and cast or convert each argument to the type they need.
/// </summary>
public interface IScraperStep
{
/// <summary>Runs the step.</summary>
/// <param name="args">Positional arguments; their meaning is defined by the implementing step.</param>
/// <returns>The step result, typed as object; callers cast it to the concrete type.</returns>
object Process(params object[] args);
}
/// <summary>
/// Step that downloads a page and returns it as a CsQuery document.
/// </summary>
public class GotoUrlStep : IScraperStep
{
    // Request settings passed to every download: TimeoutSeconds is 30 and the
    // user agent is a desktop Chrome string. Never reassigned, hence readonly.
    private readonly ServerConfig serverConfig = new ServerConfig
    {
        TimeoutSeconds = 30,
        UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11"
    };

    /// <summary>
    /// Loads the document at the URL given as the first argument.
    /// </summary>
    /// <param name="args">args[0] is the URL; it is converted with ToString().</param>
    /// <returns>The result of CQ.CreateFromUrl for that URL.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="args"/> is null or empty, or the URL argument is null.
    /// </exception>
    public object Process(params object[] args)
    {
        if (args == null || args.Length < 1)
        {
            // Two-argument constructor: message first, parameter name second.
            throw new ArgumentException("Expected at least one argument: the URL to load.", "args");
        }

        var url = args[0];
        if (url == null)
        {
            throw new ArgumentException("The URL argument must not be null.", "args");
        }

        return CQ.CreateFromUrl(url.ToString(), serverConfig);
    }
}
/// <summary>
/// Step that runs a CSS selector against a CsQuery object and returns the
/// selected elements as a list.
/// </summary>
public class SelectStep : IScraperStep
{
    /// <summary>
    /// Applies the selector in args[1] to the CQ object in args[0].
    /// </summary>
    /// <param name="args">
    /// args[0] is cast to CQ; args[1] is the selector and is converted with ToString().
    /// </param>
    /// <returns>A List of IDomElement built from the selection's Elements.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="args"/> is null, has fewer than two entries, or one of
    /// the first two entries is null.
    /// </exception>
    public object Process(params object[] args)
    {
        if (args == null || args.Length < 2)
        {
            // Two-argument constructor: message first, parameter name second.
            throw new ArgumentException("Expected two arguments: the CQ object to search and the CSS selector.", "args");
        }

        if (args[0] == null || args[1] == null)
        {
            throw new ArgumentException("Neither the CQ object nor the CSS selector may be null.", "args");
        }

        var source = (CQ)args[0];
        var selector = args[1].ToString();
        return source.Select(selector).Elements.ToList();
    }
}
/// <summary>
/// Step that looks up an attribute on a DOM element.
/// </summary>
public class ReadAttributeStep : IScraperStep
{
    /// <summary>
    /// Reads the attribute named by args[1] from the element in args[0].
    /// </summary>
    /// <param name="args">
    /// args[0] is cast to IDomElement; args[1] is the attribute name and is
    /// converted with ToString().
    /// </param>
    /// <returns>The value of the element's Attributes indexer for that name.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="args"/> is null, has fewer than two entries, or one of
    /// the first two entries is null.
    /// </exception>
    public object Process(params object[] args)
    {
        if (args == null || args.Length < 2)
        {
            // Two-argument constructor: message first, parameter name second.
            throw new ArgumentException("Expected two arguments: the element and the attribute name.", "args");
        }

        if (args[0] == null || args[1] == null)
        {
            throw new ArgumentException("Neither the element nor the attribute name may be null.", "args");
        }

        var element = (IDomElement)args[0];
        var attributeName = args[1].ToString();
        return element.Attributes[attributeName];
    }
}
/// <summary>
/// Step that reads the inner text of a DOM element.
/// </summary>
public class ReadTextStep : IScraperStep
{
    /// <summary>
    /// Returns the InnerText of the element in args[0].
    /// </summary>
    /// <param name="args">args[0] is cast to IDomElement.</param>
    /// <returns>The element's InnerText.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="args"/> is null or empty, or the element argument is null.
    /// </exception>
    public object Process(params object[] args)
    {
        if (args == null || args.Length < 1)
        {
            // Two-argument constructor: message first, parameter name second.
            throw new ArgumentException("Expected at least one argument: the element to read.", "args");
        }

        if (args[0] == null)
        {
            throw new ArgumentException("The element argument must not be null.", "args");
        }

        var element = (IDomElement)args[0];
        return element.InnerText;
    }
}
}
|