File size: 4,770 Bytes
e26fba6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using CsQuery;
using System.Net;
using CsQuery.Web;

namespace MovieFinder.Scraper
{
    /// <summary>
    /// Base class for scrapers. Wraps the shared scraping steps (load a URL, run a CSS
    /// selector, read an attribute, read text) in protected helpers and declares the
    /// metadata members each concrete scraper must provide.
    /// </summary>
    public abstract class ScraperBase
    {
        #region Fields

        // One instance of each step, shared by all scrapers through static fields.
        private static readonly GotoUrlStep StepGotoUrl = new GotoUrlStep();
        private static readonly SelectStep StepSelect = new SelectStep();
        private static readonly ReadAttributeStep StepReadAttribute = new ReadAttributeStep();
        private static readonly ReadTextStep StepReadText = new ReadTextStep();

        #endregion

        /// <summary>
        /// Returns the part of <paramref name="text"/> that follows the first occurrence of
        /// <paramref name="startText"/> and precedes the next occurrence of
        /// <paramref name="endText"/>.
        /// </summary>
        /// <param name="text">Text to search. Null or empty yields an empty result.</param>
        /// <param name="startText">Marker that precedes the wanted span.</param>
        /// <param name="endText">
        /// Marker that follows the wanted span. When null or empty, everything after
        /// <paramref name="startText"/> is returned.
        /// </param>
        /// <returns>
        /// The text between the two markers, or <see cref="String.Empty"/> when either marker
        /// cannot be found.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="startText"/> is null.</exception>
        protected string SubstringBetween(string text, string startText, string endText = "")
        {
            if (startText == null)
                throw new ArgumentNullException("startText");
            if (String.IsNullOrEmpty(text))
                return String.Empty;

            // Ordinal search: markers are markup fragments, so culture-sensitive matching
            // (the default for IndexOf(string)) could miss or mis-position them.
            var markerIndex = text.IndexOf(startText, StringComparison.Ordinal);
            if (markerIndex < 0)
                return String.Empty; // previously -1 was used as an offset and produced garbage

            var start = markerIndex + startText.Length;
            if (String.IsNullOrEmpty(endText))
                return text.Substring(start);

            var end = text.IndexOf(endText, start, StringComparison.Ordinal);
            if (end < 0)
                return String.Empty; // previously a negative length made Substring throw

            return text.Substring(start, end - start);
        }


        /// <summary>
        /// Loads the document at <paramref name="url"/>, trying up to
        /// <paramref name="attempt"/> times.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <param name="attempt">
        /// Maximum number of tries. Failures before the last try are ignored; the failure of
        /// the last try is rethrown unchanged.
        /// </param>
        /// <returns>The loaded document.</returns>
        /// <exception cref="Exception">
        /// Thrown with the message "Network error" when <paramref name="attempt"/> is less
        /// than 1, in which case no request is made.
        /// </exception>
        protected CQ GotoUrl(string url, int attempt = 1)
        {
            while (attempt-- > 0)
            {
                try
                {
                    return (CQ)StepGotoUrl.Process(url);
                }
                catch
                {
                    // attempt has already been decremented: 0 means this was the last try.
                    if (attempt == 0) throw;
                }
            }

            // Only reachable when the loop body never ran (attempt < 1 on entry).
            throw new Exception("Network error");
        }

        /// <summary>
        /// Returns every element of <paramref name="cq"/> that matches the CSS selector.
        /// </summary>
        /// <param name="cq">Document to query.</param>
        /// <param name="css">CSS selector.</param>
        /// <returns>The matching elements; errors raised by the select step propagate.</returns>
        protected List<IDomElement> SelectItems(CQ cq, string css)
        {
            return (List<IDomElement>)StepSelect.Process(cq, css);
        }

        /// <summary>
        /// Returns the first element of <paramref name="cq"/> that matches the CSS selector.
        /// </summary>
        /// <param name="cq">Document to query.</param>
        /// <param name="css">CSS selector.</param>
        /// <returns>
        /// The first match, or null when nothing matches or the selection fails for any reason.
        /// </returns>
        protected IDomElement SelectItem(CQ cq, string css)
        {
            try
            {
                var matches = SelectItems(cq, css);
                return matches.Count > 0 ? matches[0] : null;
            }
            catch
            {
                // Best effort by design: any failure is reported as "no match".
                return null;
            }
        }


        //protected List<IDomElement> SelectItems(IDomElement elem, string css)
        //{
        //    return SelectItems(elem.Cq(), css);
        //}

        //protected IDomElement SelectItem(IDomElement elem, string css)
        //{
        //    return SelectItem(elem.Cq(), css);
        //}


        /// <summary>
        /// Reads an attribute value from an element.
        /// </summary>
        /// <param name="element">Element to read from; may be null (e.g. a failed SelectItem).</param>
        /// <param name="attribute">Attribute name.</param>
        /// <returns>
        /// The attribute value, or <see cref="String.Empty"/> when <paramref name="element"/>
        /// or <paramref name="attribute"/> is null or the step yields no value.
        /// </returns>
        protected string ReadAttribute(IDomElement element, string attribute)
        {
            if (element == null || attribute == null)
                return String.Empty;

            // The step may hand back null (presumably when the attribute is absent);
            // calling ToString() on it directly used to throw NullReferenceException.
            var value = StepReadAttribute.Process(element, attribute);
            return value == null ? String.Empty : value.ToString();
        }

        /// <summary>
        /// Reads the inner text of an element and HTML-decodes it.
        /// </summary>
        /// <param name="element">Element to read from; may be null (e.g. a failed SelectItem).</param>
        /// <returns>
        /// The decoded text, or <see cref="String.Empty"/> when <paramref name="element"/> is
        /// null or the step yields no text.
        /// </returns>
        protected string ReadText(IDomElement element)
        {
            if (element == null)
                return String.Empty;

            var raw = StepReadText.Process(element);
            return raw == null ? String.Empty : WebUtility.HtmlDecode(raw.ToString());
        }

        /// <summary>Root URL, supplied by the concrete scraper.</summary>
        public abstract string RootUrl { get;}

        /// <summary>Identifier, supplied by the concrete scraper.</summary>
        public abstract string ID { get; }

        /// <summary>Title, supplied by the concrete scraper.</summary>
        public abstract string Title { get; }

        /// <summary>Image priority rank, supplied by the concrete scraper.</summary>
        public abstract ImagePriorityRank ImagePriority { get; }

        /// <summary>Web support flag. Returns true unless overridden.</summary>
        public virtual bool IsWebSupported
        {
            get
            {
                return true;
            }
        }

        /// <summary>Desktop support flag. Returns true unless overridden.</summary>
        public virtual bool IsDesktopSupported
        {
            get
            {
                return true;
            }
        }
    }

    /// <summary>
    /// A single scraping operation invoked with a loosely typed argument list.
    /// </summary>
    public interface IScraperStep
    {
        /// <summary>Runs the step.</summary>
        /// <param name="args">
        /// Step-specific arguments; each implementation decides how many it requires and
        /// what types it expects.
        /// </param>
        /// <returns>
        /// The step result as an untyped object; callers cast it to the type the concrete
        /// step produces.
        /// </returns>
        object Process(params object[] args);
    }

    /// <summary>
    /// Step that loads the document at a URL through <c>CQ.CreateFromUrl</c>.
    /// </summary>
    public class GotoUrlStep : IScraperStep
    {
        // Request settings passed to every CQ.CreateFromUrl call made by this step.
        // readonly: the instance is shared and never reassigned.
        private readonly ServerConfig serverConfig = new ServerConfig
        {
            TimeoutSeconds = 30,
            UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11"
        };

        /// <summary>Loads the document located at the URL given as the first argument.</summary>
        /// <param name="args">
        /// <c>args[0]</c>: the URL; its <c>ToString()</c> value is used. Extra arguments are ignored.
        /// </param>
        /// <returns>The document returned by <c>CQ.CreateFromUrl</c>.</returns>
        /// <exception cref="ArgumentException">No argument was supplied.</exception>
        /// <exception cref="ArgumentNullException">The URL argument is null.</exception>
        public object Process(params object[] args)
        {
            // The two-argument constructor is used so "args" is reported as the parameter
            // name rather than being the whole error message.
            if (args == null || args.Length < 1)
                throw new ArgumentException("Expected the URL as the first argument.", "args");
            if (args[0] == null)
                throw new ArgumentNullException("args", "The URL argument must not be null.");

            return CQ.CreateFromUrl(args[0].ToString(), serverConfig);
        }
    }

    /// <summary>
    /// Step that runs a CSS selector against a <c>CQ</c> document.
    /// </summary>
    public class SelectStep : IScraperStep
    {
        /// <summary>Selects the elements matching a CSS selector.</summary>
        /// <param name="args">
        /// <c>args[0]</c>: the <c>CQ</c> document to query.
        /// <c>args[1]</c>: the CSS selector; its <c>ToString()</c> value is used.
        /// </param>
        /// <returns>A <c>List&lt;IDomElement&gt;</c> holding the matching elements.</returns>
        /// <exception cref="ArgumentException">
        /// Fewer than two arguments were supplied, or the first one is not a <c>CQ</c> instance.
        /// </exception>
        /// <exception cref="ArgumentNullException">The selector argument is null.</exception>
        public object Process(params object[] args)
        {
            if (args == null || args.Length < 2)
                throw new ArgumentException("Expected a CQ document and a CSS selector.", "args");

            // 'as' instead of a hard cast: a null or wrong-typed document becomes an
            // argument error instead of a NullReferenceException / InvalidCastException.
            var document = args[0] as CQ;
            if (document == null)
                throw new ArgumentException("The first argument must be a non-null CQ document.", "args");
            if (args[1] == null)
                throw new ArgumentNullException("args", "The CSS selector must not be null.");

            return document.Select(args[1].ToString()).Elements.ToList();
        }
    }

    /// <summary>
    /// Step that reads one attribute from a DOM element.
    /// </summary>
    public class ReadAttributeStep : IScraperStep
    {
        /// <summary>Looks up an attribute on an element.</summary>
        /// <param name="args">
        /// <c>args[0]</c>: the <c>IDomElement</c> to read from.
        /// <c>args[1]</c>: the attribute name; its <c>ToString()</c> value is used.
        /// </param>
        /// <returns>
        /// Whatever the element's attribute indexer returns for that name
        /// (presumably null when the attribute is absent; confirm against CsQuery).
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Fewer than two arguments were supplied, or the first one is not an <c>IDomElement</c>.
        /// </exception>
        /// <exception cref="ArgumentNullException">The attribute name argument is null.</exception>
        public object Process(params object[] args)
        {
            if (args == null || args.Length < 2)
                throw new ArgumentException("Expected an element and an attribute name.", "args");

            // 'as' instead of a hard cast so that bad input is reported as an argument error.
            var element = args[0] as IDomElement;
            if (element == null)
                throw new ArgumentException("The first argument must be a non-null IDomElement.", "args");
            if (args[1] == null)
                throw new ArgumentNullException("args", "The attribute name must not be null.");

            return element.Attributes[args[1].ToString()];
        }
    }

    /// <summary>
    /// Step that reads the inner text of a DOM element.
    /// </summary>
    public class ReadTextStep : IScraperStep
    {
        /// <summary>Returns the <c>InnerText</c> of an element.</summary>
        /// <param name="args"><c>args[0]</c>: the <c>IDomElement</c> to read from.</param>
        /// <returns>The element's <c>InnerText</c> value (not HTML-decoded by this step).</returns>
        /// <exception cref="ArgumentException">
        /// No argument was supplied, or the first one is not an <c>IDomElement</c>.
        /// </exception>
        public object Process(params object[] args)
        {
            if (args == null || args.Length < 1)
                throw new ArgumentException("Expected an element as the first argument.", "args");

            // 'as' instead of a hard cast so that bad input is reported as an argument error.
            var element = args[0] as IDomElement;
            if (element == null)
                throw new ArgumentException("The first argument must be a non-null IDomElement.", "args");

            return element.InnerText;
        }
    }

  


  
}