File size: 5,149 Bytes
e26fba6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using HtmlAgilityPack;
using Fizzler.Systems.HtmlAgilityPack;
using System.Linq;
using System.Collections.Specialized;

namespace  MovieTube.Client.Scraper
{
    /// <summary>
    /// Base class for site scrapers. Bundles a cookie-aware web client with small helpers for
    /// downloading pages, querying them with CSS selectors and slicing text out of raw markup.
    /// </summary>
    public abstract class ScraperBase
    {
        #region Fields

        // One client per scraper instance: cookies and headers added in Post stay on it
        // and are therefore reused by later Get/Post calls.
        private readonly WebClientEx client = new WebClientEx();
        #endregion

        /// <summary>
        /// Returns the part of <paramref name="text"/> that follows the first occurrence of
        /// <paramref name="startText"/> and, when <paramref name="endText"/> is given, precedes
        /// the next occurrence of <paramref name="endText"/>.
        /// </summary>
        /// <param name="text">Text to search.</param>
        /// <param name="startText">Marker that precedes the wanted fragment.</param>
        /// <param name="endText">Marker that follows the wanted fragment; when null the rest of the text is returned.</param>
        /// <returns>
        /// The fragment between the markers, or null when <paramref name="text"/> or
        /// <paramref name="startText"/> is null or when a marker cannot be found.
        /// </returns>
        public static string Substring(string text, string startText, string endText = null)
        {
            // Null input is an ordinary "no match"; handle it explicitly instead of relying on a
            // swallowed NullReferenceException.
            if (text == null || startText == null)
                return null;

            // Markers are literal markup, not linguistic text: ordinal search avoids
            // culture-dependent matching (ignorable characters, "\n" inside "\r\n" on ICU runtimes).
            var markerIndex = text.IndexOf(startText, StringComparison.Ordinal);
            if (markerIndex < 0)
                return null;

            var start = markerIndex + startText.Length;
            if (endText == null)
                return text.Substring(start);

            var end = text.IndexOf(endText, start, StringComparison.Ordinal);
            if (end < 0)
                return null;

            return text.Substring(start, end - start);
        }

        /// <summary>
        /// Instance-level convenience wrapper around <see cref="Substring"/>.
        /// </summary>
        protected string SubstringBetween(string text, string startText, string endText = null)
        {
            return ScraperBase.Substring(text, startText, endText);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with the shared client and parses the response as HTML.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The document node of the parsed page.</returns>
        protected HtmlNode Get(string url)
        {
            return GetFromHtml(client.DownloadString(url));
        }

        /// <summary>
        /// Parses an HTML string.
        /// </summary>
        /// <param name="html">Markup to parse.</param>
        /// <returns>The document node of the parsed markup.</returns>
        protected HtmlNode GetFromHtml(string html)
        {
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);
            return doc.DocumentNode;
        }

        /// <summary>
        /// Posts form values to <paramref name="url"/> and parses the response as HTML.
        /// </summary>
        /// <param name="url">Address to post to.</param>
        /// <param name="data">Form fields to upload.</param>
        /// <param name="cookies">Optional cookies to add to the shared client before the request.</param>
        /// <param name="headers">Optional headers to add to the shared client before the request.</param>
        /// <returns>The document node of the parsed response.</returns>
        protected HtmlNode Post(string url, NameValueCollection data, CookieCollection cookies = null, NameValueCollection headers = null)
        {
            // Static setting: this affects every request made through ServicePointManager,
            // not only the ones issued by this scraper.
            System.Net.ServicePointManager.Expect100Continue = false;
            if (cookies != null)
                client.AddCookies(cookies);
            if (headers != null)
                client.AddHeaders(headers);

            var responseBytes = client.UploadValues(url, data);

            // ASCII decoding turned every non-ASCII character into '?'. UTF-8 decodes ASCII
            // content identically and keeps international text intact.
            return GetFromHtml(Encoding.UTF8.GetString(responseBytes));
        }

        /// <summary>
        /// Returns all descendants of <paramref name="cq"/> matching the CSS selector <paramref name="css"/>.
        /// </summary>
        protected IEnumerable<HtmlNode> SelectItems(HtmlNode cq, string css)
        {
            return cq.QuerySelectorAll(css);
        }

        /// <summary>
        /// Returns the result of <c>QuerySelector</c> for the CSS selector <paramref name="css"/>
        /// evaluated against <paramref name="cq"/>.
        /// </summary>
        protected HtmlNode SelectItem(HtmlNode cq, string css)
        {
            return cq.QuerySelector(css);
        }

        /// <summary>
        /// Reads the value of an attribute.
        /// </summary>
        /// <param name="element">Node to read from.</param>
        /// <param name="attribute">Attribute name.</param>
        /// <returns>The attribute value, or an empty string when the attribute is absent.</returns>
        protected string ReadAttribute(HtmlNode element, string attribute)
        {
            // Look the attribute up once instead of twice.
            var found = element.Attributes[attribute];
            return found != null ? found.Value : String.Empty;
        }

        /// <summary>
        /// Returns the node's <c>InnerText</c> unchanged; no HTML decoding is applied here.
        /// </summary>
        protected string ReadText(HtmlNode element)
        {
            return element.InnerText;
        }

        /// <summary>Site address used by <see cref="CanProcess"/> to recognise URLs of this scraper.</summary>
        public abstract string RootUrl { get;}

        /// <summary>Identifier of the scraper.</summary>
        public abstract string ID { get; }

        /// <summary>Title of the scraper.</summary>
        public abstract string Title { get; }

        /// <summary>
        /// Tells whether <paramref name="url"/> belongs to this scraper, i.e. contains
        /// <see cref="RootUrl"/> (case-insensitive).
        /// </summary>
        /// <param name="url">Address to test.</param>
        /// <returns>True when the address contains <see cref="RootUrl"/>; false otherwise, including for a null address.</returns>
        public virtual bool CanProcess(string url)
        {
            if (url == null)
                return false;

            // URLs are identifiers, so compare ordinally; ToLower() depends on the current
            // culture (e.g. Turkish 'I') and allocates two temporary strings.
            return url.IndexOf(RootUrl, StringComparison.OrdinalIgnoreCase) >= 0;
        }

        /// <summary>Whether the scraper can be used from the web client; true unless overridden.</summary>
        public virtual bool IsWebSupported
        {
            get
            {
                return true;
            }
        }

        /// <summary>Whether the scraper can be used from the desktop client; true unless overridden.</summary>
        public virtual bool IsDesktopSupported
        {
            get
            {
                return true;
            }
        }
    }

    /// <summary>
    /// <see cref="WebClient"/> that attaches one <see cref="CookieContainer"/> to every HTTP
    /// request it creates and copies response cookies back into that container, so cookies
    /// carry over between requests made with the same instance.
    /// </summary>
    public class WebClientEx : WebClient
    {
        // Cookie store shared by all requests issued through this client.
        private readonly CookieContainer container = new CookieContainer();

        /// <summary>
        /// Creates the request and, for HTTP requests, attaches the shared cookie container.
        /// </summary>
        protected override WebRequest GetWebRequest(Uri address)
        {
            WebRequest r = base.GetWebRequest(address);
            var request = r as HttpWebRequest;
            if (request != null)
            {
                request.CookieContainer = container;
            }
            return r;
        }

        /// <summary>
        /// Gets the response of an asynchronous request and stores its cookies.
        /// </summary>
        protected override WebResponse GetWebResponse(WebRequest request, IAsyncResult result)
        {
            WebResponse response = base.GetWebResponse(request, result);
            ReadCookies(response);
            return response;
        }

        /// <summary>
        /// Gets the response of a synchronous request and stores its cookies.
        /// </summary>
        protected override WebResponse GetWebResponse(WebRequest request)
        {
            WebResponse response = base.GetWebResponse(request);
            ReadCookies(response);
            return response;
        }

        // Copies the cookies of an HTTP response into the shared container; other response
        // types are ignored.
        private void ReadCookies(WebResponse r)
        {
            var response = r as HttpWebResponse;
            if (response != null)
            {
                container.Add(response.Cookies);
            }
        }

        /// <summary>
        /// Adds the given cookies to the container used for subsequent requests.
        /// </summary>
        /// <param name="cookies">Cookies to add.</param>
        /// <exception cref="ArgumentNullException"><paramref name="cookies"/> is null.</exception>
        public void AddCookies(CookieCollection cookies)
        {
            if (cookies == null)
                throw new ArgumentNullException("cookies");

            container.Add(cookies);
        }

        /// <summary>
        /// Sets the given headers on this client for subsequent requests.
        /// </summary>
        /// <param name="headers">Header names and values to set.</param>
        /// <exception cref="ArgumentNullException"><paramref name="headers"/> is null.</exception>
        public void AddHeaders(NameValueCollection headers)
        {
            if (headers == null)
                throw new ArgumentNullException("headers");

            // Set replaces an existing value. Add would append, so passing the same headers on
            // every request of a reused client would grow them into "v,v,v".
            foreach (string key in headers)
                this.Headers.Set(key, headers[key]);
        }
    }
  
}