LovnishVerma committed on
Commit
65e962b
·
verified ·
1 Parent(s): 2b15398

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +26 -0
  2. requirements.txt +12 -0
  3. templates/index.html +36 -0
  4. templates/result.html +26 -0
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
# Flask application object; the @app.route decorators in this file register view functions on it.
+ app = Flask(__name__)
6
+
7
@app.route("/")
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
10
+
11
@app.route("/scrape", methods=["POST"])
def scrape():
    """Fetch a user-supplied URL and list the text of every element with a given tag.

    Reads the ``url`` and ``tag`` form fields, downloads the page, parses it
    with BeautifulSoup's ``html.parser`` and renders ``result.html`` with the
    page title and the text of each matching element.

    Failures (missing field, invalid URL, network error, non-2xx response) are
    reported through the template's ``error`` variable rather than surfacing
    as an unhandled exception.

    Returns:
        The rendered ``result.html`` page.
    """
    # Strip surrounding whitespace so values such as " p" still match and
    # whitespace-only submissions are treated as missing.
    url = (request.form.get("url") or "").strip()
    tag = (request.form.get("tag") or "").strip()
    if not url or not tag:
        return render_template("result.html", error="Both URL and Tag are required.")

    # NOTE(review): this fetches an arbitrary user-supplied URL from the
    # server (SSRF risk) - consider restricting target hosts if the app is
    # deployed publicly.
    try:
        # A timeout keeps an unresponsive remote host from blocking the worker forever.
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # Covers malformed URLs, connection problems, timeouts and HTTP error statuses.
        return render_template("result.html", error=f"Could not fetch the URL: {exc}")

    soup = BeautifulSoup(response.text, "html.parser")
    elements = [element.get_text() for element in soup.find_all(tag)]

    # soup.title is None when the document has no <title>, and .string is None
    # when the title is empty or has nested markup; fall back in both cases.
    title_tag = soup.title
    title = (title_tag.string if title_tag is not None else None) or "No Title"

    return render_template(
        "result.html",
        tag=tag,
        url=url,
        title=title,
        elements=elements,
    )
24
+
25
if __name__ == "__main__":
    import os

    # Debug mode enables Werkzeug's interactive debugger, which allows
    # arbitrary code execution from the browser. Keep it off unless the
    # developer opts in explicitly through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ flask
2
+ requests
3
+ beautifulsoup4
4
+ gunicorn
5
+
6
+
7
+
8
+
9
+
10
+
11
+
12
+
templates/index.html ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{# Landing page: a form that posts a URL and an HTML tag name to /scrape. #}
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="icon" type="image/png"
        href="https://cdn.glitch.global/011875c1-2e8a-4ff4-806a-793934a0acda/android-chrome-512x512.png?v=1734461641548" />
    <title>Web Scraper</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 0;
            background-color: #f9f9f9;
            text-align: center;
        }
        .logo {
            margin-top: 30px;
            width: 128px;
            height: 128px;
        }
    </style>
</head>
<body>
    <img src="https://cdn.glitch.global/011875c1-2e8a-4ff4-806a-793934a0acda/android-chrome-512x512.png?v=1734461641548"
        alt="Logo" class="logo">
    <h1>Customizable Web Scraper</h1>
    <form action="/scrape" method="POST">
        <label for="url">Enter URL:</label><br>
        {# type="url" lets the browser reject values that are not absolute URLs before submitting. #}
        <input type="url" id="url" name="url" placeholder="https://example.com" required><br><br>

        <label for="tag">Enter Tag to Scrape (e.g., p, h1, img):</label><br>
        <input type="text" id="tag" name="tag" placeholder="p" required><br><br>

        <button type="submit">Scrape</button>
    </form>

</body>
</html>
templates/result.html ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{# Result page for /scrape: shows either an error message or the scraped elements. #}
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <link rel="icon" type="image/png"
        href="https://cdn.glitch.global/011875c1-2e8a-4ff4-806a-793934a0acda/android-chrome-512x512.png?v=1734461641548" />
    <title>Scraped Results</title>
</head>
<body>
    <h1>Scraped Results</h1>
    {% if error %}
    <p style="color: red;">{{ error }}</p>
    {% else %}
    <h2>Title: {{ title }}</h2>
    {# rel="noopener noreferrer" stops the opened page from reaching back through window.opener. #}
    <h3>Content from: <a href="{{ url }}" target="_blank" rel="noopener noreferrer">{{ url }}</a></h3>
    <h3>Scraped Elements for Tag: &lt;{{ tag }}&gt;</h3>
    <ul>
        {% for element in elements %}
        <li>{{ element }}</li>
        {% else %}
        <li>No content found for tag &lt;{{ tag }}&gt;.</li>
        {% endfor %}
    </ul>
    {% endif %}
    <br>
    <a href="/">Go Back</a>
</body>
</html>