nielitropar committed on
Commit
32e10fa
·
verified ·
1 Parent(s): 883ed3b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+ app = Flask(__name__)
6
+
7
# Home route: serves the page containing the scrape form.
@app.route("/")
def index():
    """Render and return the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page
11
+
12
# Scraping route: fetch the submitted URL and show the text of every matching tag.
@app.route("/scrape", methods=["POST"])
def scrape():
    """Fetch a user-supplied URL and list the text of each requested tag.

    Reads the ``urll`` and ``tag`` fields from the posted form, downloads the
    page with a browser-like ``User-Agent``, parses it with BeautifulSoup and
    renders ``result.html``.

    Returns:
        The rendered ``result.html`` template. On success it receives
        ``title``, ``elements``, ``tag`` and ``url``; on a missing field or
        any failure while fetching/parsing/rendering it receives ``error``.
    """
    # The route only accepts POST (see the decorator), so no method check is
    # needed here; the old check left an implicit ``None`` fall-through path.

    # NOTE(review): the form key is spelled "urll" — presumably it matches the
    # input name in index.html; confirm against the template before renaming.
    url = (request.form.get("urll") or "").strip()
    tag = (request.form.get("tag") or "").strip()

    # Both fields are mandatory; whitespace-only input counts as missing.
    if not url or not tag:
        error_message = "Both URL and Tag are required fields."
        return render_template("result.html", error=error_message)

    # SECURITY(review): ``url`` is untrusted and fetched server-side, so this
    # endpoint can be pointed at internal addresses (SSRF). Consider an
    # allow-list or blocking private ranges if the app is publicly reachable.
    try:
        # Browser-like headers: some sites answer 403 Forbidden to the default
        # python-requests User-Agent.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

        # Without a timeout, requests waits forever on a stalled server and
        # ties up the worker handling this request.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions

        soup = BeautifulSoup(response.content, "html.parser")

        # Text content of every element matching the user-chosen tag name.
        elements = [element.get_text() for element in soup.find_all(tag)]

        # ``soup.title.string`` is None for an empty <title> or one with
        # nested markup, so fall back in that case as well as when the tag
        # is absent.
        title = (soup.title.string if soup.title else None) or "No Title Found"

        return render_template("result.html", title=title, elements=elements, tag=tag, url=url)

    except Exception as e:
        # Top-level boundary for this view: report the failure to the user
        # on the result page instead of returning a 500.
        error_message = "An error occurred: {}".format(str(e))
        return render_template("result.html", error=error_message)
47
+
48
+
49
if __name__ == "__main__":
    import os

    # Debug mode turns on the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off unless it
    # is explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)