File size: 1,963 Bytes
32e10fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from flask import Flask, render_template, request
import requests
from bs4 import BeautifulSoup

# Application object; the @app.route decorators in this module register their views on it.
app = Flask(__name__)

# Home Route - Serve the page that holds the input form
@app.route("/")
def index():
    """Render and return the ``index.html`` template."""
    landing_page = render_template("index.html")
    return landing_page

# Scraping Route - Fetch the submitted URL and show the text of every matching tag
@app.route("/scrape", methods=["POST"])
def scrape():
    """Fetch a user-supplied URL and render the text of each matching tag.

    Reads the ``urll`` and ``tag`` form fields, downloads the page with a
    browser-like User-Agent, parses it with BeautifulSoup and renders
    ``result.html`` with the page title and the text of every element whose
    tag name equals ``tag``.

    Returns:
        The rendered ``result.html`` template. When a field is missing or
        blank, or when fetching/parsing fails, the template receives only an
        ``error`` message (the failure is also written to the app log).
    """
    # NOTE(review): "urll" looks like a typo, but it has to match the input
    # name used in index.html, which is not visible here - confirm before renaming.
    url = (request.form.get("urll") or "").strip()
    tag = (request.form.get("tag") or "").strip()

    # Whitespace-only values are treated the same as missing ones.
    if not url or not tag:
        error_message = "Both URL and Tag are required fields."
        return render_template("result.html", error=error_message)

    # Browser-like User-Agent: the default python-requests agent is answered
    # with "403 Client Error: Forbidden" by some sites.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        # NOTE(security): the URL comes straight from the client, so this is a
        # server-side request to an arbitrary host (SSRF surface). Restrict the
        # allowed hosts if this app is ever exposed beyond local use.
        # The timeout keeps a stalled remote server from blocking the worker
        # forever; requests waits indefinitely when none is given.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions

        # Pass raw bytes so BeautifulSoup can detect the document encoding itself.
        soup = BeautifulSoup(response.content, "html.parser")

        elements = [element.get_text() for element in soup.find_all(tag)]

        # .string is None for an empty <title> or one containing child tags,
        # so fall back in that case as well as when <title> is absent.
        title = (soup.title.string if soup.title else None) or "No Title Found"

        return render_template("result.html", title=title, elements=elements, tag=tag, url=url)

    except Exception as e:
        # Route boundary: any failure is reported on the result page instead
        # of a 500, but the traceback is logged so it is not lost.
        app.logger.exception("Scrape failed for url=%r tag=%r", url, tag)
        error_message = "An error occurred: {}".format(str(e))
        return render_template("result.html", error=error_message)


if __name__ == "__main__":
    import os

    # Debug mode turns on Werkzeug's interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Keep it opt-in:
    # set FLASK_DEBUG=1 (or true/yes/on) for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)