File size: 5,334 Bytes
4e78973
 
 
 
 
 
 
 
 
 
 
 
 
 
fd2e077
4e78973
 
 
 
 
 
 
 
 
 
fd2e077
 
 
4e78973
 
 
 
 
fd2e077
 
4e78973
 
 
 
 
 
fd2e077
4e78973
fd2e077
 
 
4e78973
 
 
 
fd2e077
 
 
4e78973
 
fd2e077
 
 
 
 
 
 
 
4e78973
fd2e077
 
 
 
 
 
 
 
 
 
 
 
 
4e78973
fd2e077
4e78973
 
 
 
 
 
fd2e077
4e78973
fd2e077
 
 
 
 
 
 
 
 
4e78973
 
 
 
6db4a08
 
 
 
 
 
 
 
 
 
4e78973
 
 
 
 
 
 
 
 
 
 
 
 
fd2e077
 
 
 
 
4e78973
 
 
 
5dc8fa2
 
fd2e077
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import gradio as gr
import json
import requests
from bs4 import BeautifulSoup

# Heavyweight ML dependencies are optional: if either package is missing the
# app still starts, but URLValidator only returns a warning result.
try:
    from sentence_transformers import SentenceTransformer, util
    from transformers import pipeline
    MODULES_AVAILABLE = True
except ImportError:  # ModuleNotFoundError is a subclass of ImportError
    print("Warning: Required ML modules are missing. Running in fallback mode.")
    MODULES_AVAILABLE = False

class URLValidator:
    """Rates the credibility/relevance of a webpage for a user query.

    Relevance is the cosine similarity between embeddings of the query and
    the page's paragraph text; "bias" is approximated with a sentiment
    classifier (positive/neutral pages score higher). When the optional ML
    modules are not installed, validation degrades to a warning-only mode.
    """

    # Sentiment label -> bias score (0-100). The
    # cardiffnlp/twitter-roberta-base-sentiment checkpoint emits
    # LABEL_0 / LABEL_1 / LABEL_2 (negative / neutral / positive); plain
    # NEGATIVE/NEUTRAL/POSITIVE labels are accepted for checkpoints that
    # use the spelled-out convention.
    _BIAS_BY_LABEL = {
        "LABEL_2": 100, "POSITIVE": 100,
        "LABEL_1": 50, "NEUTRAL": 50,
        "LABEL_0": 30, "NEGATIVE": 30,
    }
    # Fallback when the classifier returns an unrecognized label or fails.
    _DEFAULT_BIAS = 50

    def __init__(self):
        # Models are loaded eagerly, once per instance; both are large
        # downloads on first use.
        if MODULES_AVAILABLE:
            self.similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
            self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
        else:
            self.similarity_model = None
            self.sentiment_analyzer = None

    def fetch_page_content(self, url):
        """Fetch *url* and return its concatenated <p> text.

        On any network/HTTP failure, returns a sentinel string beginning
        with "ERROR:" (checked by rate_url_validity) instead of raising.
        """
        # Browser-like UA: some sites reject default requests user agents.
        headers = {
            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) "
                           "Chrome/91.0.4472.124 Safari/537.36")
        }
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            paragraphs = [p.get_text(strip=True) for p in soup.find_all("p")]
            return " ".join(paragraphs)
        except requests.RequestException:
            return "ERROR: Unable to fetch webpage content."

    def rate_url_validity(self, user_query, url):
        """Validate URL credibility against *user_query*.

        Returns a dict of score strings on success, or a dict with
        Status/Message/Suggestion keys when fetching fails, the page has
        no paragraph text, or the ML modules are unavailable.
        """
        content = self.fetch_page_content(url)
        if content.startswith("ERROR"):
            return {
                "Status": "Error",
                "Message": content,
                "Suggestion": "Try another URL or check if the website blocks bots."
            }

        if not MODULES_AVAILABLE:
            return {
                "Status": "Warning",
                "Message": "Machine learning modules are unavailable.",
                "Suggestion": "Install the required ML modules to enable full functionality."
            }

        # A page with no <p> text would produce a meaningless similarity
        # over an empty string; report it rather than scoring garbage.
        if not content.strip():
            return {
                "Status": "Warning",
                "Message": "No readable paragraph text was found on the page.",
                "Suggestion": "Try another URL that contains textual content."
            }

        # Relevance: cosine similarity of query vs. content embeddings,
        # scaled to 0-100.
        try:
            user_embedding = self.similarity_model.encode(user_query)
            content_embedding = self.similarity_model.encode(content)
            similarity_score = int(util.pytorch_cos_sim(user_embedding, content_embedding).item() * 100)
        except Exception as e:
            similarity_score = 0
            print(f"Error computing similarity: {e}")

        # Bias proxy: sentiment of the first 512 characters.
        # BUG FIX: the original compared against "POSITIVE"/"NEUTRAL", but
        # this model emits "LABEL_0"/"LABEL_1"/"LABEL_2", so every page
        # silently fell through to the 30 branch. Map both conventions.
        try:
            sentiment_result = self.sentiment_analyzer(content[:512])[0]
            label = sentiment_result.get("label", "").upper()
            bias_score = self._BIAS_BY_LABEL.get(label, self._DEFAULT_BIAS)
        except Exception as e:
            bias_score = self._DEFAULT_BIAS
            print(f"Error analyzing sentiment: {e}")

        # Equal-weight blend of relevance and bias.
        final_score = round((0.5 * similarity_score) + (0.5 * bias_score), 2)
        return {
            "Content Relevance Score": f"{similarity_score} / 100",
            "Bias Score": f"{bias_score} / 100",
            "Final Validity Score": f"{final_score} / 100"
        }

# Preset demo inputs for the dropdowns below. sample_urls[i] topically
# matches sample_queries[i], though users may mix and match freely.
sample_queries = [
    "What are the benefits of a plant-based diet?",
    "How does quantum computing work?",
    "What are the causes of climate change?",
    "Explain the basics of blockchain technology.",
    "How can I learn a new language quickly?",
    "What are the symptoms of diabetes?",
    "What are the best books for personal development?",
    "How does 5G technology impact daily life?",
    "What are the career opportunities in data science?",
    "What are the ethical concerns surrounding AI?"
]

sample_urls = [
    "https://www.healthline.com/nutrition/plant-based-diet-guide",
    "https://www.ibm.com/quantum-computing/what-is-quantum-computing",
    "https://climate.nasa.gov/evidence/",
    "https://www.investopedia.com/terms/b/blockchain.asp",
    "https://www.duolingo.com/",
    "https://www.diabetes.org/diabetes",
    "https://jamesclear.com/book-summaries",
    "https://www.qualcomm.com/news/onq/2020/01/10/what-5g-and-how-it-changing-everything",
    "https://datasciencedegree.wisconsin.edu/data-science/what-do-data-scientists-do/",
    "https://aiethicslab.com/"
]

# One shared validator instance serves every Gradio request.
validator = URLValidator()

def validate_url(user_query, url):
    """Gradio callback: run the credibility check and return pretty-printed JSON."""
    report = validator.rate_url_validity(user_query, url)
    return json.dumps(report, indent=2)

# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# URL Credibility Validator")
    gr.Markdown("### Validate the credibility of any webpage using AI")
    
    # Side-by-side dropdowns fed by the index-aligned sample lists above.
    with gr.Row():
        user_query = gr.Dropdown(choices=sample_queries, label="Select a search query:")
        url_input = gr.Dropdown(choices=sample_urls, label="Select a URL to validate:")

    # The JSON report from validate_url is shown as plain text.
    output = gr.Textbox(label="Validation Results", lines=10)  
    
    validate_button = gr.Button("Validate URL")
    validate_button.click(validate_url, inputs=[user_query, url_input], outputs=output)

# Bind to all interfaces so the demo is reachable from other hosts
# (e.g. when running inside a container); 7860 is Gradio's usual port.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)