Spaces:

Velpurisubbarao19
/

Algorithm_of_data_science_HF

Sleeping

App Files Community

Velpurisubbarao19 committed on Feb 25, 2025

Commit

fd2e077

verified ·

1 Parent(s): 5dc8fa2

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -31

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ except (ModuleNotFoundError, ImportError):
     MODULES_AVAILABLE = False
 class URLValidator:
-    def _init_(self):
         if MODULES_AVAILABLE:
             self.similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
             self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
@@ -23,59 +23,77 @@ class URLValidator:
     def fetch_page_content(self, url):
         """Fetches webpage text content."""
         headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
         }
         try:
             response = requests.get(url, headers=headers, timeout=10)
             response.raise_for_status()
             soup = BeautifulSoup(response.text, "html.parser")
-            return " ".join([p.text for p in soup.find_all("p")])
         except requests.RequestException:
             return "ERROR: Unable to fetch webpage content."
     def rate_url_validity(self, user_query, url):
         """Validates URL credibility."""
         content = self.fetch_page_content(url)
-        if not content:
             return {
-                "status": "error",
-                "message": "ERROR: Failed to retrieve webpage content.",
-                "suggestion": "Try another URL or check if the website blocks bots."
             }
         if not MODULES_AVAILABLE:
             return {
-                "status": "warning",
-                "message": "Machine learning models unavailable.",
-                "suggestion": "Install necessary ML modules."
             }
-        similarity_score = int(util.pytorch_cos_sim(
-            self.similarity_model.encode(user_query),
-            self.similarity_model.encode(content)
-        ).item() * 100)
-        sentiment_result = self.sentiment_analyzer(content[:512])[0]
-        bias_score = 100 if sentiment_result["label"].upper() == "POSITIVE" else 50 if sentiment_result["label"].upper() == "NEUTRAL" else 30
-        final_score = round((0.5 * similarity_score) + (0.5 * bias_score), 2)
         return {
             "Content Relevance Score": f"{similarity_score} / 100",
             "Bias Score": f"{bias_score} / 100",
             "Final Validity Score": f"{final_score} / 100"
         }
-# Sample queries and URLs
 sample_queries = [
-    "What are the benefits of a plant-based diet?"
-    "How does quantum computing work?"
-    "What are the causes of climate change?"
-    "Explain the basics of blockchain technology."
-    "How can I learn a new language quickly?"
-    "What are the symptoms of diabetes?"
-    "What are the best books for personal development?"
-    "How does 5G technology impact daily life?"
-    "What are the career opportunities in data science?"
     "What are the ethical concerns surrounding AI?"
 ]
@@ -102,14 +120,16 @@ def validate_url(user_query, url):
 with gr.Blocks() as demo:
     gr.Markdown("# URL Credibility Validator")
     gr.Markdown("### Validate the credibility of any webpage using AI")
-    user_query = gr.Dropdown(choices=sample_queries, label="Select a search query:")
-    url_input = gr.Dropdown(choices=sample_urls, label="Select a URL to validate:")
-    output = gr.Textbox(label="Validation Results")
     validate_button = gr.Button("Validate URL")
     validate_button.click(validate_url, inputs=[user_query, url_input], outputs=output)
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

     MODULES_AVAILABLE = False
 class URLValidator:
+    def __init__(self):
         if MODULES_AVAILABLE:
             self.similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
             self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
     def fetch_page_content(self, url):
         """Fetches webpage text content."""
         headers = {
+            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                           "AppleWebKit/537.36 (KHTML, like Gecko) "
+                           "Chrome/91.0.4472.124 Safari/537.36")
         }
         try:
             response = requests.get(url, headers=headers, timeout=10)
             response.raise_for_status()
             soup = BeautifulSoup(response.text, "html.parser")
+            paragraphs = [p.get_text(strip=True) for p in soup.find_all("p")]
+            return " ".join(paragraphs)
         except requests.RequestException:
             return "ERROR: Unable to fetch webpage content."
     def rate_url_validity(self, user_query, url):
         """Validates URL credibility."""
         content = self.fetch_page_content(url)
+        if content.startswith("ERROR"):
             return {
+                "Status": "Error",
+                "Message": content,
+                "Suggestion": "Try another URL or check if the website blocks bots."
             }
         if not MODULES_AVAILABLE:
             return {
+                "Status": "Warning",
+                "Message": "Machine learning modules are unavailable.",
+                "Suggestion": "Install the required ML modules to enable full functionality."
             }
+        # Compute similarity score
+        try:
+            user_embedding = self.similarity_model.encode(user_query)
+            content_embedding = self.similarity_model.encode(content)
+            similarity_score = int(util.pytorch_cos_sim(user_embedding, content_embedding).item() * 100)
+        except Exception as e:
+            similarity_score = 0
+            print(f"Error computing similarity: {e}")
+        # Analyze sentiment on first 512 characters
+        try:
+            sentiment_result = self.sentiment_analyzer(content[:512])[0]
+            label = sentiment_result.get("label", "").upper()
+            if label == "POSITIVE":
+                bias_score = 100
+            elif label == "NEUTRAL":
+                bias_score = 50
+            else:
+                bias_score = 30
+        except Exception as e:
+            bias_score = 50
+            print(f"Error analyzing sentiment: {e}")
+        final_score = round((0.5 * similarity_score) + (0.5 * bias_score), 2)
         return {
             "Content Relevance Score": f"{similarity_score} / 100",
             "Bias Score": f"{bias_score} / 100",
             "Final Validity Score": f"{final_score} / 100"
         }
+# New sample queries and URLs (with commas between items)
 sample_queries = [
+    "What are the benefits of a plant-based diet?",
+    "How does quantum computing work?",
+    "What are the causes of climate change?",
+    "Explain the basics of blockchain technology.",
+    "How can I learn a new language quickly?",
+    "What are the symptoms of diabetes?",
+    "What are the best books for personal development?",
+    "How does 5G technology impact daily life?",
+    "What are the career opportunities in data science?",
     "What are the ethical concerns surrounding AI?"
 ]
 with gr.Blocks() as demo:
     gr.Markdown("# URL Credibility Validator")
     gr.Markdown("### Validate the credibility of any webpage using AI")
+    with gr.Row():
+        user_query = gr.Dropdown(choices=sample_queries, label="Select a search query:")
+        url_input = gr.Dropdown(choices=sample_urls, label="Select a URL to validate:")
+    output = gr.Textbox(label="Validation Results", lines=10)
     validate_button = gr.Button("Validate URL")
     validate_button.click(validate_url, inputs=[user_query, url_input], outputs=output)
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)