Velpurisubbarao19 committed on
Commit
fd2e077
·
verified ·
1 Parent(s): 5dc8fa2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -31
app.py CHANGED
@@ -12,7 +12,7 @@ except (ModuleNotFoundError, ImportError):
12
  MODULES_AVAILABLE = False
13
 
14
  class URLValidator:
15
- def _init_(self):
16
  if MODULES_AVAILABLE:
17
  self.similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
18
  self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
@@ -23,59 +23,77 @@ class URLValidator:
23
  def fetch_page_content(self, url):
24
  """Fetches webpage text content."""
25
  headers = {
26
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
 
 
27
  }
28
  try:
29
  response = requests.get(url, headers=headers, timeout=10)
30
  response.raise_for_status()
31
  soup = BeautifulSoup(response.text, "html.parser")
32
- return " ".join([p.text for p in soup.find_all("p")])
 
33
  except requests.RequestException:
34
  return "ERROR: Unable to fetch webpage content."
35
 
36
  def rate_url_validity(self, user_query, url):
37
  """Validates URL credibility."""
38
  content = self.fetch_page_content(url)
39
- if not content:
40
  return {
41
- "status": "error",
42
- "message": "ERROR: Failed to retrieve webpage content.",
43
- "suggestion": "Try another URL or check if the website blocks bots."
44
  }
45
 
46
  if not MODULES_AVAILABLE:
47
  return {
48
- "status": "warning",
49
- "message": "Machine learning models unavailable.",
50
- "suggestion": "Install necessary ML modules."
51
  }
52
 
53
- similarity_score = int(util.pytorch_cos_sim(
54
- self.similarity_model.encode(user_query),
55
- self.similarity_model.encode(content)
56
- ).item() * 100)
 
 
 
 
57
 
58
- sentiment_result = self.sentiment_analyzer(content[:512])[0]
59
- bias_score = 100 if sentiment_result["label"].upper() == "POSITIVE" else 50 if sentiment_result["label"].upper() == "NEUTRAL" else 30
60
- final_score = round((0.5 * similarity_score) + (0.5 * bias_score), 2)
 
 
 
 
 
 
 
 
 
 
61
 
 
62
  return {
63
  "Content Relevance Score": f"{similarity_score} / 100",
64
  "Bias Score": f"{bias_score} / 100",
65
  "Final Validity Score": f"{final_score} / 100"
66
  }
67
 
68
- # Sample queries and URLs
69
  sample_queries = [
70
- "What are the benefits of a plant-based diet?"
71
- "How does quantum computing work?"
72
- "What are the causes of climate change?"
73
- "Explain the basics of blockchain technology."
74
- "How can I learn a new language quickly?"
75
- "What are the symptoms of diabetes?"
76
- "What are the best books for personal development?"
77
- "How does 5G technology impact daily life?"
78
- "What are the career opportunities in data science?"
79
  "What are the ethical concerns surrounding AI?"
80
  ]
81
 
@@ -102,14 +120,16 @@ def validate_url(user_query, url):
102
  with gr.Blocks() as demo:
103
  gr.Markdown("# URL Credibility Validator")
104
  gr.Markdown("### Validate the credibility of any webpage using AI")
105
-
106
- user_query = gr.Dropdown(choices=sample_queries, label="Select a search query:")
107
- url_input = gr.Dropdown(choices=sample_urls, label="Select a URL to validate:")
108
 
109
- output = gr.Textbox(label="Validation Results")
 
 
 
 
110
 
111
  validate_button = gr.Button("Validate URL")
112
  validate_button.click(validate_url, inputs=[user_query, url_input], outputs=output)
113
 
114
  if __name__ == "__main__":
115
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
12
  MODULES_AVAILABLE = False
13
 
14
  class URLValidator:
15
+ def __init__(self):
16
  if MODULES_AVAILABLE:
17
  self.similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
18
  self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
 
23
  def fetch_page_content(self, url):
24
  """Fetches webpage text content."""
25
  headers = {
26
+ "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
27
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
28
+ "Chrome/91.0.4472.124 Safari/537.36")
29
  }
30
  try:
31
  response = requests.get(url, headers=headers, timeout=10)
32
  response.raise_for_status()
33
  soup = BeautifulSoup(response.text, "html.parser")
34
+ paragraphs = [p.get_text(strip=True) for p in soup.find_all("p")]
35
+ return " ".join(paragraphs)
36
  except requests.RequestException:
37
  return "ERROR: Unable to fetch webpage content."
38
 
39
  def rate_url_validity(self, user_query, url):
40
  """Validates URL credibility."""
41
  content = self.fetch_page_content(url)
42
+ if content.startswith("ERROR"):
43
  return {
44
+ "Status": "Error",
45
+ "Message": content,
46
+ "Suggestion": "Try another URL or check if the website blocks bots."
47
  }
48
 
49
  if not MODULES_AVAILABLE:
50
  return {
51
+ "Status": "Warning",
52
+ "Message": "Machine learning modules are unavailable.",
53
+ "Suggestion": "Install the required ML modules to enable full functionality."
54
  }
55
 
56
+ # Compute similarity score
57
+ try:
58
+ user_embedding = self.similarity_model.encode(user_query)
59
+ content_embedding = self.similarity_model.encode(content)
60
+ similarity_score = int(util.pytorch_cos_sim(user_embedding, content_embedding).item() * 100)
61
+ except Exception as e:
62
+ similarity_score = 0
63
+ print(f"Error computing similarity: {e}")
64
 
65
+ # Analyze sentiment on first 512 characters
66
+ try:
67
+ sentiment_result = self.sentiment_analyzer(content[:512])[0]
68
+ label = sentiment_result.get("label", "").upper()
69
+ if label == "POSITIVE":
70
+ bias_score = 100
71
+ elif label == "NEUTRAL":
72
+ bias_score = 50
73
+ else:
74
+ bias_score = 30
75
+ except Exception as e:
76
+ bias_score = 50
77
+ print(f"Error analyzing sentiment: {e}")
78
 
79
+ final_score = round((0.5 * similarity_score) + (0.5 * bias_score), 2)
80
  return {
81
  "Content Relevance Score": f"{similarity_score} / 100",
82
  "Bias Score": f"{bias_score} / 100",
83
  "Final Validity Score": f"{final_score} / 100"
84
  }
85
 
86
+ # New sample queries and URLs (with commas between items)
87
  sample_queries = [
88
+ "What are the benefits of a plant-based diet?",
89
+ "How does quantum computing work?",
90
+ "What are the causes of climate change?",
91
+ "Explain the basics of blockchain technology.",
92
+ "How can I learn a new language quickly?",
93
+ "What are the symptoms of diabetes?",
94
+ "What are the best books for personal development?",
95
+ "How does 5G technology impact daily life?",
96
+ "What are the career opportunities in data science?",
97
  "What are the ethical concerns surrounding AI?"
98
  ]
99
 
 
120
  with gr.Blocks() as demo:
121
  gr.Markdown("# URL Credibility Validator")
122
  gr.Markdown("### Validate the credibility of any webpage using AI")
 
 
 
123
 
124
+ with gr.Row():
125
+ user_query = gr.Dropdown(choices=sample_queries, label="Select a search query:")
126
+ url_input = gr.Dropdown(choices=sample_urls, label="Select a URL to validate:")
127
+
128
+ output = gr.Textbox(label="Validation Results", lines=10)
129
 
130
  validate_button = gr.Button("Validate URL")
131
  validate_button.click(validate_url, inputs=[user_query, url_input], outputs=output)
132
 
133
  if __name__ == "__main__":
134
  demo.launch(server_name="0.0.0.0", server_port=7860)
135
+