| | import gradio as gr |
| | import json |
| | import requests |
| | from bs4 import BeautifulSoup |
| |
|
# Optional ML stack: the app degrades to a warning-only mode when these
# heavy dependencies are not installed.
try:
    from sentence_transformers import SentenceTransformer, util
    from transformers import pipeline
except ImportError:  # ModuleNotFoundError is a subclass of ImportError
    print("Warning: Required ML modules are missing. Running in fallback mode.")
    MODULES_AVAILABLE = False
else:
    MODULES_AVAILABLE = True
| |
|
class URLValidator:
    """Scores how credible/relevant a webpage is for a given search query.

    Combines two signals when the ML stack is available:
    - content relevance: cosine similarity between the query and page text;
    - bias: a crude proxy derived from a sentiment classifier's label.
    Falls back to informative Error/Warning dicts when fetching fails or the
    ML modules are missing.
    """

    def __init__(self):
        # Models are loaded only when the optional ML imports succeeded;
        # otherwise the attributes exist but are None (checked via
        # MODULES_AVAILABLE before use).
        if MODULES_AVAILABLE:
            self.similarity_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
            self.sentiment_analyzer = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment")
        else:
            self.similarity_model = None
            self.sentiment_analyzer = None

    def fetch_page_content(self, url):
        """Fetch the visible paragraph text of *url*.

        Returns the concatenated text of all <p> elements on success, or a
        string starting with "ERROR" on any request failure — callers detect
        failure by that prefix, so the sentinel text must keep starting
        with "ERROR".
        """
        # Browser-like User-Agent: many sites refuse the default
        # python-requests agent.
        headers = {
            "User-Agent": ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) "
                           "Chrome/91.0.4472.124 Safari/537.36")
        }
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            paragraphs = [p.get_text(strip=True) for p in soup.find_all("p")]
            return " ".join(paragraphs)
        except requests.RequestException:
            return "ERROR: Unable to fetch webpage content."

    def _bias_score_from_label(self, label):
        """Map a sentiment label to a bias score in {100, 50, 30}.

        Accepts both human-readable labels (POSITIVE/NEUTRAL/NEGATIVE) and
        the raw outputs of cardiffnlp/twitter-roberta-base-sentiment, which
        emits LABEL_0 (negative), LABEL_1 (neutral), LABEL_2 (positive).
        """
        label = (label or "").upper()
        if label in ("POSITIVE", "LABEL_2"):
            return 100
        if label in ("NEUTRAL", "LABEL_1"):
            return 50
        return 30

    def rate_url_validity(self, user_query, url):
        """Validate *url* against *user_query*.

        Returns a dict containing either an Error/Warning status, or three
        formatted scores: content relevance, bias, and their equally
        weighted final validity score.
        """
        content = self.fetch_page_content(url)
        if content.startswith("ERROR"):
            return {
                "Status": "Error",
                "Message": content,
                "Suggestion": "Try another URL or check if the website blocks bots."
            }

        if not MODULES_AVAILABLE:
            return {
                "Status": "Warning",
                "Message": "Machine learning modules are unavailable.",
                "Suggestion": "Install the required ML modules to enable full functionality."
            }

        # Query/content semantic similarity, scaled to 0-100.
        try:
            user_embedding = self.similarity_model.encode(user_query)
            content_embedding = self.similarity_model.encode(content)
            # Cosine similarity lies in [-1, 1]; clamp at 0 so the reported
            # score stays within the documented 0-100 range.
            similarity_score = max(0, int(util.pytorch_cos_sim(user_embedding, content_embedding).item() * 100))
        except Exception as e:
            similarity_score = 0
            print(f"Error computing similarity: {e}")

        # Sentiment as a crude bias proxy; truncate to the model's input limit.
        try:
            sentiment_result = self.sentiment_analyzer(content[:512])[0]
            # BUGFIX: the cardiffnlp model reports LABEL_0/1/2, not
            # POSITIVE/NEUTRAL, so the previous string comparison always fell
            # through to the lowest score. The helper handles both schemes.
            bias_score = self._bias_score_from_label(sentiment_result.get("label", ""))
        except Exception as e:
            bias_score = 50
            print(f"Error analyzing sentiment: {e}")

        # Equal weighting of relevance and bias, rounded to two decimals.
        final_score = round((0.5 * similarity_score) + (0.5 * bias_score), 2)
        return {
            "Content Relevance Score": f"{similarity_score} / 100",
            "Bias Score": f"{bias_score} / 100",
            "Final Validity Score": f"{final_score} / 100"
        }
| |
|
| | |
# Preset demo queries shown in the Gradio query dropdown.
sample_queries = [
    "What are the benefits of a plant-based diet?",
    "How does quantum computing work?",
    "What are the causes of climate change?",
    "Explain the basics of blockchain technology.",
    "How can I learn a new language quickly?",
    "What are the symptoms of diabetes?",
    "What are the best books for personal development?",
    "How does 5G technology impact daily life?",
    "What are the career opportunities in data science?",
    "What are the ethical concerns surrounding AI?"
]

# Demo URLs, loosely paired by index/topic with sample_queries.
sample_urls = [
    "https://www.healthline.com/nutrition/plant-based-diet-guide",
    "https://www.ibm.com/quantum-computing/what-is-quantum-computing",
    "https://climate.nasa.gov/evidence/",
    "https://www.investopedia.com/terms/b/blockchain.asp",
    "https://www.duolingo.com/",
    "https://www.diabetes.org/diabetes",
    "https://jamesclear.com/book-summaries",
    "https://www.qualcomm.com/news/onq/2020/01/10/what-5g-and-how-it-changing-everything",
    "https://datasciencedegree.wisconsin.edu/data-science/what-do-data-scientists-do/",
    "https://aiethicslab.com/"
]
| |
|
# Single shared validator instance; model loading happens once at startup.
validator = URLValidator()
| |
|
def validate_url(user_query, url):
    """Gradio callback: validate *url* for *user_query*, return pretty JSON."""
    report = validator.rate_url_validity(user_query, url)
    return json.dumps(report, indent=2)
| |
|
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# URL Credibility Validator")
    gr.Markdown("### Validate the credibility of any webpage using AI")

    # Inputs: preset query and URL dropdowns populated from the sample lists.
    with gr.Row():
        user_query = gr.Dropdown(choices=sample_queries, label="Select a search query:")
        url_input = gr.Dropdown(choices=sample_urls, label="Select a URL to validate:")

    # Output: the JSON validation report rendered as plain text.
    output = gr.Textbox(label="Validation Results", lines=10)

    validate_button = gr.Button("Validate URL")
    # Wire the button click to the validation callback.
    validate_button.click(validate_url, inputs=[user_query, url_input], outputs=output)
| |
|
if __name__ == "__main__":
    # Bind on all interfaces so the app is reachable from outside a container.
    demo.launch(server_name="0.0.0.0", server_port=7860)
| |
|
| |
|