# Fact-checking agent for a Hugging Face Space: searches the web for a claim,
# classifies sources as reliable/unverified, and reports whether the claim is
# supported or contradicted.
import re
from urllib.parse import urlparse

import pytz
import requests
import yaml
from bs4 import BeautifulSoup
from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool

from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
def reliable_url(url: str) -> bool:
    """A tool that decides if a given url is a reliable source.

    Args:
        url: the address of the webpage we want to know whether it is reliable or not

    Returns:
        True if the URL's host (plus path, for fact-checkers whose pattern
        includes a path segment) matches a known reliable source, else False.
    """
    known_fact_checkers = ["reuters.com/fact-check",
                           "g1.globo.com/fato-ou-fake",
                           "estadao.com.br/estadao-verifica",
                           "snopes.com",
                           "factcheck.org"]
    known_academic_sources = ['.edu', '.ac.']
    known_news = ["globo.com",
                  "bbc.com",
                  "midianinja.org",
                  "estadao.com",
                  "brasil247.com",
                  "espn.com.br"
                  ]
    reliable = known_academic_sources + known_fact_checkers + known_news
    # Match only against host + path, not the raw URL: a substring test on the
    # full URL let any page spoof reliability through its query string
    # (e.g. https://evil.com/?ref=snopes.com previously returned True).
    parsed = urlparse(url)
    host_and_path = f"{parsed.netloc.lower()}{parsed.path.lower()}"
    for pattern in reliable:
        if pattern in host_and_path:
            return True
    return False
def url_to_news(url: str) -> dict:
    """A tool that receives a news url and returns the main claim of the webpage content and some aditional context.

    Args:
        url: the address of the webpage we want to summarize into main claim and context

    Returns:
        A dict with "title" and "context" keys; on any failure the title is
        the sentinel string "Error" and the context describes the problem.
    """
    # Browser-like User-Agent so simple bot filters don't reject the request.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    try:
        page = requests.get(url, timeout=10, headers=request_headers)
        page.raise_for_status()
        if 'text/html' not in page.headers.get('content-type', ''):
            return {"title": "Error", "context": "Non-HTML content"}
        parsed_page = BeautifulSoup(page.content, "html.parser")
        page_title = parsed_page.title.text if parsed_page.title else ""
        # Cap the extracted text so downstream prompts stay reasonably small.
        return {"title": page_title, "context": parsed_page.get_text()[:5000]}
    except Exception as e:
        return {"title": "Error", "context": f"Failed to process URL: {str(e)}"}
def support_decision(model, claim: str, news: dict = None) -> str:
    """A tool that decides if a given news article supports, contradicts or have an unclear opinion about a given claim.

    Args:
        claim: the claim we want to decide whether it is supported or contradicted
        news: the news that may support or contradict the claim

    Returns:
        One of "supports", "contradicts" or "unclear".
    """
    if news is None:
        news = {"title": "", "context": ""}
    # Articles that failed to download carry the sentinel title "Error".
    if news["title"] == "Error":
        return "unclear"
    prompt = f"""
    Analyse the sentiment of the provided news article in contrast with the given claim and respond with only one of the following words
    'supports', 'contradicts', or 'unclear'.
    Claim: "{claim}"
    News: "{news["title"]}
    {news["context"]}"
    Response:
    """
    valid_labels = ("supports", "contradicts", "unclear")
    try:
        verdict = model.run(prompt).strip().lower()
    except Exception:
        return "unclear"
    # Anything outside the expected label set is treated as inconclusive.
    return verdict if verdict in valid_labels else "unclear"
def search(claim: str) -> list:
    """A tool that receives a claim and gather news about it by making a web search.

    Args:
        claim: the claim we are searching about

    Returns:
        De-duplicated list of result URLs, preserving the order in which they
        were discovered.
    """
    search_tool = DuckDuckGoSearchTool()
    # Stop a URL at whitespace or a closing bracket so markdown-style links
    # from the search results don't bleed into the captured address.
    url_pattern = re.compile(r'https?://[^\s\)\]\}]+')
    urls = []
    # Two queries: the raw claim, and the claim qualified for fact-check pages.
    for query in (f"{claim}", f"{claim} fact check"):
        urls.extend(url_pattern.findall(search_tool(query)))
    # dict.fromkeys de-duplicates while keeping insertion order;
    # list(set(...)) returned the URLs in a nondeterministic order.
    return list(dict.fromkeys(urls))
def check(claim: str) -> str:
    """A tool that receives a claim and answers if that information has support from reliable sources or not. This is the first tool you should use.

    Args:
        claim: the claim we want to check for support

    Returns:
        A human-readable verdict listing which sources support or contradict
        the claim, with reliable sources taking precedence over unverified ones.
    """
    analyzer = HfApiModel()
    support_reliable = []
    contradict_reliable = []
    support_others = []
    contradict_others = []
    # Classify every search hit into one of the four buckets above;
    # "unclear" verdicts are discarded.
    for article in search(claim):
        decision = support_decision(analyzer, claim, url_to_news(article))
        is_reliable = reliable_url(article)
        if decision == "supports":
            (support_reliable if is_reliable else support_others).append(article)
        elif decision == "contradicts":
            (contradict_reliable if is_reliable else contradict_others).append(article)
    # Guard clauses instead of the original five-level if/else pyramid;
    # the returned strings are unchanged. Reliable sources win outright.
    if support_reliable and contradict_reliable:
        return f"The information about this claim is inconsistent. More search is advised: {support_reliable, contradict_reliable}"
    if support_reliable:
        return f"There are reliable sources that support this information: {support_reliable}"
    if contradict_reliable:
        return f"There are reliable sources that contradict this information: {contradict_reliable}"
    if support_others and contradict_others:
        return f"There are unverified sources that contradict this claim and other unverified sources that support it: {contradict_others + support_others}"
    if support_others:
        return f"There are unverified sources that support this information: {support_others}"
    if contradict_others:
        return f"There are unverified sources that contradict this claim: {contradict_others}"
    return "There are no sources available that talk about this topic"
# Tool the agent must call to terminate a run and deliver its answer.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

# Prompt templates for the agent live in a YAML file shipped alongside this script.
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

# NOTE(review): `check` is passed as a plain function even though smolagents'
# @tool decorator is imported above — confirm ToolCallingAgent accepts bare
# callables, otherwise the functions should be decorated with @tool.
agent = ToolCallingAgent(
    model=model,
    tools=[final_answer,
           check],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)

# Launch the Gradio web interface wrapping the agent.
GradioUI(agent).launch()