loteriof's picture
fix bugs
6bb3655 verified
from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import requests
from bs4 import BeautifulSoup
import re
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
def reliable_url(url:str)-> bool:
"""A tool that decides if a given url is a reliable source
Args:
url: the address of the webpage we want to know whether it is realible or not
"""
known_fact_checkers = ["reuters.com/fact-check",
"g1.globo.com/fato-ou-fake",
"estadao.com.br/estadao-verifica",
"snopes.com",
"factcheck.org"]
known_academic_sources = ['.edu', '.ac.']
known_news = ["globo.com",
"bbc.com",
"midianinja.org",
"estadao.com",
"brasil247.com",
"espn.com.br"
]
reliable = known_academic_sources + known_fact_checkers + known_news
for pattern in reliable:
if pattern in url:
return True
return False
def url_to_news(url:str)-> dict:
"""A tool that receives a news url and returns the main claim of the webpage content and some aditional context
Args:
url: the address of the webpage we want to summarize into main claim and context
"""
try:
# Add headers to avoid being blocked
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}
response = requests.get(url, timeout=10, headers=headers)
response.raise_for_status()
content_type = response.headers.get('content-type', '')
if 'text/html' not in content_type:
return {"title": "Error", "context": "Non-HTML content"}
soup = BeautifulSoup(response.content, "html.parser")
if soup.title:
title = soup.title.text
else:
title = ""
context = soup.get_text()[:5000]
return {"title": title, "context": context}
except Exception as e:
return {"title": "Error", "context": f"Failed to process URL: {str(e)}"}
def support_decision(model, claim:str, news:dict=None)-> str:
"""A tool that decides if a given news article supports, contradicts or have an unclear opinion about a given claim
Args:
claim: the claim we want to decide whether it is supported or contradicted
news: the news that may support or contradict the claim
"""
if news is None:
news = {"title": "", "context": ""}
if news["title"] == "Error":
return "unclear"
prompt = f"""
Analyse the sentiment of the provided news article in contrast with the given claim and respond with only one of the following words
'supports', 'contradicts', or 'unclear'.
Claim: "{claim}"
News: "{news["title"]}
{news["context"]}"
Response:
"""
try:
response = model.run(prompt)
response = response.strip().lower()
if response in ["supports", "contradicts", "unclear"]:
return response
else:
return "unclear"
except Exception:
return "unclear"
def search(claim:str)-> list:
"""A tool that receives a claim and gather news about it by making a web search
Args:
claim: the claim we are searching about
"""
search_tool = DuckDuckGoSearchTool()
url_pattern = r'https?://[^\s\)\]\}]+'
query1 = f"{claim}"
results1 = search_tool(query1)
urls = re.findall(url_pattern, results1)
query2 = f"{claim} fact check"
results2 = search_tool(query2)
urls.extend(re.findall(url_pattern, results2))
return list(set(urls))
@tool
def check(claim:str)-> str:
"""A tool that receives a claim and answers if that information has support from reliable sources or not. This is the first tool you should use.
Args:
claim: the claim we want to check for support
"""
analyzer = HfApiModel()
support_reliable = []
contradict_reliable = []
support_others = []
contradict_others = []
news_articles = search(claim)
for article in news_articles:
reliable_source = reliable_url(article)
news = url_to_news(article)
decision = support_decision(analyzer, claim, news)
if reliable_source:
if decision == "supports":
support_reliable.append(article)
elif decision == "contradicts":
contradict_reliable.append(article)
else:
if decision == "supports":
support_others.append(article)
elif decision == "contradicts":
contradict_others.append(article)
if len(support_reliable) == 0:
if len(contradict_reliable) > 0:
return f"There are reliable sources that contradict this information: {contradict_reliable}"
else:
if len(support_others) == 0:
if len(contradict_others) == 0:
return "There are no sources available that talk about this topic"
else:
return f"There are unverified sources that contradict this claim: {contradict_others}"
else:
if len(contradict_others) == 0:
return f"There are unverified sources that support this information: {support_others}"
else:
return f"There are unverified sources that contradict this claim and other unverified sources that support it: {contradict_others + support_others}"
else:
if len(contradict_reliable) == 0:
return f"There are reliable sources that support this information: {support_reliable}"
else:
return f"The information about this claim is inconsistent. More search is advised: {support_reliable, contradict_reliable}"
final_answer = FinalAnswerTool()
# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
max_tokens=2096,
temperature=0.5,
model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
custom_role_conversions=None,
)
with open("prompts.yaml", 'r') as stream:
prompt_templates = yaml.safe_load(stream)
agent = ToolCallingAgent(
model=model,
tools=[final_answer,
check],
max_steps=6,
verbosity_level=1,
grammar=None,
planning_interval=None,
name=None,
description=None,
prompt_templates=prompt_templates
)
GradioUI(agent).launch()