# NOTE: Hugging Face Space file-viewer chrome (status text, file size, commit
# hashes and a line-number gutter) was captured together with this file when it
# was extracted; it was removed here so the module parses as Python.
from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import requests
from bs4 import BeautifulSoup
import re
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
def reliable_url(url:str)-> bool:
    """A tool that decides if a given url is a reliable source

    Args:
        url: the address of the webpage we want to know whether it is reliable or not
    """
    from urllib.parse import urlparse

    # Entries are "domain" or "domain/path-prefix" for dedicated fact-check sections.
    known_fact_checkers = ["reuters.com/fact-check",
                           "g1.globo.com/fato-ou-fake",
                           "estadao.com.br/estadao-verifica",
                           "snopes.com",
                           "factcheck.org"]
    # Hostname fragments typical of academic institutions (e.g. mit.edu, ox.ac.uk).
    known_academic_sources = ['.edu', '.ac.']
    # Bare domains of established news organizations.
    known_news = ["globo.com",
                  "bbc.com",
                  "midianinja.org",
                  "estadao.com",
                  "brasil247.com",
                  "espn.com.br"
                  ]

    # Parse the URL so we match against the hostname (and path), never the raw
    # string: a plain substring test would let "https://notbbc.com.evil.example/"
    # spoof "bbc.com", or match a trusted domain appearing inside a path.
    parsed = urlparse(url if "://" in url else "//" + url, scheme="http")
    host = parsed.netloc.lower()
    path = parsed.path.lower()

    def _host_matches(domain: str) -> bool:
        # Exact domain, or any subdomain of it (www.bbc.com matches bbc.com).
        return host == domain or host.endswith("." + domain)

    for entry in known_fact_checkers + known_news:
        domain, _, prefix = entry.partition("/")
        if _host_matches(domain) and (not prefix or path.startswith("/" + prefix)):
            return True
    # Academic markers are intentionally fragments, so a substring check on the
    # hostname (not the whole URL) is the right test here.
    return any(marker in host for marker in known_academic_sources)
def url_to_news(url:str)-> dict:
    """A tool that receives a news url and returns the main claim of the webpage content and some aditional context

    Args:
        url: the address of the webpage we want to summarize into main claim and context
    """
    # Present a browser-like User-Agent so simple bot-blocking doesn't reject us.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    try:
        page = requests.get(url, timeout=10, headers=browser_headers)
        page.raise_for_status()
        # Only HTML can be parsed below; skip PDFs, images, feeds, etc.
        if 'text/html' not in page.headers.get('content-type', ''):
            return {"title": "Error", "context": "Non-HTML content"}
        parsed = BeautifulSoup(page.content, "html.parser")
        page_title = parsed.title.text if parsed.title else ""
        # Cap the visible text at 5000 chars to keep the downstream LLM prompt small.
        return {"title": page_title, "context": parsed.get_text()[:5000]}
    except Exception as e:
        # Any failure (network, HTTP status, parsing) is reported in-band via
        # the "Error" title, which callers check for.
        return {"title": "Error", "context": f"Failed to process URL: {str(e)}"}
def support_decision(model, claim:str, news:dict=None)-> str:
    """A tool that decides if a given news article supports, contradicts or have an unclear opinion about a given claim

    Args:
        claim: the claim we want to decide whether it is supported or contradicted
        news: the news that may support or contradict the claim
    """
    # Guard: no article supplied -> analyse against an empty article.
    if news is None:
        news = {"title": "", "context": ""}
    # Guard: upstream fetch failures are tagged with title "Error".
    if news["title"] == "Error":
        return "unclear"
    prompt = f"""
    Analyse the sentiment of the provided news article in contrast with the given claim and respond with only one of the following words
    'supports', 'contradicts', or 'unclear'.
    Claim: "{claim}"
    News: "{news["title"]}
    {news["context"]}"
    Response:
    """
    try:
        verdict = model.run(prompt).strip().lower()
    except Exception:
        # Model failures degrade gracefully to an inconclusive verdict.
        return "unclear"
    # Anything outside the three expected labels is treated as inconclusive.
    return verdict if verdict in ("supports", "contradicts", "unclear") else "unclear"
def search(claim:str)-> str:
    """A tool that receives a claim and gather news about it by making a web search

    Args:
        claim: the claim we are searching about
    """
    search_tool = DuckDuckGoSearchTool()
    # Match http(s) URLs up to whitespace or a closing bracket; compiled once
    # since it is applied to every result page.
    url_pattern = re.compile(r'https?://[^\s\)\]\}]+')
    urls = []
    # Two passes: the claim itself, then the claim qualified with "fact check"
    # to surface dedicated fact-checking coverage.
    for query in (f"{claim}", f"{claim} fact check"):
        urls.extend(url_pattern.findall(search_tool(query)))
    # De-duplicate while preserving discovery order; list(set(...)) would make
    # the result order nondeterministic between runs (hash randomization).
    return list(dict.fromkeys(urls))
@tool
def check(claim:str)-> str:
    """A tool that receives a claim and answers if that information has support from reliable sources or not. This is the first tool you should use.
    Args:
        claim: the claim we want to check for support
    """
    analyzer = HfApiModel()
    support_reliable = []
    contradict_reliable = []
    support_others = []
    contradict_others = []
    # Sort every article found into one of four buckets according to source
    # reliability and whether it supports or contradicts the claim; "unclear"
    # verdicts contribute nothing.
    for article in search(claim):
        verdict = support_decision(analyzer, claim, url_to_news(article))
        if verdict == "supports":
            bucket = support_reliable if reliable_url(article) else support_others
        elif verdict == "contradicts":
            bucket = contradict_reliable if reliable_url(article) else contradict_others
        else:
            continue
        bucket.append(article)
    # Reliable sources take precedence: report on them alone when any exist.
    if support_reliable and contradict_reliable:
        return f"The information about this claim is inconsistent. More search is advised: {support_reliable, contradict_reliable}"
    if support_reliable:
        return f"There are reliable sources that support this information: {support_reliable}"
    if contradict_reliable:
        return f"There are reliable sources that contradict this information: {contradict_reliable}"
    # No reliable coverage at all -- fall back to unverified sources.
    if support_others and contradict_others:
        return f"There are unverified sources that contradict this claim and other unverified sources that support it: {contradict_others + support_others}"
    if support_others:
        return f"There are unverified sources that support this information: {support_others}"
    if contradict_others:
        return f"There are unverified sources that contradict this claim: {contradict_others}"
    return "There are no sources available that talk about this topic"
# Tool the agent uses to emit its final response to the user.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    custom_role_conversions=None,
)

# System/planning prompt templates for the agent live in a separate YAML file.
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

agent = ToolCallingAgent(
    model=model,
    tools=[final_answer,
           check],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)

# Serve the agent through a Gradio chat interface.
# NOTE(review): removed a stray trailing " |" extraction artifact that made
# this line a syntax error.
GradioUI(agent).launch()