loteriof commited on
Commit
bae7031
·
verified ·
1 Parent(s): 8c5c24b

implement functions to fact check

Browse files

Implement functions:
reliable_url: returns if an url is part of a selection of reliable sources
url_to_claim: gets the message of the news
support_decision: decides if a news is supporting a given claim
web_search: search for news related to the main claim
check: returns the final decision about some claim based on the web results

Files changed (2) hide show
  1. app.py +0 -69
  2. fact_checker.py +179 -0
app.py DELETED
@@ -1,69 +0,0 @@
1
- from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
2
- import datetime
3
- import requests
4
- import pytz
5
- import yaml
6
- from tools.final_answer import FinalAnswerTool
7
-
8
- from Gradio_UI import GradioUI
9
-
10
- # Below is an example of a tool that does nothing. Amaze us with your creativity !
11
- @tool
12
- def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return type
13
- #Keep this format for the description / args / args description but feel free to modify the tool
14
- """A tool that does nothing yet
15
- Args:
16
- arg1: the first argument
17
- arg2: the second argument
18
- """
19
- return "What magic will you build ?"
20
-
21
- @tool
22
- def get_current_time_in_timezone(timezone: str) -> str:
23
- """A tool that fetches the current local time in a specified timezone.
24
- Args:
25
- timezone: A string representing a valid timezone (e.g., 'America/New_York').
26
- """
27
- try:
28
- # Create timezone object
29
- tz = pytz.timezone(timezone)
30
- # Get current time in that timezone
31
- local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
32
- return f"The current local time in {timezone} is: {local_time}"
33
- except Exception as e:
34
- return f"Error fetching time for timezone '{timezone}': {str(e)}"
35
-
36
-
37
- final_answer = FinalAnswerTool()
38
-
39
- # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
40
- # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
41
-
42
- model = HfApiModel(
43
- max_tokens=2096,
44
- temperature=0.5,
45
- model_id='Qwen/Qwen2.5-Coder-32B-Instruct',# it is possible that this model may be overloaded
46
- custom_role_conversions=None,
47
- )
48
-
49
-
50
- # Import tool from Hub
51
- image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
52
-
53
- with open("prompts.yaml", 'r') as stream:
54
- prompt_templates = yaml.safe_load(stream)
55
-
56
- agent = CodeAgent(
57
- model=model,
58
- tools=[final_answer], ## add your tools here (don't remove final answer)
59
- max_steps=6,
60
- verbosity_level=1,
61
- grammar=None,
62
- planning_interval=None,
63
- name=None,
64
- description=None,
65
- prompt_templates=prompt_templates
66
- )
67
-
68
-
69
- GradioUI(agent).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fact_checker.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
2
+ import requests
3
+ from typing import Literal
4
+ from bs4 import BeautifulSoup
5
+ import pytz
6
+ import yaml
7
+ from tools.final_answer import FinalAnswerTool
8
+
9
+ from Gradio_UI import GradioUI
10
+
11
+ def reliable_url(url)-> bool:
12
+ """A tool that decides if a given url is a reliable source
13
+ Args:
14
+ url: the address of the webpage we want to know whether it is realible or not
15
+ """
16
+ known_fact_checkers = ["reuters.com/fact-check",
17
+ "g1.globo.com/fato-ou-fake",
18
+ "estadao.com.br/estadao-verifica",
19
+ "snopes.com",
20
+ "factcheck.org"]
21
+ known_academic_sources = ['.edu', '.ac.']
22
+ known_news = ["globo.com",
23
+ "bbc.com",
24
+ "midianinja.org",
25
+ "estadao.com",
26
+ "brasil247.com",
27
+ "espn.com.br"
28
+ ]
29
+
30
+ reliable = known_academic_sources + known_fact_checkers + known_news
31
+
32
+ for pattern in reliable:
33
+ if pattern in url:
34
+ return True
35
+ return False
36
+
37
+ def url_to_claim(url:str)-> dict:
38
+ """A tool that receives a news url and returns the main claim of the webpage content and some aditional context
39
+ Args:
40
+ url: the address of the webpage we want to summarize into main claim and context
41
+ """
42
+ try:
43
+ response = requests.get(url, timeout=10)
44
+ response.raise_for_status()
45
+ except:
46
+ requests.exceptions.RequestException
47
+ return f"An error occurred: {requests.exceptions.RequestExceptions}"
48
+
49
+ soup = BeautifulSoup(response.content, "html.parser")
50
+ title = soup.title.text
51
+ context = soup.get_text()[:2000]
52
+
53
+ return {"claim": title, "context": context}
54
+
55
+ def support_decision(claim:str, news:dict)-> Literal["supports", "contradicts", "unclear"]:
56
+ """A tool that decides if a given news article supports, contradicts or have an unclear opinion about a given claim
57
+ Args:
58
+ claim: the claim we want to decide whether it is supported or contradicted
59
+ news: the news that may be supporting or contradicting the claim
60
+ """
61
+ model = HfApiModel()
62
+
63
+ prompt = f"""
64
+ Analyze the sentiment of the provided news article in contrast with the given claim and respond with only one of the following words
65
+ 'supports', 'contradicts', or 'unclear'.
66
+
67
+ Claim: "{claim}"
68
+
69
+ News: "{news}"
70
+
71
+ Response:
72
+ """
73
+
74
+ response = model.run(prompt)
75
+ response = response.strip().lower()
76
+
77
+ if response in ["supports", "contradicts", "unclear"]:
78
+ return response
79
+ else:
80
+ return "An error ocurred: the model was not able to analyze the news article"
81
+
82
+ def web_search(claim:str)-> list:
83
+ """A tool that receives a claim and gather news about it by making a web search
84
+ Args:
85
+ claim: the claim we are searching about
86
+ """
87
+ search_tool = DuckDuckGoSearchTool()
88
+ url_pattern = r'https?://[^\s\)\]\}]+'
89
+
90
+ query1 = f"{claim}"
91
+ results1 = search_tool(query1)
92
+ urls = re.findall(url_pattern, results1)
93
+
94
+ query2 = f"{claim} fact check"
95
+ results2 = search_tool(query2)
96
+ urls.append(re.findall(url_pattern, results2))
97
+
98
+ return urls
99
+
100
+ @tool
101
+ def check(claim:str)-> str:
102
+ """A tool that receives a claim and answers if that information has support from reliable sources or not. This is the first tool you should use.
103
+ Args:
104
+ claim: the claim we want to check for support
105
+ """
106
+ support_reliable = []
107
+ contradict_reliable = []
108
+ support_others = []
109
+ contradict_others = []
110
+
111
+ news_articles = web_search(claim)
112
+ for article in news_articles:
113
+ reliable_source = reliable_url(article)
114
+ news = url_to_claim(article)
115
+ decision = support_decision(claim, news)
116
+ if reliable_source:
117
+ if decision == "support":
118
+ support_reliable.append(article)
119
+ elif decision == "contradict":
120
+ contradict_reliable.append(article)
121
+ else:
122
+ if decision == "support":
123
+ support_others.append(article)
124
+ elif decision == "contradict":
125
+ contradict_others.append(article)
126
+
127
+ if len(support_reliable) == 0:
128
+ if len(contradict_reliable) > 0:
129
+ return f"There are reliable sources that contradict this information: {contradict_reliable}"
130
+ else:
131
+ if len(support_others) == 0:
132
+ if len(contradict_others) == 0:
133
+ return "There are no sources available that talk about this topic"
134
+ else:
135
+ return f"There are unverified sources that contradict this claim: {contradict_others}"
136
+ else:
137
+ if len(contradict_others) == 0:
138
+ return f"There are unverified sources that support this information: {support_others}"
139
+ else:
140
+ return f"There are unverified sources that contradict this claim and other unverified sources that support it: {contradict_others + support_others}"
141
+ else:
142
+ if len(contradict_reliable) == 0:
143
+ return f"There are reliable sources that support this information: {support_reliable}"
144
+ else:
145
+ return f"The information about this claim is inconsistent. More search is advised: {support_reliable, contradict_reliable}"
146
+ final_answer = FinalAnswerTool()
147
+
148
+ # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
149
+ # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
150
+
151
+ model = HfApiModel(
152
+ max_tokens=2096,
153
+ temperature=0.5,
154
+ model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
155
+ custom_role_conversions=None,
156
+ )
157
+
158
+
159
+ with open("prompts.yaml", 'r') as stream:
160
+ prompt_templates = yaml.safe_load(stream)
161
+
162
+ agent = ToolCallingAgent(
163
+ model=model,
164
+ tools=[final_answer,
165
+ reliable_url,
166
+ url_to_claim,
167
+ claim_check,
168
+ DuckDuckGoSearchTool],
169
+ max_steps=6,
170
+ verbosity_level=1,
171
+ grammar=None,
172
+ planning_interval=None,
173
+ name=None,
174
+ description=None,
175
+ prompt_templates=prompt_templates
176
+ )
177
+
178
+
179
+ GradioUI(agent).launch()