File size: 6,792 Bytes
bae7031
 
 
b664ac4
bae7031
 
 
 
 
 
64535ee
bae7031
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b86f60
bae7031
 
 
 
 
369aa4e
 
 
 
 
bae7031
369aa4e
 
 
6bb3655
bae7031
6bb3655
 
 
 
 
 
 
bae7031
6bb3655
 
 
 
bae7031
7b86f60
bae7031
 
 
dbdc490
bae7031
7b86f60
 
6bb3655
 
bae7031
 
dbdc490
bae7031
 
 
 
7b86f60
 
bae7031
 
 
 
7b86f60
dbdc490
 
 
 
 
 
6bb3655
dbdc490
 
bae7031
89c17f2
3e87b44
bae7031
 
 
 
 
 
 
 
 
 
 
 
 
313addc
bae7031
b664ac4
bae7031
 
 
 
 
 
 
7b86f60
 
bae7031
 
 
 
 
7b86f60
bae7031
 
7b86f60
 
bae7031
b664ac4
bae7031
b664ac4
bae7031
 
b664ac4
bae7031
b664ac4
bae7031
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87f6a51
bae7031
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import requests
from bs4 import BeautifulSoup
import re
import pytz
import yaml
from tools.final_answer import FinalAnswerTool

from Gradio_UI import GradioUI

def reliable_url(url:str)-> bool:
    """A tool that decides if a given url is a reliable source

    Matching is done against the parsed hostname (plus path, for the
    fact-checking sections) instead of a raw substring test on the whole
    URL, so a spoofed address such as ``http://bbc.com.evil.net/`` or a
    trusted domain hidden in a query string is NOT accepted.

    Args:
        url: the address of the webpage we want to know whether it is realible or not

    Returns:
        True when the url belongs to a known fact-checker, academic
        domain, or established news outlet; False otherwise.
    """
    from urllib.parse import urlparse

    # domain[/path-prefix] of dedicated fact-checking sections
    known_fact_checkers = ["reuters.com/fact-check",
                           "g1.globo.com/fato-ou-fake",
                           "estadao.com.br/estadao-verifica",
                           "snopes.com",
                           "factcheck.org"]
    known_news = ["globo.com",
                  "bbc.com",
                  "midianinja.org",
                  "estadao.com",
                  "estadao.com.br",  # kept so www.estadao.com.br URLs still qualify
                  "brasil247.com",
                  "espn.com.br"
                 ]

    parsed = urlparse(url)
    host = parsed.netloc.lower()
    if not host:
        # Scheme-less input such as "bbc.com/news": re-parse as a network path.
        parsed = urlparse("//" + url)
        host = parsed.netloc.lower()
    path = parsed.path.lower().lstrip("/")

    # Academic institutions: *.edu TLD or ".ac." country-style domains.
    if host.endswith(".edu") or ".ac." in host:
        return True

    # Trusted news outlets: the host must BE the domain or a subdomain of
    # it, never merely contain it (prevents look-alike domains).
    for domain in known_news:
        if host == domain or host.endswith("." + domain):
            return True

    # Fact-checking sections: match the domain, and the path prefix when
    # the pattern specifies one.
    for pattern in known_fact_checkers:
        pat_host, _, pat_path = pattern.partition("/")
        if host == pat_host or host.endswith("." + pat_host):
            if not pat_path or path.startswith(pat_path):
                return True

    return False

def url_to_news(url:str)-> dict:
    """A tool that receives a news url and returns the main claim of the webpage content and some aditional context
    Args:
        url: the address of the webpage we want to summarize into main claim and context
    """
    try:
        # Send a browser-like User-Agent so the request is less likely
        # to be rejected by anti-bot filters.
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        page = requests.get(url, timeout=10, headers=browser_headers)
        page.raise_for_status()

        # Only HTML pages can be mined for a title and readable text.
        if 'text/html' not in page.headers.get('content-type', ''):
            return {"title": "Error", "context": "Non-HTML content"}

        parsed = BeautifulSoup(page.content, "html.parser")
        page_title = parsed.title.text if parsed.title else ""

        # Cap the extracted text so downstream prompts stay a manageable size.
        return {"title": page_title, "context": parsed.get_text()[:5000]}

    except Exception as e:
        # Network failures, bad status codes, parse errors: report as an
        # "Error" article that support_decision() treats as unclear.
        return {"title": "Error", "context": f"Failed to process URL: {str(e)}"}

def support_decision(model, claim:str, news:dict=None)-> str:
    """A tool that decides if a given news article supports, contradicts or have an unclear opinion about a given claim
    Args:
        claim: the claim we want to decide whether it is supported or contradicted
        news: the news that may support or contradict the claim
    """
    # Treat a missing article like an empty one; downloads that failed
    # (title flagged "Error") can never give a verdict.
    news = news if news is not None else {"title": "", "context": ""}
    if news["title"] == "Error":
        return "unclear"

    prompt = f"""
    Analyse the sentiment of the provided news article in contrast with the given claim and respond with only one of the following words 
    'supports', 'contradicts', or 'unclear'.

    Claim: "{claim}"
    
    News: "{news["title"]}
    {news["context"]}"
    
    Response:
    """

    valid_labels = ("supports", "contradicts", "unclear")
    try:
        verdict = model.run(prompt).strip().lower()
    except Exception:
        # Model errors are folded into the safe "unclear" answer.
        return "unclear"
    # Anything the model says outside the three allowed words is unclear.
    return verdict if verdict in valid_labels else "unclear"


def search(claim:str)-> list:
    """A tool that receives a claim and gather news about it by making a web search
    Args:
        claim: the claim we are searching about

    Returns:
        A deduplicated list of article urls, in first-seen order.
    """
    search_tool = DuckDuckGoSearchTool()
    # A url runs until whitespace or a closing bracket in the result text.
    url_pattern = r'https?://[^\s\)\]\}]+'

    # Search the claim itself, then an explicit fact-check query.
    urls = re.findall(url_pattern, search_tool(claim))
    urls.extend(re.findall(url_pattern, search_tool(f"{claim} fact check")))

    # Deduplicate while preserving first-seen order; list(set(...)) would
    # return the urls in a nondeterministic order across runs.
    return list(dict.fromkeys(urls))

@tool
def check(claim:str)-> str:
    """A tool that receives a claim and answers if that information has support from reliable sources or not. This is the first tool you should use.
    Args:
        claim: the claim we want to check for support
    """
    analyzer = HfApiModel()

    # Bucket every found article by (reliable source?, verdict).
    support_reliable = []
    contradict_reliable = []
    support_others = []
    contradict_others = []

    for article in search(claim):
        reliable = reliable_url(article)
        news = url_to_news(article)
        decision = support_decision(analyzer, claim, news)
        # "unclear" articles are intentionally discarded.
        if decision == "supports":
            (support_reliable if reliable else support_others).append(article)
        elif decision == "contradicts":
            (contradict_reliable if reliable else contradict_others).append(article)

    # Reliable sources take precedence over unverified ones; within each
    # tier, conflicting evidence is reported before one-sided evidence.
    if support_reliable and contradict_reliable:
        return f"The information about this claim is inconsistent. More search is advised: {support_reliable + contradict_reliable}"
    if support_reliable:
        return f"There are reliable sources that support this information: {support_reliable}"
    if contradict_reliable:
        return f"There are reliable sources that contradict this information: {contradict_reliable}"
    if support_others and contradict_others:
        return f"There are unverified sources that contradict this claim and other unverified sources that support it: {contradict_others + support_others}"
    if support_others:
        return f"There are unverified sources that support this information: {support_others}"
    if contradict_others:
        return f"There are unverified sources that contradict this claim: {contradict_others}"
    return "There are no sources available that talk about this topic"
# --- Application wiring: build the fact-checking agent and serve it ---

# Tool the agent calls to emit its final answer to the user.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud' 

# Hosted inference model that drives the agent's tool-calling loop.
model = HfApiModel(
max_tokens=2096,
temperature=0.5,
model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
custom_role_conversions=None,
)


# Prompt templates for the agent, loaded from a YAML file on disk.
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
    
# Agent restricted to two tools: checking a claim and returning the answer.
agent = ToolCallingAgent(
    model=model,
    tools=[final_answer,
           check],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)


# Launch the agent behind a Gradio web UI (blocks until the server stops).
GradioUI(agent).launch()