from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool
import requests
from bs4 import BeautifulSoup
import re
import pytz
import yaml
from tools.final_answer import FinalAnswerTool

from Gradio_UI import GradioUI

def reliable_url(url:str)-> bool:
    """A tool that decides if a given url is a reliable source
    Args:
        url: the address of the webpage we want to know whether it is realible or not
    """
    known_fact_checkers = ["reuters.com/fact-check",
                              "g1.globo.com/fato-ou-fake",
                              "estadao.com.br/estadao-verifica",
                              "snopes.com",
                              "factcheck.org"]
    known_academic_sources = ['.edu', '.ac.']
    known_news = ["globo.com",
                  "bbc.com",
                  "midianinja.org",
                  "estadao.com",
                  "brasil247.com",
                  "espn.com.br"
                 ]

    reliable = known_academic_sources + known_fact_checkers + known_news
    
    for pattern in reliable:
        if pattern in url:
            return True
    return False

def url_to_news(url:str)-> dict:
    """A tool that receives a news url and returns the main claim of the webpage content and some aditional context
    Args:
        url: the address of the webpage we want to summarize into main claim and context
    """
    try:
        # Add headers to avoid being blocked
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, timeout=10, headers=headers)
        response.raise_for_status()
        
        content_type = response.headers.get('content-type', '')
        if 'text/html' not in content_type:
            return {"title": "Error", "context": "Non-HTML content"}

        soup = BeautifulSoup(response.content, "html.parser")
        if soup.title:
            title = soup.title.text
        else:
            title = ""
            
        context = soup.get_text()[:5000]

        return {"title": title, "context": context}
    
    except Exception as e:
        return {"title": "Error", "context": f"Failed to process URL: {str(e)}"}

def support_decision(model, claim:str, news:dict=None)-> str:
    """A tool that decides if a given news article supports, contradicts or have an unclear opinion about a given claim
    Args:
        claim: the claim we want to decide whether it is supported or contradicted
        news: the news that may support or contradict the claim
    """
    if news is None:
        news = {"title": "", "context": ""}
    if news["title"] == "Error":
        return "unclear"

    prompt = f"""
    Analyse the sentiment of the provided news article in contrast with the given claim and respond with only one of the following words 
    'supports', 'contradicts', or 'unclear'.

    Claim: "{claim}"
    
    News: "{news["title"]}
    {news["context"]}"
    
    Response:
    """

    try:
        response = model.run(prompt)
        response = response.strip().lower()
        
        if response in ["supports", "contradicts", "unclear"]:
            return response
        else:
            return "unclear"
    except Exception:
        return "unclear"


def search(claim:str)-> list:
    """A tool that receives a claim and gather news about it by making a web search
    Args:
        claim: the claim we are searching about
    """
    search_tool = DuckDuckGoSearchTool()
    url_pattern = r'https?://[^\s\)\]\}]+'

    query1 = f"{claim}"
    results1 = search_tool(query1)
    urls = re.findall(url_pattern, results1)

    query2 = f"{claim} fact check"
    results2 = search_tool(query2)
    urls.extend(re.findall(url_pattern, results2))
    
    return list(set(urls))

@tool
def check(claim:str)-> str:
    """A tool that receives a claim and answers if that information has support from reliable sources or not. This is the first tool you should use.
    Args:
        claim: the claim we want to check for support
    """
    analyzer = HfApiModel()
    
    support_reliable = []
    contradict_reliable = []
    support_others = []
    contradict_others = []
    
    news_articles = search(claim)
    for article in news_articles:
        reliable_source = reliable_url(article)
        news = url_to_news(article)
        decision = support_decision(analyzer, claim, news)
        if reliable_source:
            if decision == "supports":
                support_reliable.append(article)
            elif decision == "contradicts":
                contradict_reliable.append(article)
        else:
            if decision == "supports":
                support_others.append(article)
            elif decision == "contradicts":
                contradict_others.append(article)

    if len(support_reliable) == 0:
        if len(contradict_reliable) > 0:
            return f"There are reliable sources that contradict this information: {contradict_reliable}"
        else:
            if len(support_others) == 0:
                if len(contradict_others) == 0:
                    return "There are no sources available that talk about this topic"
                else:
                    return f"There are unverified sources that contradict this claim: {contradict_others}"
            else:
                if len(contradict_others) == 0:
                    return f"There are unverified sources that support this information: {support_others}"
                else:
                    return f"There are unverified sources that contradict this claim and other unverified sources that support it: {contradict_others + support_others}"
    else:
        if len(contradict_reliable) == 0:
            return f"There are reliable sources that support this information: {support_reliable}"
        else:
            return f"The information about this claim is inconsistent. More search is advised: {support_reliable, contradict_reliable}"
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud' 

model = HfApiModel(
max_tokens=2096,
temperature=0.5,
model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
custom_role_conversions=None,
)


with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
    
agent = ToolCallingAgent(
    model=model,
    tools=[final_answer,
           check],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)


GradioUI(agent).launch()