Spaces:

valeriow
/

veritas

Sleeping

App Files Files Community

valeriow committed on Oct 2, 2025

Commit

ccd3906

verified ·

1 Parent(s): 80b7c70

Upload 2 files

Browse files

feat: first version

Files changed (2) hide show

app.py +353 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,353 @@

import json
import os
from dataclasses import dataclass
from typing import List, Optional
from urllib.parse import urlparse

import dspy
import gradio as gr
import pandas as pd
import requests
import wikipedia
from ddgs import DDGS
from pydantic import BaseModel

try:
    # google.colab only exists inside a Colab runtime; guard the import so the
    # module can still be loaded elsewhere (e.g. on Hugging Face Spaces).
    from google.colab import userdata
except ImportError:
    userdata = None
# Target confidence index (between 0 and 1). The value is interpolated into the
# prompt text and the `indice_confianca` field description of the DSPy
# signature defined later in this file.
threshold_perc_confianca = 0.95
# Define standardized data structures
@dataclass
class SearchResult:
    """Individual search result structure."""
    # NOTE(review): the tool functions in this file build plain dicts and never
    # instantiate this dataclass — confirm whether it is still needed.
    title: str
    snippet: str
    url: str
    # Optional score; defaults to None when no score is supplied.
    relevance_score: Optional[float] = None
class ToolResponse(BaseModel):
    """Standardized response format for all tools.

    Each tool function creates one instance with ``success=False`` and empty
    results, fills it in, and returns it serialized via ``model_dump_json``.
    """
    # True only once the tool has produced at least one result.
    success: bool
    # Name of the tool function that produced this response.
    tool_name: str
    # The input the tool was called with (search query, claim, topic or URL).
    query: str
    # Number of entries in `results`.
    results_count: int
    # One dict per result; the keys vary by tool.
    results: List[dict]
    # Human-readable failure description; stays None on success.
    error_message: Optional[str] = None
    # Tool-specific extras (e.g. max_results, language, timeout).
    metadata: dict = {}
def search_web(query: str, max_results: int = 5) -> str:
    """Run a general DuckDuckGo text search and return the outcome as JSON.

    Args:
        query: Free-text search query.
        max_results: Upper bound on the number of results returned.

    Returns:
        A JSON string serialized from a ``ToolResponse``. On success it holds
        one ranked entry per hit; otherwise ``error_message`` explains why
        nothing was returned.
    """
    outcome = ToolResponse(
        success=False,
        tool_name="search_web",
        query=query,
        results_count=0,
        results=[],
        metadata={"max_results": max_results},
    )
    try:
        with DDGS() as client:
            hits = list(client.text(query, max_results=max_results))
        if hits:
            # Normalize each hit into the shape the agent consumes.
            entries = [
                {
                    "rank": position,
                    "title": hit.get('title', 'No title'),
                    "snippet": hit.get('body', 'No description'),
                    "url": hit.get('href', 'No URL'),
                    "source": "duckduckgo",
                }
                for position, hit in enumerate(hits[:max_results], start=1)
            ]
            outcome.success = True
            outcome.results_count = len(entries)
            outcome.results = entries
        else:
            outcome.error_message = f"No results found for '{query}'"
    except Exception as exc:
        # Tools never raise: any failure is reported inside the JSON payload.
        outcome.error_message = f"Error searching for '{query}': {exc}"
    return outcome.model_dump_json(indent=2)
def _wikipedia_entry(page_title: str, sentences: int) -> dict:
    """Build the base result entry (title, summary, URL) for one exact page title."""
    # auto_suggest=False: the title comes from Wikipedia itself (search results
    # or disambiguation options), so it must be looked up verbatim. With the
    # library default (auto_suggest=True) the title is first passed through the
    # suggestion API and may be replaced by a different article.
    summary = wikipedia.summary(page_title, sentences=sentences, auto_suggest=False)
    page_url = wikipedia.page(page_title, auto_suggest=False).url
    return {
        "rank": 1,
        "title": page_title,
        "snippet": summary,
        "url": page_url,
        "source": "wikipedia",
    }


def search_wikipedia(query: str, sentences: int = 3) -> str:
    """Search Portuguese Wikipedia and return a summary of the best match as JSON.

    Args:
        query: Free-text search query.
        sentences: Number of sentences to include in the summary snippet.

    Returns:
        A JSON string serialized from a ``ToolResponse``. On success it holds
        exactly one entry for the top search hit, or for the first
        disambiguation option when the top hit is ambiguous. On failure
        ``error_message`` describes the problem. The function never raises.
    """
    response = ToolResponse(
        success=False,
        tool_name="search_wikipedia",
        query=query,
        results_count=0,
        results=[],
        metadata={"sentences": sentences, "language": "pt"}
    )
    try:
        # Set language to Portuguese for Brazilian context
        wikipedia.set_lang("pt")
        search_results = wikipedia.search(query, results=3)
        if not search_results:
            response.error_message = f"No Wikipedia results found for '{query}'"
            return response.model_dump_json(indent=2)
        # Summarize the first hit; expose the remaining titles as alternatives.
        entry = _wikipedia_entry(search_results[0], sentences)
        entry["alternative_titles"] = search_results[1:]
        response.success = True
        response.results_count = 1
        response.results = [entry]
    except wikipedia.exceptions.DisambiguationError as e:
        # The chosen title is a disambiguation page: fall back to its first option.
        try:
            entry = _wikipedia_entry(e.options[0], sentences)
            entry["disambiguation_options"] = e.options[:5]
            response.success = True
            response.results_count = 1
            response.results = [entry]
            response.metadata["disambiguation_resolved"] = True
        except Exception as inner_e:
            # The fallback failed too; report it and list the options instead.
            response.error_message = f"Disambiguation error for '{query}': {str(inner_e)}"
            response.metadata["disambiguation_options"] = e.options[:5]
    except Exception as e:
        response.error_message = f"Error searching Wikipedia for '{query}': {str(e)}"
    return response.model_dump_json(indent=2)
def search_news_verification(claim: str) -> str:
    """Look for fact-checking coverage of a claim and return the outcome as JSON.

    Args:
        claim: The statement to be verified.

    Returns:
        A JSON string serialized from a ``ToolResponse``. Each entry carries an
        ``is_fact_checker`` flag telling whether its URL contains the name of a
        known fact-checking site.
    """
    outcome = ToolResponse(
        success=False,
        tool_name="search_news_verification",
        query=claim,
        results_count=0,
        results=[],
        metadata={"search_type": "fact_check", "target_sites": ["snopes", "factcheck", "boatos.org", "e-farsas"]},
    )
    # Substrings that identify a fact-checking site inside a result URL.
    checker_markers = ['snopes', 'factcheck', 'boatos', 'e-farsas', 'politifact']
    try:
        # Bias the search towards fact-checking sites.
        fact_check_query = f"{claim} fact check verificação OR snopes OR factcheck OR boatos.org OR e-farsas"
        with DDGS() as client:
            hits = list(client.text(fact_check_query, max_results=3))
        if hits:
            entries = []
            for position, hit in enumerate(hits, start=1):
                lowered_url = hit.get('href', '').lower()
                entries.append({
                    "rank": position,
                    "title": hit.get('title', 'No title'),
                    "snippet": hit.get('body', 'No description'),
                    "url": hit.get('href', 'No URL'),
                    "source": "duckduckgo",
                    "is_fact_checker": any(marker in lowered_url for marker in checker_markers),
                    "verification_type": "fact_check",
                })
            outcome.success = True
            outcome.results_count = len(entries)
            outcome.results = entries
        else:
            outcome.error_message = "No fact-checking results found for the claim"
    except Exception as exc:
        # Tools never raise: any failure is reported inside the JSON payload.
        outcome.error_message = f"Error searching for verification: {exc}"
    return outcome.model_dump_json(indent=2)
# Domains treated as credible news outlets by search_credible_sources.
_CREDIBLE_NEWS_SITES = ("g1.com.br", "folha.uol.com.br", "estadao.com.br", "bbc.com")


def _match_credible_site(url: str) -> str:
    """Return the credible domain that hosts *url*, or "unknown" if none does."""
    try:
        host = (urlparse(url).hostname or "").lower()
    except ValueError:
        # Malformed URL (e.g. a broken IPv6 literal): treat as not credible.
        return "unknown"
    for site in _CREDIBLE_NEWS_SITES:
        # Match the domain itself or any subdomain. A plain substring test
        # would also accept look-alike hosts such as "fakebbc.com".
        if host == site or host.endswith("." + site):
            return site
    return "unknown"


def search_credible_sources(topic: str) -> str:
    """Search a fixed set of news outlets for a topic and return the outcome as JSON.

    Args:
        topic: Subject to search for.

    Returns:
        A JSON string serialized from a ``ToolResponse``. Each entry reports
        the matched outlet in ``news_source`` ("unknown" when the URL is not
        hosted on any target domain), and ``is_credible`` is True only when an
        outlet was matched. The function never raises.
    """
    response = ToolResponse(
        success=False,
        tool_name="search_credible_sources",
        query=topic,
        results_count=0,
        results=[],
        metadata={
            "search_type": "credible_sources",
            "target_sites": list(_CREDIBLE_NEWS_SITES)
        }
    )
    try:
        # Restrict the search to the credible outlets via site: operators.
        site_filter = " OR ".join(f"site:{site}" for site in _CREDIBLE_NEWS_SITES)
        credible_query = f"{topic} {site_filter}"
        with DDGS() as ddgs:
            results = list(ddgs.text(credible_query, max_results=3))
        if not results:
            response.error_message = f"No results from credible sources found for '{topic}'"
            return response.model_dump_json(indent=2)
        formatted_results = []
        for i, result in enumerate(results, 1):
            source_site = _match_credible_site(result.get('href') or '')
            formatted_results.append({
                "rank": i,
                "title": result.get('title', 'No title'),
                "snippet": result.get('body', 'No description'),
                "url": result.get('href', 'No URL'),
                "source": "duckduckgo",
                "news_source": source_site,
                # Only vouch for results that really come from a target outlet.
                "is_credible": source_site != "unknown",
                "verification_type": "credible_news"
            })
        response.success = True
        response.results_count = len(formatted_results)
        response.results = formatted_results
    except Exception as e:
        response.error_message = f"Error searching credible sources: {str(e)}"
    return response.model_dump_json(indent=2)
def fetch_url_content(url: str, timeout: int = 10) -> str:
    """Download a URL and return the first 5000 characters of its body as JSON.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        A JSON string serialized from a ``ToolResponse``. On success it holds a
        single entry with the URL, the truncated text and the HTTP status
        code. On failure ``error_message`` describes the problem.
    """
    outcome = ToolResponse(
        success=False,
        tool_name="fetch_url_content",
        query=url,
        results_count=0,
        results=[],
        metadata={"timeout": timeout},
    )
    try:
        reply = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures.
        reply.raise_for_status()
        # Cap the payload size handed back to the agent.
        page_entry = {
            "url": url,
            "content": reply.text[:5000],
            "status_code": reply.status_code,
        }
        outcome.success = True
        outcome.results_count = 1
        outcome.results = [page_entry]
    except Exception as exc:
        # Tools never raise: any failure is reported inside the JSON payload.
        outcome.error_message = f"Error fetching URL '{url}': {exc}"
    return outcome.model_dump_json(indent=2)
# Configure a secret named OPENAI_API_KEY holding the API key.
# On Hugging Face Spaces, the API key will be available as an environment variable
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Fallback for notebook development: read the key from Colab secrets.
    try:
        OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still propagate. Any lookup failure
        # simply means "no key available".
        OPENAI_API_KEY = None
if not OPENAI_API_KEY:
    # Also covers a Colab lookup that succeeded but returned an empty value.
    print("Warning: OPENAI_API_KEY not found in environment variables or Colab secrets.")
    print("Please set the OPENAI_API_KEY environment variable.")

# The LM is configured even without a key so the module (and the UI) still
# loads; predict_fake_news refuses to run until a key is available.
lm = dspy.LM('openai/gpt-4o-mini',  api_key=OPENAI_API_KEY)
dspy.configure(lm=lm)
class DSPySigFakeNews(dspy.Signature):
    # The instructions are assigned to __doc__ explicitly. A formatted string
    # literal (f-string) is never treated as a docstring by Python, so writing
    # it as a bare f"""...""" expression leaves __doc__ unset and the prompt
    # below is discarded. The f-string is needed to interpolate the confidence
    # threshold into the text.
    __doc__ = f"""Você é um agente especializado em detectar notícias falsas. Use as ferramentas disponíveis para verificar a veracidade de todo o conteúdo fornecido.
            Em conteúdos falsos, geralmente alguns fatos são verdadeiros, então muita atenção na classificação que deve ser de toda a notícia.
            Interaja com todas as ferramentas fornecidas para obter uma variedade de fontes.
            Itere diversas vezes, caso julgue necessário, combinando informações de diferentes ferramentas para formar uma análise abrangente.
            Analise passo a passso o que o conteúdo está noticiando. O resultado final deve ser um detalhado processo de investigação e checagem.
            Faça referências ao longo do texto também. E ao final relacione todas as referências novamente.
            SEMPRE gere uma análise com referências/links de todas as evidências utilizadas para chegar no veredito final.
            A análise deve ser um texto explicando detalhadamente o processo de investigação com as fontes utilizadas. Foque em um consumidor que quer entender tudo que está envolvido no conteúdo com o máximo de detalhes e quer reproduzir a investigação. Não poupe palavras.
            Trace uma linha do tempo dos fatos também.
            Sempre que considerar que a análise e a classificação estão se repetindo, finalize a investigação. Caso contrário sinalize para continuar investigando.
            Na reflexão sempre informe um indice_confianca entre 0 e 1, sobre a confiança sobre a classificação. Queremos um índice de confiança acima de {threshold_perc_confianca}. Se estiver abaixo, continue investigando.
            É PRECISO MÁXIMA ACURÁCIA! Muito cuidado.
    """
    # Input: the raw news text to be fact-checked.
    news_content: str = dspy.InputField()
    # Output: the full written analysis, in markdown, with references.
    analise: str = dspy.OutputField(desc=("Análise final detalhada sobre a veracidade da notícia, incluindo evidências e referências. Não esqueça de incluir as referências para consulta nas analises. Em formato markdown. Faça referências ao longo do texto também. E ao final relacione todas as referências novamente."))
    # Output: self-reported confidence in the verdict, from 0 to 1.
    indice_confianca: float = dspy.OutputField(desc=(f"Índice de confiança sobre a classificação de 0 a 1. Queremos um indice_confianca acima de {threshold_perc_confianca}. Se estiver abaixo, continue investigando."))
    # Output: the verdict; True means the content is true, False means fake.
    conteudo_verdadeiro: bool = dspy.OutputField(
        desc=("Diga se o conteúdo é verdadeiro. True se for verdadeiro e False se for falso/fake")
        )
# ReAct agent that drives the investigation using the signature above.
# NOTE(review): search_web and search_news_verification are defined in this
# file but are not registered as tools here — confirm this is intentional.
fake_news_agent = dspy.ReAct(
    DSPySigFakeNews,
    tools=[search_credible_sources, search_wikipedia, fetch_url_content],
    max_iters=20 # Maximum number of agent iterations
)
def predict_fake_news(news_content: str):
    """
    Predicts fake news using the fake_news_agent.

    Failures are reported through the return value rather than raised, so the
    UI always receives three displayable values.

    Args:
        news_content: The content of the news to analyze.
    Returns:
        A tuple containing the analysis, confidence score, and classification as text.
        On error (missing API key, blank input, agent failure) the tuple is
        (error message, 0.0, "N/A").
    """
    if not OPENAI_API_KEY:
        return "Error: OPENAI_API_KEY not set.", 0.0, "N/A"
    # Do not start a multi-iteration agent run when there is nothing to analyze.
    if not news_content or not news_content.strip():
        return "Error: news content is empty.", 0.0, "N/A"
    try:
        result = fake_news_agent(news_content=news_content)
    except Exception as exc:
        # Network/LLM/parsing failures: show the cause instead of an opaque UI error.
        return f"Error while analyzing the content: {exc}", 0.0, "N/A"
    analise = result.analise
    indice_confianca = result.indice_confianca
    conteudo_verdadeiro_text = "Verdadeiro" if result.conteudo_verdadeiro else "Falso"
    return analise, indice_confianca, conteudo_verdadeiro_text
# Gradio UI: one multi-line text input and three outputs, in the same order as
# the tuple returned by predict_fake_news (analysis, confidence, verdict).
news_input = gr.Textbox(label="Conteúdo da Notícia", lines=10)
result_outputs = [
    gr.Markdown(label="Análise"),
    gr.Number(label="Índice de Confiança"),
    gr.Textbox(label="Conteúdo Verdadeiro?"),
]
iface = gr.Interface(
    fn=predict_fake_news,
    inputs=news_input,
    outputs=result_outputs,
    title="Detector de Fake News - Veritas",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+dspy
+ddgs
+wikipedia
+requests
+gradio
+openai
+pydantic