File size: 2,955 Bytes
f7909f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import logging
import io
from typing import Optional, List, Dict, Any

from PIL import Image

from ._config import GEMINI_CLIENT, GENAI_MODEL
from ._utils import safe_parse_gemini_json, sanitize_text
from ._media_analyzer import _get_image_data_from_source

# Module-wide logger. It is registered under the fixed name "fact_checker_gemini"
# rather than this module's __name__.
logger = logging.getLogger("fact_checker_gemini")

def _build_evidence_snippet(serpapi_web: dict, image_analysis: dict) -> str:
    """Creates a text snippet of all gathered evidence for the Gemini prompt."""
    out = ""
    try:
        organic = serpapi_web.get("result", {}).get("organic_results", []) or []
        pieces = [f"{r.get('title','')} :: {r.get('snippet','')} :: {r.get('link','')}" for r in organic[:6]]
        if pieces: out += "WEB EVIDENCE:\n" + "\n".join(pieces)
        
        rorg = image_analysis.get("serpapi_reverse", {}).get("result", {}).get("organic_results", []) or []
        rpieces = [f"{r.get('title','')} :: {r.get('snippet','')} :: {r.get('link','')}" for r in rorg[:4]]
        if rpieces: out += "\nREVERSE IMAGE EVIDENCE:\n" + "\n".join(rpieces)
    except Exception:
        logger.exception("Building evidence snippet failed")
    return out

def gemini_generate_claim_from_image(image_source: str) -> Optional[str]:
    """Ask Gemini to propose a cautious, testable claim about an image.

    Args:
        image_source: Handed to `_get_image_data_from_source` to obtain the
            raw image bytes.

    Returns:
        The sanitized claim text cut to at most 400 characters, or None when
        no Gemini client is configured, no image bytes are available, the
        parsed response carries no claim, or any step raises (the error is
        logged).
    """
    if not GEMINI_CLIENT:
        return None

    instructions = (
        "You are a cautious fact-check assistant. Look at the image and, ONLY IF you can identify a plausible short factual claim about the main subject, "
        "return a JSON object **ONLY** inside triple backticks, with the exact keys: claim, rationale.\n\n"
        "Rules:\n- If you can propose a factual testable claim, set \"claim\" to a short sentence (<= 140 chars) starting with "
        "\"Auto-generated (unverified):\" and use cautious phrasing. - If you cannot, set \"claim\": null.\n"
    )

    try:
        raw_bytes, _ = _get_image_data_from_source(image_source)
        if not raw_bytes:
            return None

        picture = Image.open(io.BytesIO(raw_bytes))
        response = GEMINI_CLIENT.models.generate_content(
            model=GENAI_MODEL,
            contents=[instructions, picture],
        )

        payload = safe_parse_gemini_json(response.text)
        claim = payload.get("claim") if payload else None
        if not claim:
            return None
        # Hard cap on the returned text, independent of the length the prompt requests.
        return sanitize_text(claim)[:400]
    except Exception:
        logger.exception("Gemini multimodal claim gen failed")
        return None

def gemini_extract_claims_from_text(article_text: str, max_claims: int = 3) -> List[Dict[str, str]]:
    """Extracts testable claims from article text using Gemini."""
    if not GEMINI_CLIENT: return []
    article_text = sanitize_text(article_text)
    if not article_text: return []
    prompt = (
        f"Extract up to {max_claims} concise, testable factual claims from the article. "
        "Return ONLY a single fenced JSON block with key `claims` (list of objects with `claim` and `context`). "
        "Article:\n