Spaces:

ANISA09
/

atlas

Sleeping

File size: 1,580 Bytes

a5dc482

import re
from loguru import logger
from .serp_utils import serpapi_web_search
from .article_extractor import fetch_article_text_from_url

# Placeholder SerpAPI key. Within this module the value is only checked for
# truthiness to gate the SERP fallback in analyze_url(); it is not passed to
# serpapi_web_search() here. Because the placeholder is a non-empty string,
# that gate is always open until this is replaced with real configuration.
# NOTE(review): presumably serp_utils obtains the actual key itself — confirm.
SERPAPI_KEY = "your-serpapi-key"  # Load from env or config

def analyze_url(news_url: str, run_serp: bool):
    """Fetch the article behind ``news_url`` and return text for analysis.

    The article body and headline come from ``fetch_article_text_from_url``.
    When no body could be extracted, the function degrades in steps:

    1. If a headline is available, ``run_serp`` is truthy and ``SERPAPI_KEY``
       is set, the headline is searched via ``serpapi_web_search`` and the
       non-empty result snippets are joined into a substitute text.
    2. If that search yields no snippets, a headline-only placeholder is used.
    3. Otherwise (no headline, or the SERP fallback is disabled) a
       "failed to extract" placeholder mentioning the URL is used.

    Args:
        news_url: Address of the news article to process.
        run_serp: Whether the SERP snippet fallback may be used.

    Returns:
        A ``(article_text, headline, qa_fallback_note)`` tuple. The note is
        an empty string unless the SERP fallback branch was taken.
    """
    body, headline = fetch_article_text_from_url(news_url)

    # Happy path: the extractor produced text, nothing else to do.
    if body:
        return body, headline, ""

    # Without a headline (or with the fallback switched off) there is
    # nothing to search for, so report the extraction failure.
    if not (headline and run_serp and SERPAPI_KEY):
        return f"(Failed to extract article text from URL: {news_url})", headline, ""

    # Assumes the search helper returns a dict shaped like
    # {"result": {"organic_results": [{"snippet": ...}, ...]}} — TODO confirm.
    response = serpapi_web_search(headline, num=8)
    organic_hits = response.get("result", {}).get("organic_results", [])

    usable_snippets = []
    for hit in organic_hits:
        snippet = hit.get("snippet", "")
        if snippet:
            usable_snippets.append(snippet)

    # Cap the combined snippets at 3000 characters.
    combined = "\n\n".join(usable_snippets)[:3000]

    if not combined:
        return (
            f"(No extractable text) Headline: {headline}",
            headline,
            "Note: only headline extracted — limited reliability.",
        )

    return (
        f"(SERP fallback for headline: {headline})\n\n{combined}",
        headline,
        "Note: full article text unavailable — using SERP snippets for analysis. "
        "Please verify date/location in original sources.",
    )


def analyze_text(news_text: str):
    """Normalize raw text input for analysis.

    Args:
        news_text: Text supplied directly by the caller. Falsy values
            (``None``, ``""``) are treated as empty input.

    Returns:
        A ``(text, headline, qa_fallback_note)`` tuple where ``text`` is the
        input with surrounding whitespace stripped and the other two items
        are always empty strings.

    Raises:
        ValueError: If the input is empty or contains only whitespace.
    """
    cleaned = news_text.strip() if news_text else ""
    if cleaned:
        return cleaned, "", ""
    raise ValueError("Empty text input provided.")