import os
import re

from loguru import logger

from .article_extractor import fetch_article_text_from_url
from .serp_utils import serpapi_web_search

# Read the SerpAPI key from the environment instead of hard-coding a
# placeholder: the previous literal ("your-serpapi-key") was truthy, so the
# SERP-fallback guard below could fire with an invalid key.  A missing env
# var yields "" (falsy), which cleanly disables the fallback.
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "")

# Maximum number of characters of joined SERP snippets kept for analysis.
_SERP_TEXT_LIMIT = 3000


def analyze_url(news_url: str, run_serp: bool):
    """Fetch and extract article text for *news_url*.

    When the article body cannot be extracted but a headline is available,
    *run_serp* is true, and a SerpAPI key is configured, falls back to web
    search snippets for the headline as stand-in text.

    Args:
        news_url: URL of the news article to process.
        run_serp: Whether the SERP-snippet fallback is allowed.

    Returns:
        A ``(article_text, headline, qa_fallback_note)`` triple.
        ``qa_fallback_note`` is non-empty only when degraded/fallback text
        was used.  ``headline`` is whatever the extractor returned
        (presumably a string, possibly empty — confirm against
        ``fetch_article_text_from_url``).
    """
    article_text, headline = fetch_article_text_from_url(news_url)
    qa_fallback_note = ""

    if not article_text and headline and run_serp and SERPAPI_KEY:
        # No article body but we do have a headline: search the web for it
        # and stitch the organic-result snippets together as stand-in text.
        serpapi_result = serpapi_web_search(headline, num=8)
        # NOTE(review): results are read as result["result"]["organic_results"];
        # confirm serpapi_web_search really nests the payload under "result"
        # (raw SerpAPI responses expose "organic_results" at the top level).
        organic = serpapi_result.get("result", {}).get("organic_results", [])
        snippets = [hit.get("snippet", "") for hit in organic]
        serp_text = "\n\n".join(s for s in snippets if s)[:_SERP_TEXT_LIMIT]
        if serp_text:
            article_text = f"(SERP fallback for headline: {headline})\n\n{serp_text}"
            qa_fallback_note = (
                "Note: full article text unavailable — using SERP snippets for analysis. "
                "Please verify date/location in original sources."
            )
        else:
            # Search returned nothing usable: fall back to headline only.
            article_text = f"(No extractable text) Headline: {headline}"
            qa_fallback_note = "Note: only headline extracted — limited reliability."
    elif not article_text:
        # Neither body nor a usable fallback: return an explicit failure
        # marker so downstream consumers can detect the miss.
        article_text = f"(Failed to extract article text from URL: {news_url})"

    return article_text, headline, qa_fallback_note


def analyze_text(news_text: str):
    """Normalize plain-text input.

    Args:
        news_text: Raw user-supplied text; ``None`` is tolerated and treated
            as empty.

    Returns:
        ``(news_text, "", "")`` with surrounding whitespace stripped,
        matching the ``(article_text, headline, qa_fallback_note)`` shape
        of :func:`analyze_url`.

    Raises:
        ValueError: If the input is empty or whitespace-only.
    """
    news_text = (news_text or "").strip()
    if not news_text:
        raise ValueError("Empty text input provided.")
    return news_text, "", ""