# atlas / analysis_helpers.py
# ANISA09's picture
# Create analysis_helpers.py
# a5dc482 verified
import re
from loguru import logger
from .serp_utils import serpapi_web_search
from .article_extractor import fetch_article_text_from_url
# SerpAPI credential. analyze_url only attempts its SERP fallback when this
# value is truthy.
# NOTE(review): this is a placeholder string, which is always truthy — the real
# key is expected to be loaded from the environment or config; confirm how
# serpapi_web_search obtains the key, since it is not passed explicitly here.
SERPAPI_KEY = "your-serpapi-key"  # Load from env or config
def analyze_url(news_url: str, run_serp: bool):
    """Extract article text from a news URL, with a SERP-snippet fallback.

    The article is fetched with ``fetch_article_text_from_url``. If no text
    comes back but a headline does, and ``run_serp`` and ``SERPAPI_KEY`` are
    both truthy, the headline is searched via ``serpapi_web_search`` and the
    non-empty result snippets (joined, capped at 3000 characters) stand in
    for the article text.

    Args:
        news_url: URL of the news article to process.
        run_serp: Whether the headline-based web-search fallback may be used
            when no article text could be extracted.

    Returns:
        A ``(article_text, headline, qa_fallback_note)`` tuple.

        * ``article_text`` is the extracted text, or a parenthesised
          placeholder naming the fallback that was used (SERP snippets,
          headline only, or extraction failure).
        * ``headline`` is passed through unchanged from the extractor.
        * ``qa_fallback_note`` is a reliability warning when the SERP branch
          was taken, otherwise an empty string.
    """
    article_text, headline = fetch_article_text_from_url(news_url)
    qa_fallback_note = ""

    if not article_text and headline and run_serp and SERPAPI_KEY:
        serpapi_result = serpapi_web_search(headline, num=8)

        # ``dict.get(key, default)`` only uses the default when the key is
        # absent, so a payload like {"result": None} (or a None
        # "organic_results") would raise instead of degrading gracefully.
        # Treat every missing/None level as "no results" so control reaches
        # the headline-only fallback below.
        result_payload = (serpapi_result or {}).get("result") or {}
        organic_results = result_payload.get("organic_results") or []

        snippets = [res.get("snippet", "") for res in organic_results]
        # Drop empty snippets and cap the combined text at 3000 characters.
        serp_text = "\n\n".join(s for s in snippets if s)[:3000]

        if serp_text:
            article_text = f"(SERP fallback for headline: {headline})\n\n{serp_text}"
            qa_fallback_note = (
                "Note: full article text unavailable — using SERP snippets for analysis. "
                "Please verify date/location in original sources."
            )
        else:
            # The search yielded nothing usable; only the headline is left.
            article_text = f"(No extractable text) Headline: {headline}"
            qa_fallback_note = "Note: only headline extracted — limited reliability."
    elif not article_text:
        # No text, and the SERP fallback is unavailable (no headline, disabled
        # by the caller, or no API key configured).
        article_text = f"(Failed to extract article text from URL: {news_url})"

    return article_text, headline, qa_fallback_note
def analyze_text(news_text: str):
    """Normalise plain-text input into the ``(text, headline, note)`` triple.

    Args:
        news_text: Raw text supplied by the caller; a falsy value (such as
            ``None``) is treated as an empty string.

    Returns:
        A tuple of the whitespace-stripped text followed by two empty
        strings (no headline and no fallback note for direct text input).

    Raises:
        ValueError: If the input is empty or contains only whitespace.
    """
    cleaned = news_text.strip() if news_text else ""
    if cleaned:
        return cleaned, "", ""
    raise ValueError("Empty text input provided.")