Spaces:

ANISA09
/

atlas

Sleeping

atlas / analysis_helpers.py

Create analysis_helpers.py

a5dc482 verified 3 months ago

1.58 kB

	import os
	import re

	from loguru import logger

	from .serp_utils import serpapi_web_search
	from .article_extractor import fetch_article_text_from_url

	# SerpAPI credential. Read from the SERPAPI_KEY environment variable so the
	# real secret never has to be committed; the placeholder stays as the
	# fallback so behaviour is unchanged when the variable is not set.
	# Within this module the value is only truth-tested (by analyze_url) to
	# decide whether the SERP fallback may run.
	SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "your-serpapi-key")

	def analyze_url(news_url: str, run_serp: bool):
	    """Extract article text from a news URL, with a SERP-snippet fallback.

	    The URL is handed to ``fetch_article_text_from_url``. If that yields no
	    article text but does yield a headline, and both ``run_serp`` and
	    ``SERPAPI_KEY`` are truthy, the headline is searched via
	    ``serpapi_web_search`` and the snippets of the organic results are
	    used as substitute text.

	    Args:
	        news_url: URL of the article to analyse.
	        run_serp: Whether the SERP fallback may be used when extraction
	            returns no article text.

	    Returns:
	        A 3-tuple ``(article_text, headline, qa_fallback_note)``:

	        * extraction succeeded: the extracted text, the headline, ``""``;
	        * SERP fallback produced snippets: a "(SERP fallback ...)" text
	          (snippets joined by blank lines, capped at 3000 characters)
	          plus a note asking the reader to verify against the sources;
	        * SERP fallback produced nothing: a "(No extractable text)"
	          text containing only the headline, plus a reliability note;
	        * otherwise: a "(Failed to extract ...)" text and ``""``.
	    """
	    article_text, headline = fetch_article_text_from_url(news_url)

	    # Happy path: the extractor returned usable text, nothing to annotate.
	    if article_text:
	        return article_text, headline, ""

	    # The fallback needs a headline to search for and must be enabled.
	    if not (headline and run_serp and SERPAPI_KEY):
	        return f"(Failed to extract article text from URL: {news_url})", headline, ""

	    # num=8 is forwarded as-is; presumably the number of results requested
	    # (confirm against serp_utils).
	    response = serpapi_web_search(headline, num=8)

	    # Be tolerant of a missing or null level in the response: the chained
	    # .get(..., {}) form only covers *absent* keys and raises when a key is
	    # present but None, or when the helper itself returns None.
	    payload = response.get("result") if isinstance(response, dict) else None
	    organic = payload.get("organic_results") if isinstance(payload, dict) else None

	    snippets = []
	    for hit in organic or []:
	        snippet = hit.get("snippet") if isinstance(hit, dict) else None
	        # Keep only non-empty strings so str.join cannot fail.
	        if isinstance(snippet, str) and snippet:
	            snippets.append(snippet)

	    serp_text = "\n\n".join(snippets)[:3000]
	    if not serp_text:
	        return (
	            f"(No extractable text) Headline: {headline}",
	            headline,
	            "Note: only headline extracted — limited reliability.",
	        )

	    qa_fallback_note = (
	        "Note: full article text unavailable — using SERP snippets for analysis. "
	        "Please verify date/location in original sources."
	    )
	    article_text = f"(SERP fallback for headline: {headline})\n\n{serp_text}"
	    return article_text, headline, qa_fallback_note


	def analyze_text(news_text: str):
	    """Validate raw text input and return it as a 3-item result.

	    Args:
	        news_text: Text to analyse; falsy values are treated as empty.

	    Returns:
	        ``(text, "", "")`` where ``text`` is ``news_text`` with surrounding
	        whitespace stripped. The second and third items are always empty
	        strings.

	    Raises:
	        ValueError: If nothing remains after stripping whitespace.
	    """
	    cleaned = news_text.strip() if news_text else ""
	    if cleaned:
	        return cleaned, "", ""
	    raise ValueError("Empty text input provided.")