Create analysis_helpers.py
Browse files- analysis_helpers.py +37 -0
analysis_helpers.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from loguru import logger
|
| 3 |
+
from .serp_utils import serpapi_web_search
|
| 4 |
+
from .article_extractor import fetch_article_text_from_url
|
| 5 |
+
|
| 6 |
+
# Placeholder SerpAPI key; the real key is meant to come from env or config.
# NOTE(review): this placeholder is a non-empty string, so the `SERPAPI_KEY`
# truthiness check in analyze_url passes even when no real key is configured.
SERPAPI_KEY = "your-serpapi-key" # Load from env or config
|
| 7 |
+
|
| 8 |
+
def analyze_url(news_url: str, run_serp: bool):
    """Fetch the article behind ``news_url`` and return text ready for analysis.

    Text returned by ``fetch_article_text_from_url`` is used as-is. When the
    extractor yields a headline but no text, ``run_serp`` is true and
    ``SERPAPI_KEY`` is non-empty, the headline is searched with
    ``serpapi_web_search`` and the organic-result snippets stand in for the
    article body.

    Args:
        news_url: URL of the news article to process.
        run_serp: Whether the SERP snippet fallback may be used when no
            article text could be extracted.

    Returns:
        A ``(article_text, headline, qa_fallback_note)`` tuple.
        ``article_text`` is never empty: it is the extracted text, the
        SERP-snippet fallback, or a parenthesised placeholder describing the
        failure. ``qa_fallback_note`` is ``""`` unless the SERP branch ran.
    """
    article_text, headline = fetch_article_text_from_url(news_url)
    qa_fallback_note = ""

    if article_text:
        return article_text, headline, qa_fallback_note

    if not (headline and run_serp and SERPAPI_KEY):
        # Nothing to search for, or the SERP fallback is disabled.
        article_text = f"(Failed to extract article text from URL: {news_url})"
        return article_text, headline, qa_fallback_note

    serp_text = _serp_snippet_text(serpapi_web_search(headline, num=8))
    if serp_text:
        article_text = f"(SERP fallback for headline: {headline})\n\n{serp_text}"
        qa_fallback_note = (
            "Note: full article text unavailable — using SERP snippets for analysis. "
            "Please verify date/location in original sources."
        )
    else:
        article_text = f"(No extractable text) Headline: {headline}"
        qa_fallback_note = "Note: only headline extracted — limited reliability."

    return article_text, headline, qa_fallback_note


def _serp_snippet_text(serp_response, limit: int = 3000) -> str:
    """Join the non-empty organic-result snippets of a SERP response.

    Tolerates a ``None`` response and ``None`` values under the ``"result"``,
    ``"organic_results"`` and ``"snippet"`` keys: ``dict.get`` defaults only
    cover *missing* keys, so explicit nulls would otherwise raise.
    """
    result = (serp_response or {}).get("result") or {}
    organic_results = result.get("organic_results") or ()
    # Skip null entries instead of failing on ``None.get``.
    snippets = (entry.get("snippet") for entry in organic_results if entry)
    # Keep only non-empty strings; ``str.join`` rejects anything else.
    usable = [text for text in snippets if text and isinstance(text, str)]
    return "\n\n".join(usable)[:limit]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def analyze_text(news_text: str):
    """Normalise raw text input into a ``(text, headline, note)`` triple.

    Falsy input (``None``, ``""``) is treated as an empty string and
    surrounding whitespace is stripped. The headline and note slots are
    always empty strings for plain-text input.

    Args:
        news_text: The text supplied directly by the caller.

    Returns:
        ``(stripped_text, "", "")``.

    Raises:
        ValueError: If nothing remains after stripping.
    """
    cleaned = news_text.strip() if news_text else ""
    if cleaned:
        return cleaned, "", ""
    raise ValueError("Empty text input provided.")
|