ANISA09 committed on
Commit
a5dc482
·
verified ·
1 Parent(s): 61cfd01

Create analysis_helpers.py

Browse files
Files changed (1) hide show
  1. analysis_helpers.py +37 -0
analysis_helpers.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re

from loguru import logger

from .serp_utils import serpapi_web_search
from .article_extractor import fetch_article_text_from_url
5
+
6
# SerpAPI key; analyze_url checks its truthiness before using the SERP fallback.
# Read from the SERPAPI_KEY environment variable so a real key never has to be
# committed; the original placeholder stays as the default so behaviour is
# unchanged when the variable is not set.
# NOTE(review): the placeholder is truthy, so `if SERPAPI_KEY` passes even when
# no real key is configured -- confirm whether the default should be "".
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "your-serpapi-key")
7
+
8
def analyze_url(news_url: str, run_serp: bool):
    """Fetch a news article's text, falling back to SERP snippets if needed.

    The URL is passed to ``fetch_article_text_from_url``. If that yields no
    body text but does yield a headline, and both ``run_serp`` and
    ``SERPAPI_KEY`` are truthy, the headline is searched with
    ``serpapi_web_search`` and the organic-result snippets are used in place
    of the article body.

    Args:
        news_url: URL of the news article to process.
        run_serp: Whether the SERP fallback may be used when extraction
            returns a headline but no body text.

    Returns:
        A ``(article_text, headline, qa_fallback_note)`` tuple:

        * ``article_text`` -- the extracted text, the SERP-based substitute,
          or a parenthesised placeholder describing why no text is available.
        * ``headline`` -- exactly what the extractor returned.
        * ``qa_fallback_note`` -- a reliability warning; empty unless the
          SERP fallback branch was taken.
    """
    article_text, headline = fetch_article_text_from_url(news_url)
    qa_fallback_note = ""

    # Happy path: the extractor produced body text.
    if article_text:
        return article_text, headline, qa_fallback_note

    # No body text and the SERP fallback is unavailable (no headline to
    # search for, fallback disabled by the caller, or no key configured).
    if not (headline and run_serp and SERPAPI_KEY):
        article_text = f"(Failed to extract article text from URL: {news_url})"
        return article_text, headline, qa_fallback_note

    # Expected response shape, per the keys read here:
    # {"result": {"organic_results": [{"snippet": "..."}, ...]}}.
    # Every level is validated so a None/malformed response degrades to the
    # headline-only branch below instead of raising AttributeError/TypeError.
    response = serpapi_web_search(headline, num=8)
    payload = response.get("result") if isinstance(response, dict) else None
    organic = payload.get("organic_results") if isinstance(payload, dict) else None
    snippets = [
        entry["snippet"]
        for entry in (organic or [])
        if isinstance(entry, dict)
        and isinstance(entry.get("snippet"), str)
        and entry["snippet"]
    ]

    # Cap the combined snippet text at 3000 characters.
    serp_text = "\n\n".join(snippets)[:3000]

    if serp_text:
        article_text = f"(SERP fallback for headline: {headline})\n\n{serp_text}"
        qa_fallback_note = (
            "Note: full article text unavailable — using SERP snippets for analysis. "
            "Please verify date/location in original sources."
        )
    else:
        article_text = f"(No extractable text) Headline: {headline}"
        qa_fallback_note = "Note: only headline extracted — limited reliability."

    return article_text, headline, qa_fallback_note
30
+
31
+
32
def analyze_text(news_text: str):
    """Validate and normalise plain-text input.

    Args:
        news_text: Raw text to analyse. ``None`` (or any other falsy value)
            is treated as an empty string.

    Returns:
        A ``(text, "", "")`` tuple: the whitespace-stripped text followed by
        two empty strings, in the positions ``analyze_url`` uses for the
        headline and the fallback note.

    Raises:
        ValueError: If nothing remains after stripping whitespace.
    """
    cleaned = (news_text or "").strip()
    if not cleaned:
        raise ValueError("Empty text input provided.")
    return cleaned, "", ""