Update app.py

app.py
CHANGED
@@ -159,6 +159,26 @@ def merge_duplicates(entries):
     return unique
 
 
+def load_web_url_content(url):
+    try:
+        response = tavily.search(query=url, search_depth="advanced", max_results=1)
+        if response["results"]:
+            result = response["results"][0]
+            return {
+                "title": result.get("title", "Untitled Web Page"),
+                "url": result.get("url", url),
+                "snippet": result.get("content", ""),
+                "image_url": result.get("image_url"),
+                "source": "web",
+                "year": extract_year_from_text(result.get("content", ""))
+            }
+        return None
+    except Exception as e:
+        print(f"Error loading URL: {url} — {e}")
+        return None
+
+
+
 class PDF(FPDF):
     def __init__(self):
         super().__init__()
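For reference, the new load_web_url_content helper turns a single pasted URL into the same source-record shape the rest of the pipeline consumes. A minimal usage sketch, assuming the module-level tavily client in app.py is already configured with a valid API key; the URL below is only a placeholder and is not part of the commit:

# Hypothetical smoke test for the new helper (illustration only)
source = load_web_url_content("https://example.com/some-article")
if source:
    print(source["title"], source["year"], source["url"])
else:
    print("Tavily returned no result for that URL")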
@@ -240,24 +260,24 @@ if research_button and topic:
     with st.status("🔍 Gathering and analyzing sources...") as status:
         def fetch_all_sources():
             sources = []
+
+            # Handle Web/Hybrid sources
             if source_type in ["Web Only", "Hybrid"]:
-                sources += get_sources(topic, custom_domains)
+                # Check if custom_domains is a full URL
+                if custom_domains.strip().startswith("http"):
+                    custom_source = load_web_url_content(custom_domains.strip())
+                    if custom_source:
+                        sources.append(custom_source)
+                else:
+                    sources += get_sources(topic, custom_domains)
+
+            # Handle Academic sources
             if source_type in ["Academic Only", "Hybrid"]:
                 sources += get_arxiv_papers(topic)
                 sources += get_semantic_papers(topic)
+
             return sources
 
-        all_sources, retries = [], 0
-        while retries < 3:
-            all_sources = fetch_all_sources()
-            if all_sources:
-                break
-            retries += 1
-            time.sleep(2)
-
-        if not all_sources:
-            raise ValueError("❌ No sources found.")
-
         merged = merge_duplicates(all_sources)
         merged = sort_sources_chronologically(merged)
 
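The practical effect of the fetch_all_sources change is that the custom domains field can now carry either a full pasted URL or a plain domain filter. A rough sketch of the two paths, with hypothetical field values; topic, get_sources, and load_web_url_content are assumed to be defined as in app.py:

# Hypothetical values for the custom domains field (illustration only)
custom_domains = "https://example.com/post/llm-survey"   # a pasted page: fetched directly via Tavily
# custom_domains = "nature.com, arxiv.org"               # plain domains: scoped keyword search

value = custom_domains.strip()
if value.startswith("http"):
    source = load_web_url_content(value)
    sources = [source] if source else []
else:
    sources = get_sources(topic, custom_domains)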