Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -299,55 +299,46 @@ def save_full_transcript(query, text):
|
|
| 299 |
"""Save full transcript of Arxiv results as a file."""
|
| 300 |
create_file(query, text, "md")
|
| 301 |
|
| 302 |
-
# ------------------------------
|
| 303 |
-
# NEW: Helper to parse references
|
| 304 |
-
# ------------------------------
|
| 305 |
def parse_arxiv_refs(ref_text: str):
|
| 306 |
"""
|
| 307 |
-
Parse
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
"""
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
| 315 |
results = []
|
| 316 |
-
for
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
#
|
| 321 |
-
|
| 322 |
-
if
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
else:
|
| 343 |
-
year = None
|
| 344 |
-
|
| 345 |
-
results.append({
|
| 346 |
-
'title': raw_title,
|
| 347 |
-
'summary': summary,
|
| 348 |
-
'year': year
|
| 349 |
-
})
|
| 350 |
-
return results
|
| 351 |
|
| 352 |
|
| 353 |
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
|
|
|
|
| 299 |
"""Save full transcript of Arxiv results as a file."""
|
| 300 |
create_file(query, text, "md")
|
| 301 |
|
|
|
|
|
|
|
|
|
|
| 302 |
def parse_arxiv_refs(ref_text: str):
    """Parse paper references from markdown-formatted text.

    Expected format per paper:
        **DATE | TITLE | ⬇️**
        AUTHORS
        SUMMARY

    Args:
        ref_text: Raw markdown text containing zero or more paper entries.

    Returns:
        List of dicts with keys 'title', 'summary', 'authors', 'year'
        (int or None) and 'date', limited to the first 20 papers.
        Returns an empty list when no headers match.
    """
    # Header pattern: a bold line with two '|' separators, e.g.
    # **2023-05-01 | Some Title | ⬇️**  (single pattern used for both
    # split and findall so the two stay in sync).
    header_pat = r'\*\*.*?\|\s*.*?\|\s*.*?\*\*'

    # Split the text on headers; papers[0] is any preamble before the
    # first header, so bodies align with headers via papers[1:].
    papers = re.split(header_pat, ref_text)
    headers = re.findall(header_pat, ref_text)

    results = []
    for i, (header, content) in enumerate(zip(headers, papers[1:])):
        if i >= 20:  # Limit to 20 papers
            break

        # Strip the surrounding '**' and split "DATE | TITLE | ⬇️".
        header_parts = [p.strip() for p in header.strip('*').split('|')]
        if len(header_parts) >= 2:
            date_str = header_parts[0].strip()
            title = header_parts[1].strip()

            # First line of the body is the (possibly bold) author list;
            # everything after the first newline is the summary.
            content_parts = content.strip().split('\n', 1)
            authors = content_parts[0].strip('*') if content_parts else ""
            summary = content_parts[1].strip() if len(content_parts) > 1 else ""

            # Extract a four-digit 20xx year from the date string.
            year_match = re.search(r'20\d{2}', date_str)
            year = int(year_match.group(0)) if year_match else None

            results.append({
                'title': title,
                'summary': summary,
                'authors': authors,
                'year': year,
                'date': date_str
            })
    # BUG FIX: the edited version ends after the append with no return
    # statement (the old `return results` was deleted in the diff), so the
    # function handed None to callers — restore the return.
    return results
| 342 |
|
| 343 |
|
| 344 |
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
|