Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -299,55 +299,46 @@ def save_full_transcript(query, text):
|
|
| 299 |
"""Save full transcript of Arxiv results as a file."""
|
| 300 |
create_file(query, text, "md")
|
| 301 |
|
| 302 |
-
# ------------------------------
|
| 303 |
-
# NEW: Helper to parse references
|
| 304 |
-
# ------------------------------
|
| 305 |
def parse_arxiv_refs(ref_text: str):
|
| 306 |
"""
|
| 307 |
-
Parse
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
"""
|
| 314 |
-
|
|
|
|
|
|
|
|
|
|
| 315 |
results = []
|
| 316 |
-
for
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
#
|
| 321 |
-
|
| 322 |
-
if
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
else:
|
| 343 |
-
year = None
|
| 344 |
-
|
| 345 |
-
results.append({
|
| 346 |
-
'title': raw_title,
|
| 347 |
-
'summary': summary,
|
| 348 |
-
'year': year
|
| 349 |
-
})
|
| 350 |
-
return results
|
| 351 |
|
| 352 |
|
| 353 |
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
|
|
|
|
| 299 |
"""Save full transcript of Arxiv results as a file."""
|
| 300 |
create_file(query, text, "md")
|
| 301 |
|
|
|
|
|
|
|
|
|
|
| 302 |
def parse_arxiv_refs(ref_text: str):
    """Parse paper references from markdown-formatted text.

    Expected format per paper:
        **DATE | TITLE | ⬇️**
        AUTHORS
        SUMMARY

    Args:
        ref_text: Raw markdown text containing zero or more paper entries.

    Returns:
        List of dicts with keys 'title', 'summary', 'authors', 'year'
        (int or None) and 'date', limited to the first 20 papers.
        Returns an empty list when no headers match.
    """
    # Header pattern: a bold line with two '|' separators, e.g.
    # **2023-05-01 | Some Title | ⬇️**  (single pattern used for both
    # split and findall so the two stay in sync).
    header_pat = r'\*\*.*?\|\s*.*?\|\s*.*?\*\*'

    # Split the text on headers; papers[0] is any preamble before the
    # first header, so bodies align with headers via papers[1:].
    papers = re.split(header_pat, ref_text)
    headers = re.findall(header_pat, ref_text)

    results = []
    for i, (header, content) in enumerate(zip(headers, papers[1:])):
        if i >= 20:  # Limit to 20 papers
            break

        # Strip the surrounding '**' and split "DATE | TITLE | ⬇️".
        header_parts = [p.strip() for p in header.strip('*').split('|')]
        if len(header_parts) >= 2:
            date_str = header_parts[0].strip()
            title = header_parts[1].strip()

            # First line of the body is the (possibly bold) author list;
            # everything after the first newline is the summary.
            content_parts = content.strip().split('\n', 1)
            authors = content_parts[0].strip('*') if content_parts else ""
            summary = content_parts[1].strip() if len(content_parts) > 1 else ""

            # Extract a four-digit 20xx year from the date string.
            year_match = re.search(r'20\d{2}', date_str)
            year = int(year_match.group(0)) if year_match else None

            results.append({
                'title': title,
                'summary': summary,
                'authors': authors,
                'year': year,
                'date': date_str
            })
    # BUG FIX: the edited version ends after the append with no return
    # statement (the old `return results` was deleted in the diff), so the
    # function handed None to callers — restore the return.
    return results
| 342 |
|
| 343 |
|
| 344 |
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
|