Commit ·
b72707e
1
Parent(s): 570bf19
Feat: Added the Export BibTeX Citation tool
Browse files- analysis.py +0 -20
- app.py +26 -3
- utils.py +25 -3
analysis.py
CHANGED
|
@@ -21,26 +21,6 @@ Provide a comprehensive technical analysis by structuring your response in Markd
|
|
| 21 |
(Propose 2-3 concrete, technically-grounded hypotheses for extending this research. For each, describe a potential experiment or technical extension.)
|
| 22 |
"""
|
| 23 |
|
| 24 |
-
# # The master prompt that asks for everything at once.
|
| 25 |
-
# COMPREHENSIVE_ANALYSIS_PROMPT = """
|
| 26 |
-
# Provide a comprehensive technical analysis of the document for a knowledgeable audience (e.g., graduate students, researchers). Structure your response in Markdown with the following sections, in this exact order:
|
| 27 |
-
|
| 28 |
-
# ## 1. Abstract Summary
|
| 29 |
-
# (A concise summary of the paper's core contributions, methods, and key results, similar to a conference abstract.)
|
| 30 |
-
|
| 31 |
-
# ## 2. Core Architecture and Methodology
|
| 32 |
-
# (Deconstruct the system's architecture and the flow of data or logic. Use bullet points to detail key components and algorithms. Be technically precise.)
|
| 33 |
-
|
| 34 |
-
# ## 3. Quantitative Results & Critical Analysis
|
| 35 |
-
# (Present the main quantitative results in a list or responsive format (NO WIDE TABLES). Provide a brief but critical analysis of what these results mean.)
|
| 36 |
-
|
| 37 |
-
# ## 4. Positioning in the Field
|
| 38 |
-
# (Situate this work by comparing it to 1-2 key alternative approaches mentioned in the paper, highlighting its unique technical differentiators.)
|
| 39 |
-
|
| 40 |
-
# ## 5. Proposed Future Research Directions
|
| 41 |
-
# (Propose 2-3 concrete, technically-grounded hypotheses and experimental ideas for extending this research based on the paper's conclusion or limitations.)
|
| 42 |
-
# """
|
| 43 |
-
|
| 44 |
def run_analysis_on_single_paper(documents):
|
| 45 |
"""
|
| 46 |
This simplified version creates an index and runs a single, comprehensive query against it.
|
|
|
|
| 21 |
(Propose 2-3 concrete, technically-grounded hypotheses for extending this research. For each, describe a potential experiment or technical extension.)
|
| 22 |
"""
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def run_analysis_on_single_paper(documents):
|
| 25 |
"""
|
| 26 |
This simplified version creates an index and runs a single, comprehensive query against it.
|
app.py
CHANGED
|
@@ -2,11 +2,11 @@ import gradio as gr
|
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
import hashlib
|
| 5 |
-
from llama_index.core import Settings
|
| 6 |
from llama_index.readers.file import PDFReader
|
| 7 |
from llama_index.embeddings.mistralai import MistralAIEmbedding
|
| 8 |
-
from utils import get_llm, download_pdf_from_url
|
| 9 |
-
from agents import create_scout_agent
|
| 10 |
from analysis import run_analysis_on_single_paper
|
| 11 |
|
| 12 |
# --- Orchestrator Functions for Gradio ---
|
|
@@ -33,6 +33,29 @@ def pdf_analysis_flow(pdf_file, progress=gr.Progress()):
|
|
| 33 |
print(f"An error occurred in pdf_analysis_flow: {e}")
|
| 34 |
return f"An error occurred: {e}"
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
def scout_agent_flow(topic_query, progress=gr.Progress()):
|
| 38 |
"""This function now runs the scout agent and directly returns its summary."""
|
|
|
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
import hashlib
|
| 5 |
+
from llama_index.core import Settings, Document
|
| 6 |
from llama_index.readers.file import PDFReader
|
| 7 |
from llama_index.embeddings.mistralai import MistralAIEmbedding
|
| 8 |
+
from utils import get_llm, download_pdf_from_url, format_to_bibtex
|
| 9 |
+
from agents import create_scout_agent, create_specialist_agent, CITATION_EXTRACTOR_PROMPT
|
| 10 |
from analysis import run_analysis_on_single_paper
|
| 11 |
|
| 12 |
# --- Orchestrator Functions for Gradio ---
|
|
|
|
| 33 |
print(f"An error occurred in pdf_analysis_flow: {e}")
|
| 34 |
return f"An error occurred: {e}"
|
| 35 |
|
| 36 |
+
def export_bibtex_flow(documents, file_obj):
    """Workflow for the 'Export Citation' button.

    Sends the beginning of the first document's text to an LLM-backed
    extractor agent and formats the agent's reply as a BibTeX entry.

    Args:
        documents: Sequence whose first element exposes a ``.text`` attribute
            (presumably the parsed PDF pages from a prior analysis run --
            confirm against the caller).
        file_obj: The uploaded file, either as an object with a ``.name``
            path attribute or as a plain path string.

    Returns:
        The string produced by ``format_to_bibtex`` for the agent's reply.

    Raises:
        gr.Error: If no documents or no uploaded file are available.
    """
    if not documents:
        raise gr.Error("Please analyze a paper first.")
    if file_obj is None:
        # Without the upload there is no filename to derive an identifier from.
        raise gr.Error("Please upload a paper first.")

    # Accept both a file-like wrapper (with .name) and a plain path string.
    file_path = file_obj if isinstance(file_obj, str) else file_obj.name
    filename = os.path.basename(file_path)
    print(f"--- BibTeX Export: Starting citation extraction for {filename} ---")

    # Drop a trailing ".pdf" so the identifier embedded in the citation is not
    # rendered as "arXiv:<id>.pdf". Other suffixes are left alone because a
    # bare arXiv id such as "2401.12345" itself contains a dot.
    stem, extension = os.path.splitext(filename)
    paper_id = stem if extension.lower() == ".pdf" else filename

    first_page_text = documents[0].text

    # We only need the LLM for this, no other tools.
    Settings.llm = get_llm()
    extractor_agent = create_specialist_agent(CITATION_EXTRACTOR_PROMPT, Settings.llm, [])

    # Give the agent the text and ask it to perform its task. The text is
    # truncated to 4000 characters to keep the prompt small.
    response = extractor_agent.chat(
        f"Extract bibliographic data from this text: {first_page_text[:4000]}"
    )

    print(f"--- BibTeX Export: Agent responded with: {response.response} ---")

    # Format the extracted JSON into a BibTeX string.
    return format_to_bibtex(response.response, paper_id)
|
| 59 |
|
| 60 |
def scout_agent_flow(topic_query, progress=gr.Progress()):
|
| 61 |
"""This function now runs the scout agent and directly returns its summary."""
|
utils.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
-
# utils.py (Corrected and Simplified)
|
| 2 |
-
|
| 3 |
import os
|
|
|
|
| 4 |
import requests
|
| 5 |
from io import BytesIO
|
| 6 |
from llama_index.llms.mistralai import MistralAI
|
|
@@ -24,4 +23,27 @@ def download_pdf_from_url(url: str):
|
|
| 24 |
return BytesIO(response.content)
|
| 25 |
except requests.exceptions.RequestException as e:
|
| 26 |
print(f"Error downloading {url}: {e}")
|
| 27 |
-
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
import json
|
| 3 |
import requests
|
| 4 |
from io import BytesIO
|
| 5 |
from llama_index.llms.mistralai import MistralAI
|
|
|
|
| 23 |
return BytesIO(response.content)
|
| 24 |
except requests.exceptions.RequestException as e:
|
| 25 |
print(f"Error downloading {url}: {e}")
|
| 26 |
+
return None
|
| 27 |
+
|
| 28 |
+
def format_to_bibtex(citation_json_str: str, arxiv_id: str) -> str:
    """Format a JSON string of citation data into a BibTeX ``@article`` entry.

    The JSON object may provide ``title``, ``authors`` (a list of names or a
    single string) and ``year``; missing or empty values fall back to
    placeholders. Text surrounding the JSON object (for example Markdown code
    fences around an LLM reply) is ignored.

    Args:
        citation_json_str: JSON text describing the citation.
        arxiv_id: Identifier inserted into the ``journal`` field.

    Returns:
        The BibTeX entry, or a human-readable failure message when the input
        cannot be parsed as a JSON object.
    """
    failure_message = (
        "Could not generate BibTeX citation. "
        "The required data could not be extracted."
    )

    try:
        if not isinstance(citation_json_str, str):
            raise TypeError("citation data must be a JSON string")
        payload = citation_json_str.strip()
        # Keep only the outermost {...} span so replies wrapped in code
        # fences or explanatory prose still parse.
        start, end = payload.find("{"), payload.rfind("}")
        if start != -1 and end > start:
            payload = payload[start:end + 1]
        data = json.loads(payload)
        if not isinstance(data, dict):
            raise TypeError("citation JSON must be an object")
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error formatting BibTeX: {e}")
        return failure_message

    title = data.get("title") or "No Title Found"

    raw_authors = data.get("authors", ["N/A"])
    if isinstance(raw_authors, str):
        # A bare string must not be iterated character by character.
        author_list = [raw_authors]
    else:
        try:
            author_list = [str(author) for author in raw_authors]
        except TypeError:
            # e.g. JSON null or a number instead of a list
            author_list = []
    author_list = [name.strip() for name in author_list if name.strip()]
    if not author_list:
        author_list = ["N/A"]
    authors = " and ".join(author_list)

    year = data.get("year")
    if year is None or year == "":
        year = "N/A"

    # Create a simple citation key, e.g., "bouzenia2024", from the FIRST
    # author's surname. Both "First Last" and "Last, First" are handled.
    first_author = author_list[0]
    if "," in first_author:
        surname = first_author.split(",")[0]
    else:
        surname = first_author.split()[-1]
    # Strip characters such as "/" or spaces that are not valid in a key.
    key = "".join(ch for ch in f"{surname.lower()}{year}" if ch.isalnum())
    if not key:
        key = "citation"

    return "\n".join([
        f"@article{{{key},",
        f"title = {{{title}}},",
        f"author = {{{authors}}},",
        f"year = {{{year}}},",
        f"journal = {{arXiv preprint arXiv:{arxiv_id}}}",
        "}",
    ])
|