DakshChaudhary committed on
Commit
b72707e
·
1 Parent(s): 570bf19

Feat: Added the Export BibTeX Citation tool

Browse files
Files changed (3) hide show
  1. analysis.py +0 -20
  2. app.py +26 -3
  3. utils.py +25 -3
analysis.py CHANGED
@@ -21,26 +21,6 @@ Provide a comprehensive technical analysis by structuring your response in Markd
21
  (Propose 2-3 concrete, technically-grounded hypotheses for extending this research. For each, describe a potential experiment or technical extension.)
22
  """
23
 
24
- # # The master prompt that asks for everything at once.
25
- # COMPREHENSIVE_ANALYSIS_PROMPT = """
26
- # Provide a comprehensive technical analysis of the document for a knowledgeable audience (e.g., graduate students, researchers). Structure your response in Markdown with the following sections, in this exact order:
27
-
28
- # ## 1. Abstract Summary
29
- # (A concise summary of the paper's core contributions, methods, and key results, similar to a conference abstract.)
30
-
31
- # ## 2. Core Architecture and Methodology
32
- # (Deconstruct the system's architecture and the flow of data or logic. Use bullet points to detail key components and algorithms. Be technically precise.)
33
-
34
- # ## 3. Quantitative Results & Critical Analysis
35
- # (Present the main quantitative results in a list or responsive format (NO WIDE TABLES). Provide a brief but critical analysis of what these results mean.)
36
-
37
- # ## 4. Positioning in the Field
38
- # (Situate this work by comparing it to 1-2 key alternative approaches mentioned in the paper, highlighting its unique technical differentiators.)
39
-
40
- # ## 5. Proposed Future Research Directions
41
- # (Propose 2-3 concrete, technically-grounded hypotheses and experimental ideas for extending this research based on the paper's conclusion or limitations.)
42
- # """
43
-
44
  def run_analysis_on_single_paper(documents):
45
  """
46
  This simplified version creates an index and runs a single, comprehensive query against it.
 
21
  (Propose 2-3 concrete, technically-grounded hypotheses for extending this research. For each, describe a potential experiment or technical extension.)
22
  """
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def run_analysis_on_single_paper(documents):
25
  """
26
  This simplified version creates an index and runs a single, comprehensive query against it.
app.py CHANGED
@@ -2,11 +2,11 @@ import gradio as gr
2
  import os
3
  import re
4
  import hashlib
5
- from llama_index.core import Settings
6
  from llama_index.readers.file import PDFReader
7
  from llama_index.embeddings.mistralai import MistralAIEmbedding
8
- from utils import get_llm, download_pdf_from_url
9
- from agents import create_scout_agent
10
  from analysis import run_analysis_on_single_paper
11
 
12
  # --- Orchestrator Functions for Gradio ---
@@ -33,6 +33,29 @@ def pdf_analysis_flow(pdf_file, progress=gr.Progress()):
33
  print(f"An error occurred in pdf_analysis_flow: {e}")
34
  return f"An error occurred: {e}"
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  def scout_agent_flow(topic_query, progress=gr.Progress()):
38
  """This function now runs the scout agent and directly returns its summary."""
 
2
  import os
3
  import re
4
  import hashlib
5
+ from llama_index.core import Settings, Document
6
  from llama_index.readers.file import PDFReader
7
  from llama_index.embeddings.mistralai import MistralAIEmbedding
8
+ from utils import get_llm, download_pdf_from_url, format_to_bibtex
9
+ from agents import create_scout_agent, create_specialist_agent, CITATION_EXTRACTOR_PROMPT
10
  from analysis import run_analysis_on_single_paper
11
 
12
  # --- Orchestrator Functions for Gradio ---
 
33
  print(f"An error occurred in pdf_analysis_flow: {e}")
34
  return f"An error occurred: {e}"
35
 
36
def export_bibtex_flow(documents, file_obj):
    """Workflow for the 'Export Citation' button.

    Sends the beginning of the first parsed document to a citation-extractor
    agent and formats the agent's reply as a BibTeX entry.

    Args:
        documents: Parsed documents of the analyzed paper. Only
            ``documents[0].text`` is read (presumably the first page of the
            PDF -- confirm against the reader that produced the list).
        file_obj: The uploaded file, either an object exposing a ``.name``
            path attribute or a plain path string.

    Returns:
        The string returned by ``format_to_bibtex`` for the agent's reply.

    Raises:
        gr.Error: If no paper has been analyzed yet, or if no uploaded file
            is available to name the citation after.
    """
    if not documents:
        raise gr.Error("Please analyze a paper first.")

    # The upload can be cleared after analysis; report that to the user
    # instead of failing with an AttributeError on ``None.name``.
    file_path = getattr(file_obj, "name", file_obj)
    if not file_path:
        raise gr.Error("Please upload a paper first.")

    filename = os.path.basename(str(file_path))
    print(f"--- BibTeX Export: Starting citation extraction for {filename} ---")

    first_page_text = documents[0].text

    # We only need the LLM for this, no other tools.
    Settings.llm = get_llm()
    extractor_agent = create_specialist_agent(CITATION_EXTRACTOR_PROMPT, Settings.llm, [])

    # Give the agent the text and ask it to perform its task. The text is
    # capped at 4000 characters to keep the prompt small.
    response = extractor_agent.chat(
        f"Extract bibliographic data from this text: {first_page_text[:4000]}"
    )

    print(f"--- BibTeX Export: Agent responded with: {response.response} ---")

    # Format the extracted JSON into a BibTeX string
    bibtex_string = format_to_bibtex(response.response, filename)

    return bibtex_string
59
 
60
  def scout_agent_flow(topic_query, progress=gr.Progress()):
61
  """This function now runs the scout agent and directly returns its summary."""
utils.py CHANGED
@@ -1,6 +1,5 @@
1
- # utils.py (Corrected and Simplified)
2
-
3
  import os
 
4
  import requests
5
  from io import BytesIO
6
  from llama_index.llms.mistralai import MistralAI
@@ -24,4 +23,27 @@ def download_pdf_from_url(url: str):
24
  return BytesIO(response.content)
25
  except requests.exceptions.RequestException as e:
26
  print(f"Error downloading {url}: {e}")
27
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import json
3
  import requests
4
  from io import BytesIO
5
  from llama_index.llms.mistralai import MistralAI
 
23
  return BytesIO(response.content)
24
  except requests.exceptions.RequestException as e:
25
  print(f"Error downloading {url}: {e}")
26
+ return None
27
+
28
def format_to_bibtex(citation_json_str: str, arxiv_id: str) -> str:
    """Format a JSON string of citation data into a BibTeX ``@article`` entry.

    The JSON object may carry ``title``, ``authors`` (a list of names or a
    single name string) and ``year``. Missing, null or empty fields fall back
    to placeholders.

    Args:
        citation_json_str: JSON text describing the citation. Any text around
            the outermost ``{...}`` span (Markdown fences, commentary) is
            ignored.
        arxiv_id: Identifier written into the ``journal`` field. A trailing
            ``.pdf`` is dropped so that a file name can be passed directly.

    Returns:
        The BibTeX entry, or a fixed error message when the citation data
        cannot be parsed or has an unexpected shape.
    """
    placeholder = "N/A"
    try:
        if not isinstance(citation_json_str, str):
            raise TypeError("citation data must be a JSON string")

        # LLM replies often wrap the JSON in Markdown fences or commentary,
        # so parse only the outermost {...} span when one is present.
        payload = citation_json_str
        start, end = payload.find("{"), payload.rfind("}")
        if start != -1 and end > start:
            payload = payload[start:end + 1]

        data = json.loads(payload)
        if not isinstance(data, dict):
            raise TypeError("citation JSON must be an object")

        # ``or`` also covers explicit nulls / empty strings, not just
        # missing keys.
        title = data.get("title") or "No Title Found"

        raw_authors = data.get("authors") or [placeholder]
        if isinstance(raw_authors, str):
            # A bare string would otherwise be joined character by character.
            raw_authors = [raw_authors]
        author_names = [str(name).strip() for name in raw_authors]
        author_names = [name for name in author_names if name] or [placeholder]
        authors = " and ".join(author_names)

        year = data.get("year")
        year = placeholder if year in (None, "") else str(year).strip()

        # Create a simple citation key, e.g., "bouzenia2024", from the FIRST
        # author's surname. Both "First Last" and "Last, First" are handled.
        first_author = author_names[0]
        if "," in first_author:
            surname = first_author.split(",", 1)[0]
        else:
            surname = first_author.split()[-1]
        key_source = "".join(
            part for part in (surname, year) if part != placeholder
        )
        # Keep only alphanumerics so the key is safe for BibTeX parsers.
        key = "".join(ch for ch in key_source.lower() if ch.isalnum()) or "unknown"

        paper_id = str(arxiv_id)
        if paper_id.lower().endswith(".pdf"):
            paper_id = paper_id[:-4]

        bibtex_entry = "\n".join(
            [
                f"@article{{{key},",
                f"  title = {{{title}}},",
                f"  author = {{{authors}}},",
                f"  year = {{{year}}},",
                f"  journal = {{arXiv preprint arXiv:{paper_id}}}",
                "}",
            ]
        )
        return bibtex_entry
    except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
        print(f"Error formatting BibTeX: {e}")
        return "Could not generate BibTeX citation. The required data could not be extracted."