|
|
import os |
|
|
import json |
|
|
import requests |
|
|
import re |
|
|
from io import BytesIO |
|
|
from llama_index.llms.mistralai import MistralAI |
|
|
|
|
|
def get_llm(model: str = "mistral-medium-latest", timeout: float = 240):
    """Build and return a configured MistralAI client.

    The API key is read from the ``MISTRAL_API_KEY`` environment variable so
    that it never has to appear in source code.

    Args:
        model: Name of the Mistral model to use. Defaults to the value that
            was previously hard-coded, so existing ``get_llm()`` calls behave
            exactly as before.
        timeout: Timeout value forwarded to the ``MistralAI`` constructor.
            Defaults to the previously hard-coded 240.

    Returns:
        A ``MistralAI`` instance created with the key, model and timeout.

    Raises:
        ValueError: If ``MISTRAL_API_KEY`` is unset, empty, or contains only
            whitespace.
    """
    # Strip so that a blank / whitespace-only value is reported as "not set"
    # here instead of being sent to the API as a bogus key.
    mistral_api_key = (os.getenv("MISTRAL_API_KEY") or "").strip()
    if not mistral_api_key:
        raise ValueError(
            "MISTRAL_API_KEY environment variable not set. "
            "Please set it before running the app."
        )

    return MistralAI(api_key=mistral_api_key, model=model, timeout=timeout)
|
|
|
|
|
def download_pdf_from_url(url: str):
    """Fetch the resource at ``url`` and wrap its body in a ``BytesIO``.

    The request is issued with ``requests.get`` using a timeout of 20, and a
    non-success HTTP status is turned into an exception via
    ``raise_for_status``.

    Returns:
        A ``BytesIO`` holding the response body, or ``None`` when any
        ``requests.exceptions.RequestException`` occurs (the error is printed
        to stdout rather than propagated).
    """
    pdf_stream = None
    try:
        reply = requests.get(url, timeout=20)
        reply.raise_for_status()
        pdf_stream = BytesIO(reply.content)
    except requests.exceptions.RequestException as exc:
        # Best-effort download: report the problem and signal failure with None.
        print(f"Error downloading {url}: {exc}")
    return pdf_stream
|
|
|
|
|
def _extract_json_object(text: str) -> dict:
    """Decode the JSON object that starts at the first ``{`` in *text*."""
    start = text.find("{")
    if start == -1:
        raise ValueError("No valid JSON object found in the LLM response.")
    # raw_decode stops at the end of the first complete JSON value, so prose or
    # code fences after the object (even ones containing braces) are ignored.
    obj, _ = json.JSONDecoder().raw_decode(text, start)
    return obj


def _bibtex_key_part(text: str) -> str:
    """Lower-case *text* and drop every character that is not a letter or digit."""
    return re.sub(r"[\W_]+", "", text.lower())


def format_to_bibtex(citation_json_str: str, filename: str) -> str:
    """Format citation data found in an LLM response as a BibTeX ``@article``.

    Args:
        citation_json_str: Text containing a JSON object with the optional
            keys ``"title"``, ``"authors"`` (a list of names or a single
            string) and ``"year"``. Text surrounding the object, such as a
            Markdown code fence, is ignored.
        filename: Name searched for an arXiv identifier of the form
            ``YYMM.NNNN`` or ``YYMM.NNNNN``; ``"N/A"`` is used when none is
            found.

    Returns:
        The BibTeX entry. The citation key is
        ``<first author's last name><year><first title word>``, lower-cased
        and reduced to letters and digits. If the data cannot be parsed, a
        fixed human-readable error message is returned instead of raising.
    """
    try:
        data = _extract_json_object(citation_json_str)

        # Identifiers have 4 digits after the dot up to 2014 and 5 afterwards.
        arxiv_match = re.search(r"(\d{4}\.\d{4,5})", str(filename or ""))
        arxiv_id = arxiv_match.group(1) if arxiv_match else "N/A"

        raw_title = str(data.get("title") or "").strip()
        title = raw_title or "No Title Found"

        raw_authors = data.get("authors") or []
        if isinstance(raw_authors, str):
            # A bare string must not be joined character by character.
            raw_authors = [raw_authors]
        author_names = [str(name).strip() for name in raw_authors]
        author_names = [name for name in author_names if name]
        authors = " and ".join(author_names) or "N/A"

        raw_year = data.get("year")
        year = "N/A" if raw_year in (None, "") else str(raw_year).strip()

        # Key component 1: surname of the FIRST author. A single string may
        # already be in BibTeX "A and B" form, and a name may be "Last, First".
        first_author = author_names[0].split(" and ", 1)[0] if author_names else ""
        if "," in first_author:
            first_author = first_author.split(",", 1)[0]
        surname_words = first_author.split()
        first_author_lastname = (
            _bibtex_key_part(surname_words[-1]) if surname_words else ""
        ) or "unknown"

        # Key component 2: the year, omitted when unknown so the key never
        # contains the "/" of "N/A".
        year_part = _bibtex_key_part(year) if year != "N/A" else ""

        # Key component 3: first word of the title actually supplied.
        title_words = raw_title.split()
        first_title_word = (
            _bibtex_key_part(title_words[0]) if title_words else ""
        ) or "untitled"

        key = f"{first_author_lastname}{year_part}{first_title_word}"

        return "\n".join(
            [
                f"@article{{{key},",
                f"  title = {{{title}}},",
                f"  author = {{{authors}}},",
                f"  year = {{{year}}},",
                f"  journal = {{arXiv preprint arXiv:{arxiv_id}}}",
                "}",
            ]
        )

    except (json.JSONDecodeError, KeyError, AttributeError, ValueError, TypeError) as e:
        print(f"Error formatting BibTeX: {e}")
        return "Could not generate BibTeX citation. The required data could not be extracted."
|
|
|