| import os |
| import json |
| import requests |
| import re |
| from io import BytesIO |
| from llama_index.llms.mistralai import MistralAI |
|
|
def get_llm(model: str = "mistral-medium-latest", timeout: float = 240):
    """Build and return the MistralAI client.

    The API key is read from the ``MISTRAL_API_KEY`` environment variable so
    that it never has to appear in source code.

    Args:
        model: Model name passed to ``MistralAI``.
        timeout: Request timeout passed to ``MistralAI``.

    Returns:
        A ``MistralAI`` instance configured with the key, model and timeout.

    Raises:
        ValueError: If ``MISTRAL_API_KEY`` is unset, empty, or only whitespace.
    """
    # Strip so that a blank value (e.g. a stray newline or spaces in a .env
    # file) is reported here instead of failing later as an opaque auth error.
    mistral_api_key = (os.getenv("MISTRAL_API_KEY") or "").strip()
    if not mistral_api_key:
        raise ValueError("MISTRAL_API_KEY environment variable not set. Please set it before running the app.")

    return MistralAI(api_key=mistral_api_key, model=model, timeout=timeout)
|
|
def download_pdf_from_url(url: str, timeout: float = 20):
    """Download the content at *url* and return it as an in-memory stream.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Timeout in seconds passed to ``requests.get``. Defaults to
            the previously hard-coded 20 seconds.

    Returns:
        A ``BytesIO`` wrapping the response body, or ``None`` if the request
        failed (connection error, timeout, or an HTTP error status).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions so an error page is never
        # returned to the caller as if it were the document.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best-effort by design: report the failure and let the caller decide.
        print(f"Error downloading {url}: {e}")
        return None
    return BytesIO(response.content)
|
|
def _bibtex_key_part(text: str, fallback: str) -> str:
    """Lower-case *text*, keep only word characters and hyphens, or return *fallback*."""
    cleaned = re.sub(r"[^\w-]+", "", text.lower())
    return cleaned or fallback


def format_to_bibtex(citation_json_str: str, filename: str) -> str:
    """Format an LLM citation response as a BibTeX ``@article`` entry.

    Args:
        citation_json_str: Text containing a JSON object with the optional
            keys ``title``, ``authors`` (list of names or a single string)
            and ``year``. Text before and after the object is ignored.
        filename: Name searched for an arXiv identifier (``YYMM.NNNN`` or
            ``YYMM.NNNNN``); ``"N/A"`` is used when none is found.

    Returns:
        The BibTeX entry. The citation key is
        ``<first author's surname><year digits><first title word>``. If the
        data cannot be extracted, a fixed human-readable failure message is
        returned instead.
    """
    try:
        start = citation_json_str.find("{")
        if start == -1:
            raise ValueError("No valid JSON object found in the LLM response.")
        # raw_decode stops at the end of the first complete object, so braces
        # in trailing chatter (e.g. "hope {this} helps") no longer break parsing.
        data, _ = json.JSONDecoder().raw_decode(citation_json_str[start:])

        # Sequence numbers have 4 digits before 2015 and 5 digits since.
        match = re.search(r"(\d{4}\.\d{4,5})", filename)
        arxiv_id = match.group(1) if match else "N/A"

        title = str(data.get("title") or "").strip()
        title_words = title.split()
        first_title_word = (
            _bibtex_key_part(title_words[0], "untitled") if title_words else "untitled"
        )
        if not title:
            title = "No Title Found"

        # "authors" may be missing, null, a single string, or a list.
        raw_authors = data.get("authors") or []
        if isinstance(raw_authors, str):
            raw_authors = [raw_authors]
        author_names = [
            name for name in (str(author).strip() for author in raw_authors) if name
        ]
        authors = " and ".join(author_names) if author_names else "N/A"

        first_author_lastname = "unknown"
        if author_names:
            first_author = author_names[0]
            if "," in first_author:
                # "Last, First" form: the surname precedes the comma.
                surname = first_author.split(",", 1)[0]
            else:
                surname = first_author.split()[-1]
            first_author_lastname = _bibtex_key_part(surname, "unknown")

        raw_year = data.get("year")
        year = "N/A" if raw_year in (None, "") else str(raw_year).strip()
        # Only digits go into the key so a missing year does not add "N/A".
        key_year = re.sub(r"\D", "", year)

        key = f"{first_author_lastname}{key_year}{first_title_word}"
        bibtex_entry = "\n".join(
            [
                f"@article{{{key},",
                f"title = {{{title}}},",
                f"author = {{{authors}}},",
                f"year = {{{year}}},",
                f"journal = {{arXiv preprint arXiv:{arxiv_id}}}",
                "}",
            ]
        )
    except (json.JSONDecodeError, KeyError, AttributeError, ValueError, TypeError) as e:
        print(f"Error formatting BibTeX: {e}")
        return "Could not generate BibTeX citation. The required data could not be extracted."
    return bibtex_entry
|
|