geronimo-pericoli committed on
Commit
ad40800
·
verified ·
1 Parent(s): 27c813d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -0
app.py CHANGED
@@ -1,4 +1,134 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  def letter_counter(word, letter):
4
  """Count the occurrences of a specific letter in a word.
 
import asyncio
import json
import logging
import os
from pathlib import Path
from typing import Optional, List, Dict, Any

import gradio as gr
from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.llms.openai import OpenAI
from llama_index.tools.arxiv import ArxivToolSpec
18
+
19
+
20
##### LLM #####
# API key is read from the environment; None when unset (the client will
# fail later at call time in that case).
openai_api_key = os.environ.get('OPENAI_API_KEY')

# Chat model used for generation.
llm = OpenAI(model="gpt-4.1", api_key=openai_api_key)

# Embedding model used when building / querying the vector indices.
embed_model = OpenAIEmbedding(model="text-embedding-ada-002", api_key=openai_api_key)

# Register both as the llama_index global defaults so every index and query
# engine below picks them up implicitly.
Settings.llm = llm
Settings.embed_model = embed_model
##### END LLM #####
36
+
37
+
38
+
39
##### LOAD RETRIEVERS #####
# Root folder holding one sub-directory of persisted indices per source.
DOCUMENTS_BASE_PATH = "./"
# JSON file mapping each source name to its retriever metadata.
RETRIEVERS_JSON_PATH = Path("./retrievers.json")

# FIX: `logger` was used below but never defined anywhere in the file,
# which raised NameError on the error path instead of logging.
logger = logging.getLogger(__name__)


# Cargar metadatos
def load_retrievers_metadata() -> Dict[str, Any]:
    """Load retriever metadata from ``retrievers.json``.

    Returns:
        The parsed JSON object, or an empty dict when the file is missing
        or unparseable (best-effort: the error is logged, never raised, so
        the app can still boot without metadata).
    """
    try:
        with open(RETRIEVERS_JSON_PATH, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # Narrowed from bare Exception: only I/O and parse failures are
        # expected here; anything else should surface loudly.
        logger.error(f"Error cargando retrievers.json: {str(e)}", exc_info=True)
        return {}


retrievers_metadata = load_retrievers_metadata()
# Each source's files live in a directory named after it, lower-cased.
SOURCES = {source: f"{source.lower()}/" for source in retrievers_metadata.keys()}
54
+
55
# Cargar índices
# FIX: `logger` was referenced below but never defined in the file
# (NameError). getLogger is idempotent, so re-obtaining it here is safe.
logger = logging.getLogger(__name__)

# One VectorStoreIndex per discovered "storage_nodes" directory, keyed by
# the name of the directory that contains it.
indices: Dict[str, VectorStoreIndex] = {}

for source, rel_path in SOURCES.items():
    full_path = os.path.join(DOCUMENTS_BASE_PATH, rel_path)
    if not os.path.exists(full_path):
        logger.warning(f"No se encontró la ruta para {source}")
        continue

    for root, dirs, files in os.walk(full_path):
        if "storage_nodes" not in dirs:
            continue
        # FIX: bind index_name BEFORE the try block — previously it was
        # assigned after StorageContext.from_defaults, so a failure there
        # made the except handler itself raise NameError on index_name.
        index_name = os.path.basename(root)
        try:
            storage_path = os.path.join(root, "storage_nodes")
            storage_context = StorageContext.from_defaults(persist_dir=storage_path)
            indices[index_name] = load_index_from_storage(storage_context)  # , index_id="vector_index"
            logger.info(f"Índice cargado correctamente: {index_name}")
        except Exception as e:
            # Best-effort: skip indices that fail to load, keep the rest.
            logger.error(f"Error cargando índice {index_name}: {str(e)}", exc_info=True)
74
+
75
+
76
+
77
+
78
+
79
# Module-level arXiv tool (default limit: 5 results). return_direct marks the
# tool so an agent returns its output to the user without further processing.
arxiv_tool = ArxivToolSpec(max_results=5).to_tool_list()[0]
arxiv_tool.return_direct = True


async def search_arxiv(
    query: str,
    max_results: int = 5
) -> Dict[str, Any]:
    """Search arXiv for academic papers.

    Args:
        query: Search terms (e.g. "deep learning").
        max_results: Maximum number of results (clamped to 1-10, default 5).

    Returns:
        Dict with keys 'papers', 'count', 'query', 'status' and, on
        failure, 'error'. Never raises: errors are reported in-band.
    """
    try:
        # Clamp the requested limit to the supported 1-10 range.
        max_results = min(max(1, max_results), 10)

        # FIX: the original set `arxiv_tool.metadata.max_results`, which only
        # attaches an unused attribute to the ToolMetadata object — the
        # already-constructed spec kept returning 5 results regardless.
        # Build a tool configured with the requested limit instead.
        tool = ArxivToolSpec(max_results=max_results).to_tool_list()[0]

        # NOTE(review): synchronous call inside an async def — this blocks the
        # event loop for the duration of the arXiv request; consider
        # asyncio.to_thread if that matters in this app.
        tool_output = tool(query=query)

        # Each document's text is assumed to start with a "<url>: <title>"
        # line followed by the abstract — TODO confirm against the actual
        # ArxivToolSpec output format.
        papers = []
        for doc in tool_output.raw_output:
            content = doc.text_resource.text.split('\n')
            papers.append({
                'title': content[0].split(': ')[1] if ': ' in content[0] else content[0],
                'abstract': '\n'.join(content[1:]).strip(),
                'pdf_url': content[0].split(': ')[0].replace('http://', 'https://'),
                'arxiv_id': content[0].split(': ')[0].split('/')[-1].replace('v1', '')
            })

        return {
            'papers': papers,
            'count': len(papers),
            'query': query,
            'status': 'success'
        }

    except Exception as e:
        # Best-effort API: report the failure in the payload instead of
        # raising, so callers always get a uniform dict back.
        return {
            'papers': [],
            'count': 0,
            'query': query,
            'status': 'error',
            'error': str(e)
        }
130
+
131
+
132
 
133
  def letter_counter(word, letter):
134
  """Count the occurrences of a specific letter in a word.