Spaces:

Cmuroc27
/

final_project_agents_course

Sleeping

App Files Files Community

Cmuroc27 committed on Nov 23, 2025

Commit

602af35

1 Parent(s): ce06eb3

para leer bien documentos

Browse files

Files changed (2) hide show

app.py +10 -2
tools.py +67 -67

app.py CHANGED Viewed

@@ -56,8 +56,16 @@ class BasicAgent:
             answer = asyncio.run(
                 (asyncio.wait_for(self.agent.run(full_question), timeout=60.0)))
-            print(f" Final answer is : {answer[:60]}...")
             return answer
         except asyncio.TimeoutError:

             answer = asyncio.run(
                 (asyncio.wait_for(self.agent.run(full_question), timeout=60.0)))
+            if answer.endswith('.') and len(answer) > 2:
+                # Conservar si es "U.S.A." o similar (más de 1 punto)
+                if answer.count('.') == 1:
+                    answer = answer.rstrip('.')
+            # Normalizar comas
+            if "," in answer and ", " not in answer:
+                answer = answer.replace(", ", ",")
             return answer
         except asyncio.TimeoutError:

tools.py CHANGED Viewed

@@ -70,86 +70,86 @@ def get_youtube_transcript(video_url: str) -> str:
     except Exception as e:
         return f"Unavailable. Error: {str(e)}"
-def read_document(file_path: str) -> str:
     try:
-        # 1. Buscar archivo localmente
-        possible_paths = [
-            file_path,
-            os.path.join(".", file_path),
-            os.path.join("/tmp", file_path),
-            os.path.join(os.getcwd(), file_path),
-        ]
-        actual_path = None
-        for path in possible_paths:
-            if os.path.exists(path):
-                actual_path = path
-                break
-        # 2. Si no se encuentra, intentar descargar desde GAIA endpoint
-        if not actual_path:
-            download_url = f"https://agents-course-unit4-scoring.hf.space/files/{file_path}"
-            print(f"📥 Attempting to download: {download_url}")
-            try:
-                response = requests.get(download_url, timeout=15)
-                response.raise_for_status()
-                # Guardar en archivo temporal
-                suffix = os.path.splitext(file_path)[1] or ""
-                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
-                    tmp.write(response.content)
-                    actual_path = tmp.name
-                print(f"✅ Downloaded and saved to: {actual_path}")
-            except Exception as dl_err:
-                # Listar archivos disponibles localmente como fallback
-                try:
-                    available = os.listdir(".")
-                except:
-                    available = []
-                return (f"Error: File '{file_path}' not found locally or via download. "
-                        f"Download failed: {str(dl_err)}. "
-                        f"Local files: {', '.join(available[:10])}")
-        # 3. Leer el archivo (ya sea local o descargado)
-        file_ext = os.path.splitext(actual_path)[1].lower()
-        # Audio
-        if file_ext in ['.mp3', '.wav', '.m4a', '.flac', '.ogg']:
             transcription = transcribe_audio_openai(actual_path)
             result = f"Audio transcription: {transcription}"
-        # Documentos soportados
-        elif file_ext in ['.txt', '.pdf', '.docx', '.csv', '.json', '.md', '.xlsx']:
             reader = SimpleDirectoryReader(input_files=[actual_path])
-            documents = reader.load_data()
-            if not documents:
-                result = "Error: No content found in file"
-            else:
-                full_text = "\n\n".join(doc.text for doc in documents)
-                result = f"File: {os.path.basename(actual_path)}\n\n{full_text}"
         else:
-            result = f"Error: Unsupported file type: {file_ext} (path: {actual_path})"
-        # 4. Limpiar: borrar archivo temporal si se descargó
-        if actual_path and not any(os.path.samefile(actual_path, p) for p in possible_paths if os.path.exists(p)):
             try:
                 os.unlink(actual_path)
-                print(f"🧹 Cleaned up temporary file: {actual_path}")
             except:
-                pass  # no hacer nada si falla la limpieza
-        return result
-    except Exception as e:
-        return f"Error reading file '{file_path}': {str(e)}"

     except Exception as e:
         return f"Unavailable. Error: {str(e)}"
# File extensions routed to audio transcription.
_AUDIO_EXTENSIONS = ('.mp3', '.wav', '.m4a', '.flac', '.ogg')

# File extensions routed to the document reader.
_DOCUMENT_EXTENSIONS = ('.txt', '.pdf', '.docx', '.csv', '.json', '.md', '.xlsx')

# Ordered (substrings, suffix) pairs: the first entry with a substring found
# in the Content-Type header wins. The first four entries keep the original
# precedence; the later ones cover types the reader supports but that
# previously fell through to the '.xlsx' fallback.
_CONTENT_TYPE_SUFFIXES = (
    (('pdf',), '.pdf'),
    (('excel', 'sheet'), '.xlsx'),
    (('json',), '.json'),
    (('text', 'csv'), '.csv'),
    (('wordprocessingml',), '.docx'),
    (('audio/mpeg', 'audio/mp3'), '.mp3'),
    (('audio/wav', 'audio/x-wav', 'audio/wave'), '.wav'),
    (('audio/mp4', 'audio/x-m4a', 'audio/m4a'), '.m4a'),
    (('audio/flac', 'audio/x-flac'), '.flac'),
    (('audio/ogg',), '.ogg'),
)


def _suffix_from_content_type(content_type: str) -> str:
    """Map an HTTP Content-Type header value to a file suffix ('.xlsx' if unknown)."""
    normalized = content_type.lower()
    for needles, suffix in _CONTENT_TYPE_SUFFIXES:
        if any(needle in normalized for needle in needles):
            return suffix
    return '.xlsx'  # fallback


def read_document(file_spec: str) -> str:
    """Read a document or audio file and return its content as text.

    ``file_spec`` can be:
      - a local file path (e.g., "data.xlsx"), tried as given, relative to
        the current directory, and under ``/tmp``;
      - a task_id (e.g., "abc123"), whose attachment is downloaded from the
        GAIA scoring endpoint into a temporary file that is removed again
        before returning.

    Args:
        file_spec: Local path or task identifier.

    Returns:
        The extracted text (or audio transcription) with a short header, or
        a human-readable error message. Download, lookup and read failures
        are reported in the returned string rather than raised.
    """
    import os
    import tempfile
    from urllib.parse import quote

    actual_path = None
    # True only when this call created the file, so cleanup can never
    # delete a file the caller owns.
    is_temporary = False

    # 1. A short, purely alphanumeric spec without a dot is assumed to be a
    #    task_id rather than a file name.
    # NOTE(review): hyphenated ids (e.g. UUID-style) fail this check and are
    # treated as local paths - confirm the id format used by the endpoint.
    looks_like_task_id = (
        os.path.basename(file_spec).isalnum()
        and len(file_spec) in range(5, 20)
        and '.' not in file_spec
    )

    if looks_like_task_id:
        download_url = f"https://agents-course-unit4-scoring.hf.space/files/{quote(file_spec)}"
        print(f"📥 Downloading from: {download_url}")
        try:
            # Imported here so the local-file path works without requests.
            import requests

            response = requests.get(download_url, timeout=15)
            response.raise_for_status()
            # The endpoint gives no file name, so derive the extension from
            # the Content-Type header.
            suffix = _suffix_from_content_type(
                response.headers.get('content-type', '')
            )
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp.write(response.content)
                actual_path = tmp.name
            is_temporary = True
            print(f"✅ Saved as: {actual_path}")
        except Exception as e:
            return f"Download failed for task_id '{file_spec}': {str(e)}"
    else:
        # 2. Look for the file locally; directories are not valid inputs.
        candidates = [file_spec, f"./{file_spec}", f"/tmp/{file_spec}"]
        actual_path = next((p for p in candidates if os.path.isfile(p)), None)

    if not actual_path:
        return f"File not found locally or downloadable: {file_spec}"

    # 3. Read the file with the handler matching its extension.
    try:
        ext = os.path.splitext(actual_path)[1].lower()
        if ext in _AUDIO_EXTENSIONS:
            transcription = transcribe_audio_openai(actual_path)
            result = f"Audio transcription: {transcription}"
        elif ext in _DOCUMENT_EXTENSIONS:
            reader = SimpleDirectoryReader(input_files=[actual_path])
            docs = reader.load_data()
            full_text = "\n\n".join(doc.text for doc in docs) if docs else ""
            result = f"File: {os.path.basename(actual_path)}\n\n{full_text}"
        else:
            result = f"Unsupported file type: {ext}"
    except Exception as e:
        result = f"Error reading {actual_path}: {str(e)}"
    finally:
        # Remove the downloaded temporary file. This relies on an explicit
        # flag rather than a variable that only exists on the local branch.
        if is_temporary:
            try:
                os.unlink(actual_path)
            except OSError:
                pass  # best-effort cleanup; the result is still valid
    return result