Spaces:

Cmuroc27
/

final_project_agents_course

Sleeping

App Files Files Community

Cmuroc27 committed on Nov 5, 2025

Commit

6e44e8b

1 Parent(s): 9808761

para audio y respuestas concisas

Browse files

Files changed (3) hide show

agents.py +32 -17
requirements.txt +3 -0
tools.py +35 -2

agents.py CHANGED Viewed

@@ -36,23 +36,38 @@ class AlfredAdvancedWorkflow(Workflow):
         # Agente de busqueda
         self.web_agent = AgentWorkflow.from_tools_or_functions([search_tool],
                                                      llm = llm,
-                                                     system_prompt="""You are a FACTUAL web search assistant. Rules:
-1. Use web_search tool ONCE
-2. Answer in 1-2 sentences MAXIMUM
-3. Be direct and factual
-4. If unsure, say "I don't know"
-5. NO introductions, NO explanations""")
         self.doc_agent = AgentWorkflow.from_tools_or_functions([read_document_tool,
                                                                 image_analyzer_tool, youtube_transcript_tol, calculator_tool],
                                                                 llm = llm,
-                                                  system_prompt = """You are a CONCISE document processor. Rules:
-1. Use ONE appropriate tool ONCE
-2. Answer in 1 sentence MAXIMUM
-3. For YouTube: use youtube_transcript
-4. For calculations: use calculator
-5. Be direct - just the answer""")
         #self.reviewer = AgentWorkflow.from_tools_or_functions([], llm = llm,
         #                                                      system_prompt=" You are an expert reviewer. Your task is to review the provided answer to ensure its accuracy, completeness, and relevance to the question. Be concise as much as possible")
@@ -83,7 +98,7 @@ IMPORTANT RULES:
    - Files with paths like "1.E Exercises" or documents → "web" (search for it online)
    - Calculations or analyzing images → "doc"
-2. If NO file/image is explicitly provided but question references online content → "web"
 3. Examples:
    - "What does Teal'c say in YouTube video?" → "doc" (youtube_transcript)
@@ -120,7 +135,7 @@ Respond with ONLY: "web", "doc", or "both"
                 await ctx.store.set("last_agent_type", agent_type)
             if agent_type == "both":
-                        doc_result = await self.doc_agent.run(question)
                         doc_answer = str(doc_result)
                         web_question = f"""{question}
@@ -129,7 +144,7 @@ Context from document analysis:
 {doc_answer}
 Now search the web for additional current information to complete the answer."""
-                        web_result = await self.web_agent.run(web_question)
                         web_answer = str(web_result)
                         final_answer = f"""Based on document analysis and web search:
@@ -139,11 +154,11 @@ Now search the web for additional current information to complete the answer."""
             elif agent_type == "web":
-                result = await self.web_agent.run(question)
                 final_answer = str(result)
             else:  # doc
-                result = await self.doc_agent.run(question)
                 final_answer = str(result)
             return AgentResponseEvent(

         # Agente de busqueda
         self.web_agent = AgentWorkflow.from_tools_or_functions([search_tool],
                                                      llm = llm,
+                                                     system_prompt="""USE web_search ONCE. Answer in 1-2 words if possible. BE DIRECT.
+EXAMPLES:
+Question: "Capital of France?"
+Answer: "Paris"
+Question: "2+2?"
+Answer: "4"
+NO explanations, NO introductions.""")
         self.doc_agent = AgentWorkflow.from_tools_or_functions([read_document_tool,
                                                                 image_analyzer_tool, youtube_transcript_tol, calculator_tool],
                                                                 llm = llm,
+                                                  system_prompt = """USE ONE tool ONCE. Answer in FEWEST WORDS possible.
+FOR AUDIO/FILES: Use read_document tool
+FOR YOUTUBE: Use youtube_transcript tool
+FOR CALCULATIONS: Use calculator tool
+EXAMPLES:
+Question: "What is 5*5?"
+Answer: "25"
+Question: "Opposite of left?"
+Answer: "right"
+Question: "Ingredients from recipe.mp3?"
+Answer: "apples, flour, sugar"
+NO extra text.""")
         #self.reviewer = AgentWorkflow.from_tools_or_functions([], llm = llm,
         #                                                      system_prompt=" You are an expert reviewer. Your task is to review the provided answer to ensure its accuracy, completeness, and relevance to the question. Be concise as much as possible")
    - Files with paths like "1.E Exercises" or documents → "web" (search for it online)
    - Calculations or analyzing images → "doc"
+2. If NO file/image is explicitly provided but question references open question → "web"
 3. Examples:
    - "What does Teal'c say in YouTube video?" → "doc" (youtube_transcript)
                 await ctx.store.set("last_agent_type", agent_type)
             if agent_type == "both":
+                        doc_result = await self.doc_agent.run(question, max_iterations=10)
                         doc_answer = str(doc_result)
                         web_question = f"""{question}
 {doc_answer}
 Now search the web for additional current information to complete the answer."""
+                        web_result = await self.web_agent.run(web_question, max_iterations=10)
                         web_answer = str(web_result)
                         final_answer = f"""Based on document analysis and web search:
             elif agent_type == "web":
+                result = await self.web_agent.run(question, max_iterations=10)
                 final_answer = str(result)
             else:  # doc
+                result = await self.doc_agent.run(question, max_iterations=10)
                 final_answer = str(result)
             return AgentResponseEvent(

requirements.txt CHANGED Viewed

@@ -8,6 +8,9 @@ llama-index-multi-modal-llms-openai
 llama-index-tools-duckduckgo
 numexpr
 pypdf
 python-docx
 pillow
 pandas

 llama-index-tools-duckduckgo
 numexpr
 pypdf
+whisper
+tempfile
+pydub
 python-docx
 pillow
 pandas

tools.py CHANGED Viewed

@@ -7,6 +7,7 @@ from llama_index.core.agent.workflow import AgentWorkflow
 import numexpr as ne
 from llama_index.llms.openai import OpenAI
 import base64
 import os
 from huggingface_hub import InferenceClient
 from dotenv import load_dotenv
@@ -18,6 +19,31 @@ OPEN_AI = os.getenv("OPENAI_API_KEY").strip()
 HF_TOKEN = os.environ.get("HF_TOKEN")
 client = InferenceClient(HF_TOKEN)
 def get_youtube_transcript(video_url: str) -> str:
     try:
         # Extraer ID del video más robustamente
@@ -58,8 +84,15 @@ def read_document(file_path: str) -> str:
         if not os.path.exists(file_path):
             return "Error: File not found at "
-        reader = SimpleDirectoryReader(input_files=[file_path])
-        documents = reader.load_data()
         if not documents:
             return "Error: No content found in the file."

 import numexpr as ne
 from llama_index.llms.openai import OpenAI
 import base64
+import openai
 import os
 from huggingface_hub import InferenceClient
 from dotenv import load_dotenv
 HF_TOKEN = os.environ.get("HF_TOKEN")
 client = InferenceClient(HF_TOKEN)
def transcribe_audio_openai(audio_path: str) -> str:
    """Transcribe an audio file with the hosted OpenAI Whisper model.

    The transcription runs through the OpenAI API (model ``whisper-1``), so
    no local speech model has to be loaded by the process.

    Args:
        audio_path: Path of the audio file to upload for transcription.

    Returns:
        The transcribed text with surrounding whitespace removed. On any
        failure a human-readable string starting with ``"Error"`` is
        returned instead: the path is not a regular file, the API key is
        not configured, or the request raised an exception. Exceptions are
        converted to such a string rather than propagated.
    """
    try:
        # isfile rather than exists: a directory "exists" but cannot be
        # opened for upload, so report it the same way as a missing file.
        if not os.path.isfile(audio_path):
            return "Error: Audio file not found."
        # Make sure the API key is available before calling the service.
        if not OPEN_AI:
            return "Error: OpenAI API key not configured"
        # Configure the OpenAI module-level client.
        openai.api_key = OPEN_AI
        with open(audio_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )
        # The plain-text response ends with a newline; strip it so callers
        # that embed the transcript in a sentence get clean text.
        return str(transcript).strip()
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error transcribing audio: {str(e)}"
 def get_youtube_transcript(video_url: str) -> str:
     try:
         # Extraer ID del video más robustamente
         if not os.path.exists(file_path):
             return "Error: File not found at "
+        file_ext = os.path.splitext(file_path)[1].lower()
+        if file_ext in ['.mp3', '.wav', '.m4a', '.flac', '.ogg']:
+            transcription = transcribe_audio_openai(file_path)
+            return f"Audio transcription: {transcription}"
+        elif file_ext in ['.txt', '.pdf', '.docx', '.csv', '.json', '.md']:
+            reader = SimpleDirectoryReader(input_files=[file_path])
+            documents = reader.load_data()
         if not documents:
             return "Error: No content found in the file."