Cmuroc27 committed on
Commit
6e44e8b
·
1 Parent(s): 9808761

para audio y respuestas concisas

Browse files
Files changed (3) hide show
  1. agents.py +32 -17
  2. requirements.txt +3 -0
  3. tools.py +35 -2
agents.py CHANGED
@@ -36,23 +36,38 @@ class AlfredAdvancedWorkflow(Workflow):
36
  # Agente de busqueda
37
  self.web_agent = AgentWorkflow.from_tools_or_functions([search_tool],
38
  llm = llm,
39
- system_prompt="""You are a FACTUAL web search assistant. Rules:
40
- 1. Use web_search tool ONCE
41
- 2. Answer in 1-2 sentences MAXIMUM
42
- 3. Be direct and factual
43
- 4. If unsure, say "I don't know"
44
- 5. NO introductions, NO explanations""")
 
 
 
 
45
 
46
  self.doc_agent = AgentWorkflow.from_tools_or_functions([read_document_tool,
47
  image_analyzer_tool, youtube_transcript_tol, calculator_tool],
48
  llm = llm,
49
 
50
- system_prompt = """You are a CONCISE document processor. Rules:
51
- 1. Use ONE appropriate tool ONCE
52
- 2. Answer in 1 sentence MAXIMUM
53
- 3. For YouTube: use youtube_transcript
54
- 4. For calculations: use calculator
55
- 5. Be direct - just the answer""")
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  #self.reviewer = AgentWorkflow.from_tools_or_functions([], llm = llm,
58
  # system_prompt=" You are an expert reviewer. Your task is to review the provided answer to ensure its accuracy, completeness, and relevance to the question. Be concise as much as possible")
@@ -83,7 +98,7 @@ IMPORTANT RULES:
83
  - Files with paths like "1.E Exercises" or documents → "web" (search for it online)
84
  - Calculations or analyzing images → "doc"
85
 
86
- 2. If NO file/image is explicitly provided but question references online content → "web"
87
 
88
  3. Examples:
89
  - "What does Teal'c say in YouTube video?" → "doc" (youtube_transcript)
@@ -120,7 +135,7 @@ Respond with ONLY: "web", "doc", or "both"
120
  await ctx.store.set("last_agent_type", agent_type)
121
 
122
  if agent_type == "both":
123
- doc_result = await self.doc_agent.run(question)
124
  doc_answer = str(doc_result)
125
 
126
  web_question = f"""{question}
@@ -129,7 +144,7 @@ Context from document analysis:
129
  {doc_answer}
130
 
131
  Now search the web for additional current information to complete the answer."""
132
- web_result = await self.web_agent.run(web_question)
133
  web_answer = str(web_result)
134
 
135
  final_answer = f"""Based on document analysis and web search:
@@ -139,11 +154,11 @@ Now search the web for additional current information to complete the answer."""
139
 
140
 
141
  elif agent_type == "web":
142
- result = await self.web_agent.run(question)
143
  final_answer = str(result)
144
 
145
  else: # doc
146
- result = await self.doc_agent.run(question)
147
  final_answer = str(result)
148
 
149
  return AgentResponseEvent(
 
36
  # Agente de busqueda
37
  self.web_agent = AgentWorkflow.from_tools_or_functions([search_tool],
38
  llm = llm,
39
+ system_prompt="""USE web_search ONCE. Answer in 1-2 words if possible. BE DIRECT.
40
+
41
+ EXAMPLES:
42
+ Question: "Capital of France?"
43
+ Answer: "Paris"
44
+
45
+ Question: "2+2?"
46
+ Answer: "4"
47
+
48
+ NO explanations, NO introductions.""")
49
 
50
  self.doc_agent = AgentWorkflow.from_tools_or_functions([read_document_tool,
51
  image_analyzer_tool, youtube_transcript_tol, calculator_tool],
52
  llm = llm,
53
 
54
+ system_prompt = """USE ONE tool ONCE. Answer in FEWEST WORDS possible.
55
+
56
+ FOR AUDIO/FILES: Use read_document tool
57
+ FOR YOUTUBE: Use youtube_transcript tool
58
+ FOR CALCULATIONS: Use calculator tool
59
+
60
+ EXAMPLES:
61
+ Question: "What is 5*5?"
62
+ Answer: "25"
63
+
64
+ Question: "Opposite of left?"
65
+ Answer: "right"
66
+
67
+ Question: "Ingredients from recipe.mp3?"
68
+ Answer: "apples, flour, sugar"
69
+
70
+ NO extra text.""")
71
 
72
  #self.reviewer = AgentWorkflow.from_tools_or_functions([], llm = llm,
73
  # system_prompt=" You are an expert reviewer. Your task is to review the provided answer to ensure its accuracy, completeness, and relevance to the question. Be concise as much as possible")
 
98
  - Files with paths like "1.E Exercises" or documents → "web" (search for it online)
99
  - Calculations or analyzing images → "doc"
100
 
101
+ 2. If NO file/image is explicitly provided but question references open question → "web"
102
 
103
  3. Examples:
104
  - "What does Teal'c say in YouTube video?" → "doc" (youtube_transcript)
 
135
  await ctx.store.set("last_agent_type", agent_type)
136
 
137
  if agent_type == "both":
138
+ doc_result = await self.doc_agent.run(question, max_iterations=10)
139
  doc_answer = str(doc_result)
140
 
141
  web_question = f"""{question}
 
144
  {doc_answer}
145
 
146
  Now search the web for additional current information to complete the answer."""
147
+ web_result = await self.web_agent.run(web_question, max_iterations=10)
148
  web_answer = str(web_result)
149
 
150
  final_answer = f"""Based on document analysis and web search:
 
154
 
155
 
156
  elif agent_type == "web":
157
+ result = await self.web_agent.run(question, max_iterations=10)
158
  final_answer = str(result)
159
 
160
  else: # doc
161
+ result = await self.doc_agent.run(question, max_iterations=10)
162
  final_answer = str(result)
163
 
164
  return AgentResponseEvent(
requirements.txt CHANGED
@@ -8,6 +8,9 @@ llama-index-multi-modal-llms-openai
8
  llama-index-tools-duckduckgo
9
  numexpr
10
  pypdf
 
 
 
11
  python-docx
12
  pillow
13
  pandas
 
8
  llama-index-tools-duckduckgo
9
  numexpr
10
  pypdf
11
+ whisper
12
+ tempfile
13
+ pydub
14
  python-docx
15
  pillow
16
  pandas
tools.py CHANGED
@@ -7,6 +7,7 @@ from llama_index.core.agent.workflow import AgentWorkflow
7
  import numexpr as ne
8
  from llama_index.llms.openai import OpenAI
9
  import base64
 
10
  import os
11
  from huggingface_hub import InferenceClient
12
  from dotenv import load_dotenv
@@ -18,6 +19,31 @@ OPEN_AI = os.getenv("OPENAI_API_KEY").strip()
18
  HF_TOKEN = os.environ.get("HF_TOKEN")
19
  client = InferenceClient(HF_TOKEN)
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def get_youtube_transcript(video_url: str) -> str:
22
  try:
23
  # Extraer ID del video más robustamente
@@ -58,8 +84,15 @@ def read_document(file_path: str) -> str:
58
  if not os.path.exists(file_path):
59
  return "Error: File not found at "
60
 
61
- reader = SimpleDirectoryReader(input_files=[file_path])
62
- documents = reader.load_data()
 
 
 
 
 
 
 
63
 
64
  if not documents:
65
  return "Error: No content found in the file."
 
7
  import numexpr as ne
8
  from llama_index.llms.openai import OpenAI
9
  import base64
10
+ import openai
11
  import os
12
  from huggingface_hub import InferenceClient
13
  from dotenv import load_dotenv
 
19
  HF_TOKEN = os.environ.get("HF_TOKEN")
20
  client = InferenceClient(HF_TOKEN)
21
 
22
+ def transcribe_audio_openai(audio_path: str) -> str:
23
+ """Transcribe audio using OpenAI Whisper API - compatible with Spaces"""
24
+ try:
25
+ if not os.path.exists(audio_path):
26
+ return "Error: Audio file not found."
27
+
28
+ # Verificar que la API key está disponible
29
+ if not OPEN_AI:
30
+ return "Error: OpenAI API key not configured"
31
+
32
+ # Configurar OpenAI
33
+ openai.api_key = OPEN_AI
34
+
35
+ with open(audio_path, "rb") as audio_file:
36
+ transcript = openai.audio.transcriptions.create(
37
+ model="whisper-1",
38
+ file=audio_file,
39
+ response_format="text"
40
+ )
41
+
42
+ return transcript # Retorna solo el texto transcrito
43
+
44
+ except Exception as e:
45
+ return f"Error transcribing audio: {str(e)}"
46
+
47
  def get_youtube_transcript(video_url: str) -> str:
48
  try:
49
  # Extraer ID del video más robustamente
 
84
  if not os.path.exists(file_path):
85
  return "Error: File not found at "
86
 
87
+ file_ext = os.path.splitext(file_path)[1].lower()
88
+
89
+ if file_ext in ['.mp3', '.wav', '.m4a', '.flac', '.ogg']:
90
+ transcription = transcribe_audio_openai(file_path)
91
+ return f"Audio transcription: {transcription}"
92
+
93
+ elif file_ext in ['.txt', '.pdf', '.docx', '.csv', '.json', '.md']:
94
+ reader = SimpleDirectoryReader(input_files=[file_path])
95
+ documents = reader.load_data()
96
 
97
  if not documents:
98
  return "Error: No content found in the file."