Cmuroc27 committed on
Commit
9808761
·
1 Parent(s): 5609ae5

mejoras routing y prompts

Browse files
Files changed (2) hide show
  1. agents.py +12 -25
  2. tools.py +27 -7
agents.py CHANGED
@@ -36,36 +36,23 @@ class AlfredAdvancedWorkflow(Workflow):
36
  # Agente de busqueda
37
  self.web_agent = AgentWorkflow.from_tools_or_functions([search_tool],
38
  llm = llm,
39
- system_prompt="""You are a concise web research assistant.
40
-
41
- CRITICAL RULES:
42
- 1. Use web_search ONLY ONCE
43
- 2. After searching, provide a SHORT answer (1-3 sentences maximum).
44
- 3. Provide a clear, direct and short answer. Dont give extra explanations or introductions
45
- 4. Answer ONLY what was asked, nothing more
46
- 5. DO NOT search multiple times
47
- 6. If you can't find the answer, say "I don't know" briefly
48
-
49
- Format: Direct answer in 1-3 sentences.""")
50
 
51
  self.doc_agent = AgentWorkflow.from_tools_or_functions([read_document_tool,
52
  image_analyzer_tool, youtube_transcript_tol, calculator_tool],
53
  llm = llm,
54
 
55
- system_prompt = """Document processing assistant. Answer in 1 sentences max.
56
-
57
- AVAILABLE TOOLS:
58
- - read_document: Read files (PDF, TXT, CSV, etc.)
59
- - analyze_image: Analyze images
60
- - calculator: Do calculations
61
- - youtube_transcript: Get what is SAID in YouTube videos
62
-
63
- RULES:
64
- - Use ONE tool ONCE
65
- - For YouTube videos: Use youtube_transcript tool
66
- - Provide SHORT answer (just the fact requested)
67
-
68
- Answer format: Direct answer, 1 sentences.""")
69
 
70
  #self.reviewer = AgentWorkflow.from_tools_or_functions([], llm = llm,
71
  # system_prompt=" You are an expert reviewer. Your task is to review the provided answer to ensure its accuracy, completeness, and relevance to the question. Be concise as much as possible")
 
36
  # Agente de busqueda
37
  self.web_agent = AgentWorkflow.from_tools_or_functions([search_tool],
38
  llm = llm,
39
+ system_prompt="""You are a FACTUAL web search assistant. Rules:
40
+ 1. Use web_search tool ONCE
41
+ 2. Answer in 1-2 sentences MAXIMUM
42
+ 3. Be direct and factual
43
+ 4. If unsure, say "I don't know"
44
+ 5. NO introductions, NO explanations""")
 
 
 
 
 
45
 
46
  self.doc_agent = AgentWorkflow.from_tools_or_functions([read_document_tool,
47
  image_analyzer_tool, youtube_transcript_tol, calculator_tool],
48
  llm = llm,
49
 
50
+ system_prompt = """You are a CONCISE document processor. Rules:
51
+ 1. Use ONE appropriate tool ONCE
52
+ 2. Answer in 1 sentence MAXIMUM
53
+ 3. For YouTube: use youtube_transcript
54
+ 4. For calculations: use calculator
55
+ 5. Be direct - just the answer""")
 
 
 
 
 
 
 
 
56
 
57
  #self.reviewer = AgentWorkflow.from_tools_or_functions([], llm = llm,
58
  # system_prompt=" You are an expert reviewer. Your task is to review the provided answer to ensure its accuracy, completeness, and relevance to the question. Be concise as much as possible")
tools.py CHANGED
@@ -20,19 +20,39 @@ client = InferenceClient(HF_TOKEN)
20
 
21
  def get_youtube_transcript(video_url: str) -> str:
22
  try:
23
- video_id_match = re.search(r"(?:v=|youtu\.be/)([a-zA-Z0-9_-]{11})", video_url)
24
- if not video_id_match:
25
- return "Error: Invalid YouTube URL format."
26
- video_id = video_id_match.group(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
28
- #Combine all the text segments
29
  full_transcript = " ".join([entry['text'] for entry in transcript_list])
30
-
31
- return f"The YouTube video Transcript es:\n{full_transcript}"
 
 
 
 
 
32
  except Exception as e:
33
  return f"Error retrieving transcript: {str(e)}"
34
 
35
 
 
 
36
  def read_document(file_path: str) -> str:
37
  try:
38
  if not os.path.exists(file_path):
 
20
 
21
  def get_youtube_transcript(video_url: str) -> str:
22
  try:
23
+ # Extraer ID del video más robustamente
24
+ video_id = None
25
+
26
+ # Patrón para youtube.com/watch?v=ID
27
+ match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', video_url)
28
+ if match:
29
+ video_id = match.group(1)
30
+ # Patrón para youtu.be/ID
31
+ else:
32
+ match = re.search(r'youtu\.be\/([0-9A-Za-z_-]{11})', video_url)
33
+ if match:
34
+ video_id = match.group(1)
35
+
36
+ if not video_id:
37
+ return "Error: Could not extract YouTube video ID from URL"
38
+
39
+ print(f"Getting transcript for YouTube video: {video_id}")
40
+
41
  transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
 
42
  full_transcript = " ".join([entry['text'] for entry in transcript_list])
43
+
44
+ # Hacer el transcript más manejable
45
+ if len(full_transcript) > 1000:
46
+ full_transcript = full_transcript[:1000] + "... [truncated]"
47
+
48
+ return full_transcript
49
+
50
  except Exception as e:
51
  return f"Error retrieving transcript: {str(e)}"
52
 
53
 
54
+
55
+
56
  def read_document(file_path: str) -> str:
57
  try:
58
  if not os.path.exists(file_path):