Final_Assignment_Template

Sleeping

App Files Files Community

dalybuilds committed on Jul 19, 2025

Commit

80e8087

verified ·

1 Parent(s): 6acbbf1

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -51

app.py CHANGED Viewed

@@ -5,11 +5,9 @@ import pandas as pd
 from io import BytesIO
 import re
 import subprocess
-import base64
 # --- Tool-specific Imports ---
 from pytube import YouTube
-from langchain_huggingface import HuggingFaceInferenceAPI
 # --- LangChain & Groq Imports ---
 from groq import Groq
@@ -25,7 +23,10 @@ TEMP_DIR = "/tmp"
 # --- Tool Definition: Audio File Transcription ---
 def transcribe_audio_file(task_id: str) -> str:
-    # (This function is complete and correct from the previous version)
     print(f"Tool 'transcribe_audio_file' called with task_id: {task_id}")
     try:
         file_url = f"{DEFAULT_API_URL}/files/{task_id}"
@@ -39,9 +40,12 @@ def transcribe_audio_file(task_id: str) -> str:
     except Exception as e:
         return f"Error during audio file transcription: {e}"
-# --- Tool Definition: Video Transcription via FFmpeg ---
 def transcribe_youtube_video(video_url: str) -> str:
-    # (This function is complete and correct from the previous version)
     print(f"Tool 'transcribe_youtube_video' (ffmpeg) called with URL: {video_url}")
     video_path, audio_path = None, None
     try:
@@ -62,66 +66,30 @@ def transcribe_youtube_video(video_url: str) -> str:
         if video_path and os.path.exists(video_path): os.remove(video_path)
         if audio_path and os.path.exists(audio_path): os.remove(audio_path)
-# --- NEW TOOL Definition: Image Analysis ---
-def analyze_image_from_task_id(task_id: str) -> str:
-    """
-    Downloads an image file for a given task_id and analyzes it using a Vision-Language Model.
-    Use this tool ONLY when a question explicitly mentions an image.
-    """
-    print(f"Tool 'analyze_image_from_task_id' called with task_id: {task_id}")
-    try:
-        file_url = f"{DEFAULT_API_URL}/files/{task_id}"
-        print(f"Downloading image from: {file_url}")
-        response = requests.get(file_url)
-        response.raise_for_status()
-        # Initialize the VLM client
-        vlm_client = HuggingFaceInferenceAPI(
-            model_id="llava-hf/llava-1.5-7b-hf",
-            token=os.getenv("HF_TOKEN")
-        )
-        print("Analyzing image with Llava...")
-        # The prompt for the VLM needs to be specific.
-        # We can just ask it to describe the image in detail.
-        text_prompt = "Describe the image in detail."
-        result = vlm_client.image_to_text(image=response.content, prompt=text_prompt)
-        print(f"Image analysis successful. Result: {result}")
-        return result
-    except Exception as e:
-        return f"Error during image analysis: {e}"
 # --- Agent Definition ---
 class LangChainAgent:
-    def __init__(self, groq_api_key: str, tavily_api_key: str, hf_token: str):
         self.llm = ChatGroq(model_name="llama3-70b-8192", groq_api_key=groq_api_key, temperature=0.0)
         self.tools = [
             TavilySearchResults(name="web_search", max_results=3, tavily_api_key=tavily_api_key, description="A search engine for finding up-to-date information on the internet."),
             Tool(name="audio_file_transcriber", func=transcribe_audio_file, description="Use this for questions mentioning an audio file (.mp3, recording). Input MUST be the task_id."),
             Tool(name="youtube_video_transcriber", func=transcribe_youtube_video, description="Use this for questions with a youtube.com URL. Input MUST be the URL."),
-            Tool(name="image_analyzer", func=analyze_image_from_task_id, description="Use this for questions mentioning an image. Input MUST be the task_id."),
         ]
         prompt = ChatPromptTemplate.from_messages([
             ("system", (
-                "You are a powerful problem-solving agent. Your goal is to answer the user's question accurately. "
-                "You have access to a web search tool, an audio file transcriber, a YouTube video transcriber, and an image analyzer.\n\n"
                 "**REASONING PROCESS:**\n"
-                "1.  **Analyze the question:** Determine if a tool is needed. Is it a general knowledge question, or does it mention a specific file type (audio, video, image) or URL?\n"
                 "2.  **Select ONE tool based on the question:**\n"
                 "    - For general knowledge, facts, or current events: use `web_search`.\n"
                 "    - For an audio file, .mp3, or voice memo: use `audio_file_transcriber` with the `task_id`.\n"
                 "    - For a youtube.com URL: use `youtube_video_transcriber` with the URL.\n"
-                "    - For an image: use `image_analyzer` with the `task_id`.\n"
-                "    - For math or simple logic: answer directly.\n"
                 "3.  **Execute and Answer:** After using a tool, analyze the result and provide ONLY THE FINAL ANSWER."
             )),
             ("human", "Question: {input}\nTask ID: {task_id}"),
             ("placeholder", "{agent_scratchpad}"),
         ])
         agent = create_tool_calling_agent(self.llm, self.tools, prompt)
         self.agent_executor = AgentExecutor(agent=agent, tools=self.tools, verbose=True, handle_parsing_errors=True)
@@ -144,9 +112,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     try:
         groq_api_key = os.getenv("GROQ_API_KEY")
         tavily_api_key = os.getenv("TAVILY_API_KEY")
-        hf_token = os.getenv("HF_TOKEN")
-        if not all([groq_api_key, tavily_api_key, hf_token]): raise ValueError("An API key (GROQ, TAVILY, or HF) is missing.")
-        agent = LangChainAgent(groq_api_key=groq_api_key, tavily_api_key=tavily_api_key, hf_token=hf_token)
     except Exception as e: return f"Error initializing agent: {e}", None
     questions_url = f"{DEFAULT_API_URL}/questions"
@@ -180,8 +147,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 # --- Gradio Interface ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Ultimate Agent Runner (Search, Audio, Video, Vision)")
-    gr.Markdown("This agent can search, transcribe audio files, transcribe YouTube videos, and analyze images.")
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
@@ -190,7 +157,7 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
-    for key in ["GROQ_API_KEY", "TAVILY_API_KEY", "HF_TOKEN"]:
         print(f"✅ {key} secret is set." if os.getenv(key) else f"⚠️ WARNING: {key} secret is not set.")
     print("-"*(60 + len(" App Starting ")) + "\n")
     demo.launch(debug=True, share=False)

 from io import BytesIO
 import re
 import subprocess
 # --- Tool-specific Imports ---
 from pytube import YouTube
 # --- LangChain & Groq Imports ---
 from groq import Groq
 # --- Tool Definition: Audio File Transcription ---
 def transcribe_audio_file(task_id: str) -> str:
+    """
+    Downloads an audio file (.mp3) for a given task_id, transcribes it, and returns the text.
+    Use this tool ONLY when a question explicitly mentions an audio file, .mp3, recording, or voice memo.
+    """
     print(f"Tool 'transcribe_audio_file' called with task_id: {task_id}")
     try:
         file_url = f"{DEFAULT_API_URL}/files/{task_id}"
     except Exception as e:
         return f"Error during audio file transcription: {e}"
+# --- Tool Definition: Video Transcription (using FFmpeg) ---
 def transcribe_youtube_video(video_url: str) -> str:
+    """
+    Downloads a YouTube video from a URL, extracts its audio using FFmpeg, and transcribes it.
+    Use this tool ONLY when a question provides a youtube.com URL.
+    """
     print(f"Tool 'transcribe_youtube_video' (ffmpeg) called with URL: {video_url}")
     video_path, audio_path = None, None
     try:
         if video_path and os.path.exists(video_path): os.remove(video_path)
         if audio_path and os.path.exists(audio_path): os.remove(audio_path)
 # --- Agent Definition ---
 class LangChainAgent:
+    def __init__(self, groq_api_key: str, tavily_api_key: str):
         self.llm = ChatGroq(model_name="llama3-70b-8192", groq_api_key=groq_api_key, temperature=0.0)
         self.tools = [
             TavilySearchResults(name="web_search", max_results=3, tavily_api_key=tavily_api_key, description="A search engine for finding up-to-date information on the internet."),
             Tool(name="audio_file_transcriber", func=transcribe_audio_file, description="Use this for questions mentioning an audio file (.mp3, recording). Input MUST be the task_id."),
             Tool(name="youtube_video_transcriber", func=transcribe_youtube_video, description="Use this for questions with a youtube.com URL. Input MUST be the URL."),
         ]
         prompt = ChatPromptTemplate.from_messages([
             ("system", (
+                "You are a powerful problem-solving agent. You have access to a web search tool, an audio file transcriber, and a YouTube video transcriber.\n\n"
                 "**REASONING PROCESS:**\n"
+                "1.  **Analyze the question:** Determine if a tool is needed. Is it a general knowledge question, or does it mention an audio file or a YouTube URL?\n"
                 "2.  **Select ONE tool based on the question:**\n"
                 "    - For general knowledge, facts, or current events: use `web_search`.\n"
                 "    - For an audio file, .mp3, or voice memo: use `audio_file_transcriber` with the `task_id`.\n"
                 "    - For a youtube.com URL: use `youtube_video_transcriber` with the URL.\n"
+                "    - For anything else (like images, which you cannot see, or math), you must answer directly without using a tool.\n"
                 "3.  **Execute and Answer:** After using a tool, analyze the result and provide ONLY THE FINAL ANSWER."
             )),
             ("human", "Question: {input}\nTask ID: {task_id}"),
             ("placeholder", "{agent_scratchpad}"),
         ])
         agent = create_tool_calling_agent(self.llm, self.tools, prompt)
         self.agent_executor = AgentExecutor(agent=agent, tools=self.tools, verbose=True, handle_parsing_errors=True)
     try:
         groq_api_key = os.getenv("GROQ_API_KEY")
         tavily_api_key = os.getenv("TAVILY_API_KEY")
+        if not all([groq_api_key, tavily_api_key]): raise ValueError("GROQ or TAVILY API key is missing.")
+        agent = LangChainAgent(groq_api_key=groq_api_key, tavily_api_key=tavily_api_key)
     except Exception as e: return f"Error initializing agent: {e}", None
     questions_url = f"{DEFAULT_API_URL}/questions"
 # --- Gradio Interface ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Ultimate Agent Runner (Search, Audio, Video)")
+    gr.Markdown("This agent can search, transcribe audio files, and transcribe YouTube videos.")
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
+    for key in ["GROQ_API_KEY", "TAVILY_API_KEY"]:
         print(f"✅ {key} secret is set." if os.getenv(key) else f"⚠️ WARNING: {key} secret is not set.")
     print("-"*(60 + len(" App Starting ")) + "\n")
     demo.launch(debug=True, share=False)