Final_Assignment_Template

Sleeping

App Files Files Community

benjosaur committed on Jun 28, 2025

Commit

59b66b3

1 Parent(s): 679df7e

Local Submitting Solution

Browse files

Files changed (4) hide show

app.py +112 -69
requirements.txt +2 -1
search.py +7 -0
tools.py +39 -22

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
-import gradio as gr
 import requests
 import inspect
 import pandas as pd
@@ -9,8 +10,9 @@ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from tools import (
     APIProcessor,
     parse_youtube_video,
-    transcribe_image_from_link,
     transcribe_webpage,
 )
 from utils import format_final_answer
 from search import GoogleSearch
@@ -42,18 +44,25 @@ class BasicAgent:
         agent = AgentWorkflow.from_tools_or_functions(
             [
                 google_search,
                 google_image_search,
-                get_and_process_question_attachment,
                 parse_youtube_video,
-                transcribe_image_from_link,
                 transcribe_webpage,
             ],
             llm=self.llm,
             system_prompt=SYSTEM_PROMPT,
         )
         ctx = Context(agent)
-        handler = agent.run(question, ctx=ctx)
         async for ev in handler.stream_events():
             if isinstance(ev, ToolCallResult):
                 print("")
@@ -70,7 +79,8 @@ class BasicAgent:
         return final_answer
-async def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
@@ -78,12 +88,13 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
-    if profile:
-        username = f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
@@ -124,7 +135,7 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
-    async for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         file_name = item.get("file_name")
@@ -133,6 +144,9 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
             continue
         try:
             submitted_answer = await agent(question_text, task_id, file_name)
             answers_payload.append(
                 {"task_id": task_id, "submitted_answer": submitted_answer}
             )
@@ -164,6 +178,7 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
         "answers": answers_payload,
     }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     # 5. Submit
@@ -210,61 +225,89 @@ async def run_and_submit_all(profile: gr.OAuthProfile | None):
         return status_message, results_df
-# --- Build Gradio Interface using Blocks ---
-with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
-        """
-    )
-    gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(
-        label="Run Status / Submission Result", lines=5, interactive=False
-    )
-    # Removed max_rows=10 from DataFrame constructor
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
-    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup:  # Print repo URLs if SPACE_ID is found
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(
-            f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
-        )
-    else:
-        print(
-            "ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
-        )
-    print("-" * (60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import os
+# import gradio as gr
 import requests
 import inspect
 import pandas as pd
 from tools import (
     APIProcessor,
     parse_youtube_video,
+    transcribe_image_from_url,
     transcribe_webpage,
+    add_numbers,
 )
 from utils import format_final_answer
 from search import GoogleSearch
         agent = AgentWorkflow.from_tools_or_functions(
             [
+                add_numbers,
                 google_search,
                 google_image_search,
                 parse_youtube_video,
+                transcribe_image_from_url,
                 transcribe_webpage,
             ],
             llm=self.llm,
             system_prompt=SYSTEM_PROMPT,
         )
+        attached_contents = get_and_process_question_attachment()
+        user_message = (
+            question + f"\n\nContents of attached file: {file_name}" + attached_contents
+        )
         ctx = Context(agent)
+        handler = agent.run(user_message, ctx=ctx)
         async for ev in handler.stream_events():
             if isinstance(ev, ToolCallResult):
                 print("")
         return final_answer
+# async def run_and_submit_all(profile: gr.OAuthProfile | None):
+async def run_and_submit_all():
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
+    # if profile:
+    #     username = f"{profile.username}"
+    #     print(f"User logged in: {username}")
+    # else:
+    #     print("User not logged in.")
+    #     return "Please Login to Hugging Face with the button.", None
+    username = "benjosaur"
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
+    for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         file_name = item.get("file_name")
             continue
         try:
             submitted_answer = await agent(question_text, task_id, file_name)
+            print(f"Submitted Answer: {submitted_answer}")
+            print("==" * 50)
+            print("")
             answers_payload.append(
                 {"task_id": task_id, "submitted_answer": submitted_answer}
             )
         "answers": answers_payload,
     }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+    print(f"ANSWERS PAYLOAD: {answers_payload}")
     print(status_update)
     # 5. Submit
         return status_message, results_df
+# # --- Build Gradio Interface using Blocks ---
+# with gr.Blocks() as demo:
+#     gr.Markdown("# Basic Agent Evaluation Runner")
+#     gr.Markdown(
+#         """
+#         **Instructions:**
+#         1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
+#         2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+#         3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+#         ---
+#         **Disclaimers:**
+#         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
+#         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
+#         """
+#     )
+#     gr.LoginButton()
+#     run_button = gr.Button("Run Evaluation & Submit All Answers")
+#     status_output = gr.Textbox(
+#         label="Run Status / Submission Result", lines=5, interactive=False
+#     )
+#     # Removed max_rows=10 from DataFrame constructor
+#     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+#     run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+# async def main():
+#     agent = BasicAgent()
+#     api_url = DEFAULT_API_URL
+#     questions_url = f"{api_url}/questions"
+#     print(f"Fetching questions from: {questions_url}")
+#     response = requests.get(questions_url, timeout=15)
+#     response.raise_for_status()
+#     questions_data = response.json()
+#     # 3. Run your Agent
+#     results_log = []
+#     answers_payload = []
+#     print(f"Running agent on {len(questions_data)} questions...")
+#     item = questions_data[0]
+#     task_id = item.get("task_id")
+#     question_text = item.get("question")
+#     file_name = item.get("file_name")
+#     submitted_answer = await agent(question_text, task_id, file_name)
+#     answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+#     results_log.append(
+#         {
+#             "Task ID": task_id,
+#             "Question": question_text,
+#             "Submitted Answer": submitted_answer,
+#         }
+#     )
 if __name__ == "__main__":
+    # print("\n" + "-" * 30 + " App Starting " + "-" * 30)
+    # # Check for SPACE_HOST and SPACE_ID at startup for information
+    # space_host_startup = os.getenv("SPACE_HOST")
+    # space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
+    # if space_host_startup:
+    #     print(f"✅ SPACE_HOST found: {space_host_startup}")
+    #     print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+    # else:
+    #     print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    # if space_id_startup:  # Print repo URLs if SPACE_ID is found
+    #     print(f"✅ SPACE_ID found: {space_id_startup}")
+    #     print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+    #     print(
+    #         f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+    #     )
+    # else:
+    #     print(
+    #         "ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+    #     )
+    # print("-" * (60 + len(" App Starting ")) + "\n")
+    # print("Launching Gradio Interface for Basic Agent Evaluation...")
+    # demo.launch(debug=True, share=False)
+    asyncio.run(run_and_submit_all())

requirements.txt CHANGED Viewed

@@ -10,4 +10,5 @@ yt-dlp
 html2text
 llama-index-utils-workflow
 llama-index-llms-huggingface-api
-asyncio

 html2text
 llama-index-utils-workflow
 llama-index-llms-huggingface-api
+asyncio
+pydub

search.py CHANGED Viewed

@@ -6,6 +6,7 @@ import aiohttp
 class GoogleSearch:
     def __init__(self):
         load_dotenv()
         self.api_key = os.environ["GOOGLE_API_KEY"]
         self.cse_id = os.getenv("GOOGLE_CSE_ID")
@@ -17,6 +18,9 @@ class GoogleSearch:
         Returns:
             dict: JSON response from Google API.
         """
         if not self.api_key or not self.cse_id:
             raise ValueError(
@@ -46,6 +50,9 @@ class GoogleSearch:
         Returns:
             dict: JSON response from Google API.
         """
         if not self.api_key or not self.cse_id:
             raise ValueError(

 class GoogleSearch:
     def __init__(self):
         load_dotenv()
+        self.counter = 0
         self.api_key = os.environ["GOOGLE_API_KEY"]
         self.cse_id = os.getenv("GOOGLE_CSE_ID")
         Returns:
             dict: JSON response from Google API.
         """
+        if self.counter > 1:
+            return "No more searches, move on"
+        self.counter += 1
         if not self.api_key or not self.cse_id:
             raise ValueError(
         Returns:
             dict: JSON response from Google API.
         """
+        if self.counter > 2:
+            return "No more searches, move on"
+        self.counter += 1
         if not self.api_key or not self.cse_id:
             raise ValueError(

tools.py CHANGED Viewed

@@ -11,14 +11,18 @@ import re
 import html2text
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
-def transcribe_image_from_link(image_link: str) -> str:
-    """
     Args:
-        image_link (str): URL of the image to transcribe
-    """
-    client = OpenAI()  # Uses OPENAI_API_KEY environment variable
     response = client.chat.completions.create(
         model="gpt-4o",
@@ -35,7 +39,7 @@ def transcribe_image_from_link(image_link: str) -> str:
                     {
                         "type": "image_url",
                         "image_url": {
-                            "url": image_link,
                             "detail": "high",
                         },
                     },
@@ -68,7 +72,7 @@ def transcribe_webpage(website_url: str) -> str:
         content_div = soup.find("div", id="mw-content-text")
         if not content_div:
-            return "Main content not found."
         # Only extract <p> and <table> tags
         elements = content_div.find_all(["p", "table"])
@@ -95,7 +99,7 @@ def transcribe_webpage(website_url: str) -> str:
 def parse_youtube_video(youtube_url: str) -> str:
     """Returns text transcript of a youtube video
     Args:
-        youtube_url: the full url linking to the video to transcribe
     """
     load_dotenv()
     client = OpenAI()
@@ -107,7 +111,7 @@ def parse_youtube_video(youtube_url: str) -> str:
             {
                 "key": "FFmpegExtractAudio",
                 "preferredcodec": "mp3",
-                "preferredquality": "192",
             }
         ],
         "outtmpl": "%(title)s.%(ext)s",
@@ -119,7 +123,6 @@ def parse_youtube_video(youtube_url: str) -> str:
         # Download audio
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info = ydl.extract_info(youtube_url, download=True)
-            title = info["title"]
         # Find the downloaded audio file
         audio_file = None
@@ -131,13 +134,27 @@ def parse_youtube_video(youtube_url: str) -> str:
         if not audio_file:
             raise Exception("Audio file not found")
-        # Transcribe with Whisper
-        with open(audio_file, "rb") as audio:
-            transcript = client.audio.transcriptions.create(
-                model="gpt-4o-transcribe", file=audio
-            )
-        return {"title": title, "transcript": transcript.text}
 class APIProcessor:
@@ -236,9 +253,9 @@ if __name__ == "__main__":
     # response = audio_task_processor.get_and_process_attachment()
     # print(response)
-    # result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
-    # print(result)
-    text = transcribe_webpage(
-        "https://en.wikipedia.org/wiki/Mercedes_Sosa#Studio_albums"
-    )
-    print(text)

 import html2text
 from requests.exceptions import RequestException
 from bs4 import BeautifulSoup
+from pydub import AudioSegment
+def add_numbers(*nums: list[int]) -> int:
+    """Add a list of numbers
     Args:
+        nums: list of numbers"""
+def transcribe_image_from_url(image_url: str) -> str:
+    """Only works with full http urls"""
+    client = OpenAI()
     response = client.chat.completions.create(
         model="gpt-4o",
                     {
                         "type": "image_url",
                         "image_url": {
+                            "url": image_url,
                             "detail": "high",
                         },
                     },
         content_div = soup.find("div", id="mw-content-text")
         if not content_div:
+            content_div = soup.find("div")
         # Only extract <p> and <table> tags
         elements = content_div.find_all(["p", "table"])
 def parse_youtube_video(youtube_url: str) -> str:
     """Returns text transcript of a youtube video
     Args:
+        youtube_url: full url linking to the video to transcribe
     """
     load_dotenv()
     client = OpenAI()
             {
                 "key": "FFmpegExtractAudio",
                 "preferredcodec": "mp3",
+                "preferredquality": "64",
             }
         ],
         "outtmpl": "%(title)s.%(ext)s",
         # Download audio
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info = ydl.extract_info(youtube_url, download=True)
         # Find the downloaded audio file
         audio_file = None
         if not audio_file:
             raise Exception("Audio file not found")
+        audio = AudioSegment.from_mp3(audio_file)
+        chunk_length_ms = 5 * 1000 * 60
+        chunks = []
+        for i in range(0, len(audio), chunk_length_ms):
+            chunk = audio[i : i + chunk_length_ms]
+            chunk_path = os.path.join(temp_dir, f"chunk_{i // chunk_length_ms}.mp3")
+            chunk.export(chunk_path, format="mp3")
+            chunks.append(chunk_path)
+        # Transcribe each chunk
+        full_transcript = ""
+        for chunk_path in chunks:
+            with open(chunk_path, "rb") as audio_chunk:
+                transcript = client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio_chunk,
+                )
+                full_transcript += transcript.text + " "
+        return full_transcript.strip()
 class APIProcessor:
     # response = audio_task_processor.get_and_process_attachment()
     # print(response)
+    result = parse_youtube_video("https://www.youtube.com/watch?v=1htKBjuUWec")
+    print(result)
+    # text = transcribe_webpage(
+    #     "https://en.wikipedia.org/wiki/Mercedes_Sosa#Studio_albums"
+    # )
+    # print(text)