Final_Assignment_Template_CURR

Sleeping

App Files Files Community

mdicio committed on May 18, 2025

Commit

1aa70af

1 Parent(s): 263a013

fd

Browse files

Files changed (7) hide show

.gitignore +9 -0
agent.py +16 -11
app.py +3 -0
app_template.py +53 -28
realreq.txt +12 -0
requirements.txt +11 -23
tools.py +18 -24

.gitignore ADDED Viewed

	@@ -0,0 +1,9 @@

+.env
+ragdata/
+chroma_store
+.python-version
+downloads/
+.python_version
+*.jsonl
+*__pycache__/
+*.log

agent.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import os
 from dotenv import load_dotenv
 load_dotenv()
 # Import models from smolagents
@@ -20,7 +22,7 @@ from tools import (
     TranscribeAudioTool,
     VisitWebpageTool,
     WikipediaSearchTool,
-    image_question_answering
 )
 # Import utility functions
@@ -69,11 +71,13 @@ class BoomBot:
             )
         elif self.provider == "anthropic":
             model_id = "anthropic/claude-3-5-haiku-latest"
-            return LiteLLMModel(model_id=model_id,
-                                temperature=0.6,
-                                max_tokens=8192,
-                                api_key=os.getenv("ANTHROPIC_API_KEY"))
         elif self.provider == "deepinfra":
             deepinfra_model = "Qwen/Qwen3-235B-A22B"
             return OpenAIServerModel(
@@ -277,7 +281,7 @@ class BoomBot:
             )
         # Run the agent with the given question
-        result = self.agent.generate_response(question)
         # Extract the final answer from the result
         final_answer = extract_final_answer(result)
@@ -286,7 +290,8 @@ class BoomBot:
 # Example of how to use this code (commented out)
-# if __name__ == "__main__":
-#     agent = BasicAgent()
-#     response = agent("What is the current population of Tokyo?", "population_query", True)
-#     print(f"Response: {response}")

 import os
 from dotenv import load_dotenv
 load_dotenv()
 # Import models from smolagents
     TranscribeAudioTool,
     VisitWebpageTool,
     WikipediaSearchTool,
+    image_question_answering,
 )
 # Import utility functions
             )
         elif self.provider == "anthropic":
             model_id = "anthropic/claude-3-5-haiku-latest"
+            return LiteLLMModel(
+                model_id=model_id,
+                temperature=0.6,
+                max_tokens=8192,
+                api_key=os.getenv("ANTHROPIC_API_KEY"),
+            )
         elif self.provider == "deepinfra":
             deepinfra_model = "Qwen/Qwen3-235B-A22B"
             return OpenAIServerModel(
             )
         # Run the agent with the given question
+        result = self.agent.run(question)
         # Extract the final answer from the result
         final_answer = extract_final_answer(result)
 # Example of how to use this code (commented out)
+if __name__ == "__main__":
+    agent = BoomBot(provider="gemma")
+    question = "In the year 2020, where were koi fish found in the watershed with the id 02040203? Give only the name of the pond, lake, or stream where the fish were found, and not the name of the city or county."
+    response = agent.run(question=question, task_id="1", to_download=False)
+    print(f"Response: {response}")

app.py CHANGED Viewed

@@ -12,7 +12,10 @@ from agent import BoomBot
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 from dotenv import load_dotenv
 load_dotenv()
 # --- Basic Agent Definition --
 class BasicAgent:
     def __init__(self):

 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 from dotenv import load_dotenv
 load_dotenv()
 # --- Basic Agent Definition --
 class BasicAgent:
     def __init__(self):

app_template.py CHANGED Viewed

@@ -1,34 +1,38 @@
 import os
 import gradio as gr
-import requests
-import inspect
 import pandas as pd
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     """Placeholder agent that returns the same fixed answer for every question."""
     def __init__(self):
         # No state is set up; construction is only announced on stdout.
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         """Log a preview of ``question`` and return the fixed default answer.

         Args:
             question: The question text; only its first 50 characters are printed.

         Returns:
             The constant string "This is a default answer.", regardless of input.
         """
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         fixed_answer = "This is a default answer."
         print(f"Agent returning fixed answer: {fixed_answer}")
         return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
-        username= f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -55,16 +59,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -81,18 +85,36 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             continue
         try:
             submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
@@ -162,20 +184,19 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +204,18 @@ if __name__ == "__main__":
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

+import inspect
 import os
 import gradio as gr
 import pandas as pd
+import requests
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     """Placeholder agent that returns the same fixed answer for every question."""
     def __init__(self):
         # No state is set up; construction is only announced on stdout.
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         """Log a preview of ``question`` and return the fixed default answer.

         Args:
             question: The question text; only its first 50 characters are printed.

         Returns:
             The constant string "This is a default answer.", regardless of input.
         """
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         fixed_answer = "This is a default answer."
         print(f"Agent returning fixed answer: {fixed_answer}")
         return fixed_answer
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
     if profile:
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
             continue
         try:
             submitted_answer = agent(question_text)
+            answers_payload.append(
+                {"task_id": task_id, "submitted_answer": submitted_answer}
+            )
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": submitted_answer,
+                }
+            )
         except Exception as e:
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": f"AGENT ERROR: {e}",
+                }
+            )
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload,
+    }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
     run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False
+    )
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+        print(
+            f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+        )
     else:
+        print(
+            "ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+        )
+    print("-" * (60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)

realreq.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+dotenv
+smolagents
+ollama
+chromadb
+pymupdf
+pandas
+bs4
+duckduckgo-search
+langchain_community
+markdownify
+smolagents[litellm]
+smolagents[openai]

requirements.txt CHANGED Viewed

@@ -1,25 +1,13 @@
-beautifulsoup4
-chromadb
-duckduckgo_search
 gradio
-huggingface_hub
-langchain
-langchain-chroma
-langchain-community
-langchain-core
-langchain-groq
-langchain-huggingface
-langchain-google-genai
-langchain-tavily
-langgraph
-markdownify
-pandas
-protobuf==3.20.*
-PyMuPDF
-python-dotenv
-requests
-sentence-transformers
 smolagents
-smolagents[openai]
-smolagents[toolkit]
-ollama

 gradio
+dotenv
 smolagents
+ollama
+chromadb
+pymupdf
+pandas
+bs4
+duckduckgo-search
+langchain_community
+markdownify
+smolagents[litellm]
+smolagents[openai]

tools.py CHANGED Viewed

@@ -7,10 +7,7 @@ import time
 import traceback
 from pathlib import Path
 from typing import Dict, List
-from urllib.parse import urlparse
-from pathlib import Path
-from ollama import chat
-from PIL import Image
 import chromadb
 import chromadb.utils.embedding_functions as embedding_functions
@@ -18,6 +15,7 @@ import fitz  # PyMuPDF
 import pandas as pd
 import requests
 from bs4 import BeautifulSoup
 from duckduckgo_search import DDGS
 from duckduckgo_search.exceptions import (
     ConversationLimitException,
@@ -25,6 +23,7 @@ from duckduckgo_search.exceptions import (
     RatelimitException,
     TimeoutException,
 )
 from langchain_community.document_loaders import (
     BSHTMLLoader,
     JSONLoader,
@@ -32,21 +31,16 @@ from langchain_community.document_loaders import (
     TextLoader,
     UnstructuredFileLoader,
 )
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.tools import BraveSearch
 from markdownify import markdownify
 from smolagents import Tool, tool
 from smolagents.utils import truncate_content
-from typing import Dict, List
-import requests
-from bs4 import BeautifulSoup
-from urllib.parse import quote_plus
-from dotenv import load_dotenv
 load_dotenv()
 class ReadFileContentTool(Tool):
     name = "read_file_content"
     description = """Reads local files in various formats (text, CSV, Excel, PDF, HTML, etc.) and returns their content as readable text. Automatically detects and processes the appropriate file format."""
@@ -295,7 +289,7 @@ class BraveWebSearchTool(Tool):
     output_type = "string"
     # api_key = os.getenv("BRAVE_SEARCH_API_KEY")
-    api_key=None
     count = 3
     char_limit = 4000  # Adjust based on LLM context window
     tool = BraveSearch.from_api_key(api_key=api_key, search_kwargs={"count": count})
@@ -491,9 +485,6 @@ class DuckDuckGoSearchTool(Tool):
     def forward(self, query: str) -> str:
         self._configure()
-        print(
-            f"EXECUTING TOOL: duckduckgo_search(query='{query}', top_results={top_results})"
-        )
         top_results = 5
@@ -551,6 +542,7 @@ class DuckDuckGoSearchTool(Tool):
         return f"❌ Failed to retrieve results after {max_retries} retries."
 huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
     model_name="sentence-transformers/all-mpnet-base-v2"
 )
@@ -565,6 +557,7 @@ SUPPORTED_EXTENSIONS = [
     ".htm",
 ]
 class AddDocumentToVectorStoreTool(Tool):
     name = "add_document_to_vector_store"
     description = "Processes a document and adds it to the vector database for semantic search. Automatically chunks files and creates text embeddings to enable powerful content retrieval."
@@ -632,6 +625,7 @@ class AddDocumentToVectorStoreTool(Tool):
             traceback.print_exc()
             return f"Error: {e}"
 class QueryVectorStoreTool(Tool):
     name = "query_downloaded_documents"
     description = "Performs semantic searches across your downloaded documents. Use detailed queries to find specific information, concepts, or answers from your collected resources."
@@ -640,16 +634,11 @@ class QueryVectorStoreTool(Tool):
         "query": {
             "type": "string",
             "description": "The search query. Ensure this is constructed intelligently so to retrieve the most relevant outputs.",
-        },
-        "top_k": {
-            "type": "integer",
-            "description": "Number of top results to retrieve. Usually between 3 and 30",
-            "nullable": True,
-        },
     }
     output_type = "string"
-    def forward(self, query: str, top_k: int = 5) -> str:
         collection_name = "vectorstore"
         if k < 3:
@@ -668,7 +657,7 @@ class QueryVectorStoreTool(Tool):
             results = collection.query(
                 query_texts=[query],
-                n_results=top_k,
             )
             formatted = []
@@ -686,6 +675,7 @@ class QueryVectorStoreTool(Tool):
             traceback.print_exc()
             return f"Error querying vector store: {e}"
 @tool
 def image_question_answering(image_path: str, prompt: str) -> str:
     """
@@ -722,6 +712,7 @@ def image_question_answering(image_path: str, prompt: str) -> str:
     return response.message.content.strip()
 class VisitWebpageTool(Tool):
     name = "visit_webpage"
     description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
@@ -956,6 +947,7 @@ class VisitWebpageTool(Tool):
         return content
 class ArxivSearchTool(Tool):
     name = "arxiv_search"
     description = """Searches arXiv for academic papers and returns structured information including titles, authors, publication dates, abstracts, and download links."""
@@ -1013,6 +1005,7 @@ class ArxivSearchTool(Tool):
         return "\n".join(output_lines).strip()
 def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
     """
     Fetches the given arXiv advanced‐search URL, parses the HTML,
@@ -1075,6 +1068,7 @@ def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
     return results
 def build_arxiv_url(
     query: str, from_date: str = None, to_date: str = None, size: int = 50
 ) -> str:

 import traceback
 from pathlib import Path
 from typing import Dict, List
+from urllib.parse import quote_plus, urlparse
 import chromadb
 import chromadb.utils.embedding_functions as embedding_functions
 import pandas as pd
 import requests
 from bs4 import BeautifulSoup
+from dotenv import load_dotenv
 from duckduckgo_search import DDGS
 from duckduckgo_search.exceptions import (
     ConversationLimitException,
     RatelimitException,
     TimeoutException,
 )
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import (
     BSHTMLLoader,
     JSONLoader,
     TextLoader,
     UnstructuredFileLoader,
 )
 from langchain_community.tools import BraveSearch
 from markdownify import markdownify
+from ollama import chat
+from PIL import Image
 from smolagents import Tool, tool
 from smolagents.utils import truncate_content
 load_dotenv()
 class ReadFileContentTool(Tool):
     name = "read_file_content"
     description = """Reads local files in various formats (text, CSV, Excel, PDF, HTML, etc.) and returns their content as readable text. Automatically detects and processes the appropriate file format."""
     output_type = "string"
     # api_key = os.getenv("BRAVE_SEARCH_API_KEY")
+    api_key = None
     count = 3
     char_limit = 4000  # Adjust based on LLM context window
     tool = BraveSearch.from_api_key(api_key=api_key, search_kwargs={"count": count})
     def forward(self, query: str) -> str:
         self._configure()
         top_results = 5
         return f"❌ Failed to retrieve results after {max_retries} retries."
 huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
     model_name="sentence-transformers/all-mpnet-base-v2"
 )
     ".htm",
 ]
 class AddDocumentToVectorStoreTool(Tool):
     name = "add_document_to_vector_store"
     description = "Processes a document and adds it to the vector database for semantic search. Automatically chunks files and creates text embeddings to enable powerful content retrieval."
             traceback.print_exc()
             return f"Error: {e}"
 class QueryVectorStoreTool(Tool):
     name = "query_downloaded_documents"
     description = "Performs semantic searches across your downloaded documents. Use detailed queries to find specific information, concepts, or answers from your collected resources."
         "query": {
             "type": "string",
             "description": "The search query. Ensure this is constructed intelligently so to retrieve the most relevant outputs.",
+        }
     }
     output_type = "string"
+    def forward(self, query: str) -> str:
         collection_name = "vectorstore"
         if k < 3:
             results = collection.query(
                 query_texts=[query],
+                n_results=k,
             )
             formatted = []
             traceback.print_exc()
             return f"Error querying vector store: {e}"
 @tool
 def image_question_answering(image_path: str, prompt: str) -> str:
     """
     return response.message.content.strip()
 class VisitWebpageTool(Tool):
     name = "visit_webpage"
     description = "Loads a webpage from a URL and converts its content to markdown format. Use this to browse websites, extract information, or identify downloadable resources from a specific web address."
         return content
 class ArxivSearchTool(Tool):
     name = "arxiv_search"
     description = """Searches arXiv for academic papers and returns structured information including titles, authors, publication dates, abstracts, and download links."""
         return "\n".join(output_lines).strip()
 def fetch_and_parse_arxiv(url: str) -> List[Dict[str, str]]:
     """
     Fetches the given arXiv advanced‐search URL, parses the HTML,
     return results
 def build_arxiv_url(
     query: str, from_date: str = None, to_date: str = None, size: int = 50
 ) -> str: