Final_Assignment_Template

Sleeping

App Files Files Community

RalphThings committed on May 2, 2025

Commit

e944048

verified ·

1 Parent(s): b70ff0c

Update app.py

Browse files

Files changed (1) hide show

app.py +202 -171

app.py CHANGED Viewed

@@ -1,12 +1,14 @@
 from transformers import pipeline
 import os
 import re
-import json
 import torch
 import gradio as gr
 import requests
 import inspect
 import pandas as pd
 from youtube_transcript_api import YouTubeTranscriptApi
 import chess, chess.engine
 from bs4 import BeautifulSoup
@@ -18,189 +20,218 @@ from SPARQLWrapper import SPARQLWrapper, JSON
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 HF_TOKEN = os.getenv("HF_TOKEN", None)
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
-    WIKI_API = "https://en.wikipedia.org/w/api.php"
-    VEGETABLE_SET = {
-        "bell pepper","broccoli","celery","green beans",
-        "lettuce","zucchini","sweet potatoes"
-    }
     def __init__(self):
         # initialize HF inference pipeline once
         if HF_TOKEN is None:
             raise ValueError("HF_TOKEN not set in environment")
-        self.generator = pipeline("text-generation", model="EleutherAI/gpt-neo-125M")
         # The GAIA system prompt (no "FINAL ANSWER:" at the end)
-        self.system_prompt = (
-            "You are a concise AI assistant. "
-            "Answer in as few words as possible—a number, a few words, or a comma-separated list. "
-            "No commentary, prefixes, or units.\n\n"
-        )
         print("BasicAgent initialized with LLM.")
-        # Stockfish location—adjust path if needed
-        self.stockfish_path = "/usr/bin/stockfish"
-    # --- Tool 1: Wikipedia raw wikitext fetch ---
-    def wiki_get_page(self, title: str) -> str:
-        params = {
-            "action": "query","format": "json",
-            "prop": "revisions","rvprop": "content","rvslots": "*",
-            "titles": title
-        }
-        r = requests.get(self.WIKI_API, params=params, timeout=10)
-        pages = r.json()["query"]["pages"]
-        page = next(iter(pages.values()))
-        return page["revisions"][0]["slots"]["main"]["*"]
-    # --- Tool 2: YouTube transcript ---
-    def youtube_transcript(self, video_id: str) -> str:
-        transcript = YouTubeTranscriptApi().fetch_transcript(video_id)
-        return " ".join(t["text"] for t in transcript)
-    # --- Tool 3: reverse text ---
-    def reverse_text(self, text: str) -> str:
-        return text[::-1]
-    # --- Tool 4: Chess best move via Stockfish ---
-    def chess_best_move(self, fen: str, time_limit: float = 0.1) -> str:
-        board = chess.Board(fen)
-        engine = chess.engine.SimpleEngine.popen_uci(self.stockfish_path)
-        result = engine.play(board, chess.engine.Limit(time=time_limit))
-        engine.quit()
-        return result.move.uci()
-    # --- Tool 5: Table non-commutativity ---
-    def find_non_commutative(self, table: dict) -> list:
-        elems = set(x for x,_ in table.keys())
-        bad = set()
-        for x in elems:
-            for y in elems:
-                if table[(x,y)] != table[(y,x)]:
-                    bad.update([x,y])
-        return sorted(bad)
-    # --- Tool 6: LibreTexts scraping (generic) ---
-    def libretext_extract(self, url: str, selector: str) -> str:
-        r = requests.get(url, timeout=10)
-        soup = BeautifulSoup(r.text, "html.parser")
-        return soup.select_one(selector).get_text(strip=True)
-    # --- Tool 7: Grocery vegetable classifier ---
-    def classify_vegetables(self, items: list[str]) -> list[str]:
-        vegs = [i for i in items if i in self.VEGETABLE_SET]
-        return sorted(vegs)
-    # --- Tool 8: Audio transcription via AssemblyAI ---
-    def transcribe_audio(self, audio_url: str) -> str:
-        transcriber = aai.Transcriber()
-        result = transcriber.transcribe(audio_url)
-        return result.text
-    # --- Tool 9: Actor role lookup (stub—for you to flesh out) ---
-    def actor_role(self, title: str, role_name: str, target_series: str) -> str:
-        # TODO: implement via OMDb/IMDbPy
-        return "UNKNOWN"
-    # --- Tool 10: Sandbox code execution ---
-    def execute_code(self, code: str) -> str:
-        local_ns = {}
-        exec(code, {"__builtins__": {}}, local_ns)
-        # assume user sets 'output' variable
-        return str(local_ns.get("output", ""))
-    # --- Tool 11: Baseball stats via statsapi ---
-    def yankee_at_bats_most_walks(self, year: int) -> int:
-        leaders = statsapi.team_leaders("walks", season=year, team=147)  # Yankees=147
-        pid = leaders[0]["id"]
-        stats = statsapi.player_stats(pid, "hitting", "season", season=year)
-        return stats["batting"][0]["atBats"]
-    # --- Tool 12: Olympics data scraping ---
-    def least_athletes_olympics(self, year: int) -> str:
-        url = f"https://en.wikipedia.org/wiki/{year}_Summer_Olympics"
-        r = requests.get(url); soup = BeautifulSoup(r.text,"html.parser")
-        # naive: look for first table with nation counts...
-        table = soup.find("table","wikitable")
-        rows = table.find_all("tr")[1:]
-        data = [(r.find_all("td")[0].get_text(strip=True),
-                 int(r.find_all("td")[1].get_text(strip=True)))
-                for r in rows]
-        min_val = min(c for _,c in data)
-        candidates = sorted([code for code,count in data if count==min_val])
-        return candidates[0]
-    # --- Tool 13: Wikidata SPARQL for NASA awards ---
-    def get_nasa_award_number(self, qid: str) -> str:
-        sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
-        sparql.setQuery(f"""
-          SELECT ?award WHERE {{
-            wd:{qid} wdt:P496 ?award.
-          }}
-        """)
-        sparql.setReturnFormat(JSON)
-        res = sparql.query().convert()
-        return res["results"]["bindings"][0]["award"]["value"]
     # --- Core dispatcher/fallback ---
     def __call__(self, question: str) -> str:
-        q = question.strip()
-        # 1) studio albums by Mercedes Sosa 2000–2009
-        if "Mercedes Sosa" in q and "studio albums" in q:
-            text = self.wiki_get_page("Mercedes Sosa discography")
-            years = re.findall(r"\b(20\d\d)\b", text)
-            # count entries between 2000 and 2009
-            return str(sum(1 for y in years if 2000 <= int(y) <= 2009))
-        # 2) YouTube species count
-        m = re.search(r"youtube\.com/watch\?v=([A-Za-z0-9_\-]+)", q)
-        if m and "bird species" in q:
-            transcript = self.youtube_transcript(m.group(1))
-            nums = [int(n) for n in re.findall(r"(\d+)\s+species", transcript)]
-            return str(max(nums) if nums else 0)
-        # 3) reversed-text puzzles
-        if q.startswith((".",'"')) and "dnatsrednu" in q:
-            inner = q.strip('"').strip()[::-1]
-            # extract the core sentence
-            return inner
-        # 4) chess win move (FEN)
-        if "Review the chess position" in q:
-            # user would have attached FEN in question_data["files"], but here we default example
-            fen = "..."  # TODO: extract from files
-            return self.chess_best_move(fen)
-        # 5) operation table non-commutativity
-        if "counter-examples" in q:
-            # assume question_data carries a JSON-able table under item["table"]
-            table = json.loads(question_data.get("table_json","{}"))
-            bad = self.find_non_commutative(table)
-            return ",".join(bad)
-        # 6) grocery list vegetables
-        if "grocery list" in q and "vegetables" in q:
-            items = re.findall(r"\b[\w\s]+(?=,|$)", q)
-            vegs = self.classify_vegetables([i.strip() for i in items])
-            return ",".join(vegs)
-        # 7) transcript-based page numbers or ingredients
-        if q.lower().startswith("i was out sick") or "strawberry pie.mp3" in q:
-            # use URL or path from item["files"]
-            audio_url = question_data.get("audio_url")
-            text = self.transcribe_audio(audio_url)
-            # depends: page numbers or ingredients
-            nums = sorted(set(re.findall(r"\b(\d+)\b", text)), key=int)
-            return ",".join(nums)
-        # ... extend further for other tools ...
-        # fallback to LLM
         prompt = f"{self.system_prompt}Q: {q}\nA:"
-        out = self.generator(prompt, max_new_tokens=16, return_full_text=False)
-        return out[0]["generated_text"].strip()
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

 from transformers import pipeline
 import os
 import re
 import torch
 import gradio as gr
 import requests
 import inspect
 import pandas as pd
+from langchain_huggingface.llms import HuggingFacePipeline
+from langchain_core.tools import tool
+from langchain_core.agents import AgentExecutor, JsonOutputParser
 from youtube_transcript_api import YouTubeTranscriptApi
 import chess, chess.engine
 from bs4 import BeautifulSoup
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 HF_TOKEN = os.getenv("HF_TOKEN", None)
+@tool(
+    name="wiki_get_page",
+    description="Fetch raw wikitext for a given Wikipedia page title",
+    inputs={"title": "string"},
+    output_type="string",
+)
+def wiki_get_page(title: str) -> str:
+    API = "https://en.wikipedia.org/w/api.php"
+    params = {"action": "query", "format": "json", "prop": "revisions", "rvprop": "content", "rvslots": "*", "titles": title}
+    data = requests.get(API, params=params, timeout=10).json()
+    page = next(iter(data["query"]["pages"].values()))
+    return page["revisions"][0]["slots"]["main"]["*"]
+@tool(
+    name="youtube_transcript",
+    description="Retrieve transcript for a YouTube video ID",
+    inputs={"video_id": "string"},
+    output_type="string",
+)
+def youtube_transcript(video_id: str) -> str:
+    transcript = YouTubeTranscriptApi().fetch_transcript(video_id)
+    return " ".join(t["text"] for t in transcript)
+@tool(
+    name="reverse_text",
+    description="Reverse the input string",
+    inputs={"text": "string"},
+    output_type="string",
+)
+def reverse_text(text: str) -> str:
+    return text[::-1]
+@tool(
+    name="chess_best_move",
+    description="Return best move in UCI notation for given FEN",
+    inputs={"fen": "string", "time_limit": "float"},
+    output_type="string",
+)
+def chess_best_move(fen: str, time_limit: float = 0.1) -> str:
+    board = chess.Board(fen)
+    engine = chess.engine.SimpleEngine.popen_uci("/usr/bin/stockfish")
+    result = engine.play(board, chess.engine.Limit(time=time_limit))
+    engine.quit()
+    return result.move.uci()
+@tool(
+    name="find_non_commutative",
+    description="Find elements involved in non-commutativity from operation table",
+    inputs={"table": "dict"},
+    output_type="list[string]",
+)
+def find_non_commutative(table: dict) -> list:
+    elems = set(x for x,_ in table.keys())
+    bad = set()
+    for x in elems:
+        for y in elems:
+            if table[(x,y)] != table[(y,x)]:
+                bad.update([x,y])
+    return sorted(bad)
+@tool(
+    name="libretext_extract",
+    description="Extract text from LibreTexts URL using CSS selector",
+    inputs={"url": "string", "selector": "string"},
+    output_type="string",
+)
+def libretext_extract(url: str, selector: str) -> str:
+    r = requests.get(url, timeout=10)
+    soup = BeautifulSoup(r.text, "html.parser")
+    return soup.select_one(selector).get_text(strip=True)
+@tool(
+    name="classify_vegetables",
+    description="Return alphabetized list of vegetables from input list",
+    inputs={"items": "list[string]"},
+    output_type="list[string]",
+)
+def classify_vegetables(items: list) -> list:
+    VEGETABLE_SET = {"bell pepper","broccoli","celery","green beans","lettuce","zucchini","sweet potatoes"}
+    return sorted([i for i in items if i in VEGETABLE_SET])
+@tool(
+    name="transcribe_audio",
+    description="Transcribe audio file or URL using AssemblyAI",
+    inputs={"audio_url": "string"},
+    output_type="string",
+)
+def transcribe_audio(audio_url: str) -> str:
+    transcriber = aai.Transcriber()
+    result = transcriber.transcribe(audio_url)
+    return result.text
+@tool(
+    name="actor_role",
+    description="Lookup actor role via OMDb API (stub implementation)",
+    inputs={"title": "string", "role_name": "string", "target_series": "string"},
+    output_type="string",
+)
+def actor_role(title: str, role_name: str, target_series: str) -> str:
+    return "UNKNOWN"
+@tool(
+    name="execute_code",
+    description="Execute Python code snippet and return 'output' variable",
+    inputs={"code": "string"},
+    output_type="string",
+)
+def execute_code(code: str) -> str:
+    local_ns = {}
+    exec(code, {"__builtins__": {}}, local_ns)
+    return str(local_ns.get("output", ""))
+@tool(
+    name="yankee_at_bats_most_walks",
+    description="Return at bats for Yankee with most walks in given season",
+    inputs={"year": "int"},
+    output_type="int",
+)
+def yankee_at_bats_most_walks(year: int) -> int:
+    leaders = statsapi.team_leaders("walks", season=year, team=147)
+    pid = leaders[0]["id"]
+    stats = statsapi.player_stats(pid, "hitting", "season", season=year)
+    return stats["batting"][0]["atBats"]
+@tool(
+    name="least_athletes_olympics",
+    description="Return IOC code of country with least athletes in given Olympics year",
+    inputs={"year": "int"},
+    output_type="string",
+)
+def least_athletes_olympics(year: int) -> str:
+    url = f"https://en.wikipedia.org/wiki/{year}_Summer_Olympics"
+    r = requests.get(url)
+    soup = BeautifulSoup(r.text,"html.parser")
+    table = soup.find("table","wikitable")
+    rows = table.find_all("tr")[1:]
+    data = [(r.find_all("td")[0].get_text(strip=True), int(r.find_all("td")[1].get_text(strip=True))) for r in rows]
+    min_val = min(c for _,c in data)
+    candidates = sorted([code for code,count in data if count==min_val])
+    return candidates[0]
+@tool(
+    name="get_nasa_award_number",
+    description="Get NASA award number for a Wikidata QID",
+    inputs={"qid": "string"},
+    output_type="string",
+)
+def get_nasa_award_number(qid: str) -> str:
+    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
+    sparql.setQuery(f'SELECT ?award WHERE {{ wd:{qid} wdt:P496 ?award. }}')
+    sparql.setReturnFormat(JSON)
+    res = sparql.query().convert()
+    return res["results"]["bindings"][0]["award"]["value"]
+TOOLS = [
+    wiki_get_page,
+    youtube_transcript,
+    reverse_text,
+    chess_best_move,
+    find_non_commutative,
+    libretext_extract,
+    classify_vegetables,
+    transcribe_audio,
+    actor_role,
+    execute_code,
+    yankee_at_bats_most_walks,
+    least_athletes_olympics,
+    get_nasa_award_number,
+]
+SYSTEM_MESSAGE = """You are a concise AI assistant with access to the following tools:
+- wiki_get_page(title: string) → string
+- youtube_transcript(video_id: string) → string
+- reverse_text(text: string) → string
+- chess_best_move(fen: string, time_limit: float) → string
+- find_non_commutative(table: dict) → list[string]
+- libretext_extract(url: string, selector: string) → string
+- classify_vegetables(items: list[string]) → list[string]
+- transcribe_audio(audio_url: string) → string
+- actor_role(title: string, role_name: string, target_series: string) → string
+- execute_code(code: string) → string
+- yankee_at_bats_most_walks(year: int) → int
+- least_athletes_olympics(year: int) → string
+- get_nasa_award_number(qid: string) → string
+When you need to use a tool, respond exactly with:
+Action: <tool_name>(<arg_name>=<value>, ...)
+Then wait for the tool’s output before continuing.
+Once you have all the information, provide your final answer in as few words as possible, with no extra commentary or prefixes.
+"""
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         # initialize HF inference pipeline once
         if HF_TOKEN is None:
             raise ValueError("HF_TOKEN not set in environment")
+        self.generator = pipeline("text-generation", model="EleutherAI/gpt-neo-125M", max_new_tokens=16)
+        self.llm = HuggingFacePipeline.from_pipeline(self.generator)
+        self.llm = self.llm.bind_tools(TOOLS)
         # The GAIA system prompt (no "FINAL ANSWER:" at the end)
+        self.system_prompt = SYSTEM_MESSAGE
         print("BasicAgent initialized with LLM.")
     # --- Core dispatcher/fallback ---
     def __call__(self, question: str) -> str:
         prompt = f"{self.system_prompt}Q: {q}\nA:"
+        #out = self.generator(prompt, max_new_tokens=16, return_full_text=False)
+        #return out[0]["generated_text"].strip()
+        agent = AgentExecutor(agent=self.llm, tools=TOOLS, prompt=prompt, verbose=False, return_intermediate_steps=False)
+        result = agent.invoke({"input": question})
+        return JsonOutputParser().parse(result)
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """