D3MI4N committed on
Commit
7e6c8c6
·
1 Parent(s): 5c5aae7

Add more GAIA examples as reference

Browse files
app.py CHANGED
@@ -6,7 +6,7 @@ import asyncio
6
  from typing import Optional
7
 
8
  from langchain_core.messages import HumanMessage
9
- from langgraph_final import graph # Your graph agent
10
 
11
  # Constants
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
6
  from typing import Optional
7
 
8
  from langchain_core.messages import HumanMessage
9
+ from langgraph_final2 import graph # Your graph agent
10
 
11
  # Constants
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
langgraph_final.py CHANGED
@@ -143,7 +143,9 @@ if __name__ == "__main__":
143
  "What is the capital of France?",
144
  "List only the vegetables from: broccoli, apple, carrot. Alphabetize, comma‑separated.",
145
  "Given the Excel file at test_sales.xlsx, what were total sales for food? Express in USD with two decimals.",
146
- "Examine the video at ./test.wav. What is its transcript?"
 
 
147
  ]
148
  for q in tests:
149
  res = graph.invoke({"messages":[HumanMessage(content=q)]})
 
143
  "What is the capital of France?",
144
  "List only the vegetables from: broccoli, apple, carrot. Alphabetize, comma‑separated.",
145
  "Given the Excel file at test_sales.xlsx, what were total sales for food? Express in USD with two decimals.",
146
+ "Examine the video at ./test.wav. What is its transcript?",
147
+ "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
148
+ """ Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec. What does Teal'c say in response to the question "Isn't that hot?" """
149
  ]
150
  for q in tests:
151
  res = graph.invoke({"messages":[HumanMessage(content=q)]})
langgraph_final2.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from dotenv import load_dotenv
4
+ import pandas as pd
5
+ import whisper
6
+
7
+ from langchain_openai import ChatOpenAI
8
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
9
+ from langchain_core.tools import tool
10
+ from langchain_community.tools.tavily_search import TavilySearchResults
11
+ from langchain_community.document_loaders import WikipediaLoader
12
+
13
+ # ** Retrieval imports **
14
+ from langchain_huggingface import HuggingFaceEmbeddings
15
+ from supabase.client import Client, create_client
16
+ from langchain_community.vectorstores import SupabaseVectorStore
17
+ from langchain.tools.retriever import create_retriever_tool
18
+
19
+ from langgraph.graph import StateGraph, MessagesState, START, END
20
+ from langgraph.prebuilt import ToolNode, tools_condition
21
+
22
+ load_dotenv()
23
+
24
+ # ─────────────────────────────────────────────────────────────────────────────
25
+ # SYSTEM PROMPT
26
+ # ─────────────────────────────────────────────────────────────────────────────
27
+ SYSTEM = SystemMessage(content="""
28
+ You are a razor‑sharp QA agent that answers in **one bare line, and only the answer**.
29
+ - Your response must be *only* the answer, with no introductory phrases, explanations, or conversational filler.
30
+ - Do NOT include any XML-like tags (e.g., <solution>).
31
+ - Use tools for factual lookups, audio transcription, or Excel analysis.
32
+ - Lists: comma‑separated, alphabetized if requested, no trailing period.
33
+ - Codes (IOC, country, etc.) bare.
34
+ - Currency in USD as 12.34 (no symbol).
35
+ - Never apologize or explain.
36
+ Begin.
37
+ """.strip())
38
+
39
+ # ─────────────────────────────────────────────────────────────────────────────
40
+ # TOOLS
41
+ # ─────────────────────────────────────────────────────────────────────────────
42
47
+
48
+ @tool
49
+ def wiki_search(query: str) -> dict:
50
+ """Search Wikipedia for up to 2 pages."""
51
+ pages = WikipediaLoader(query=query, load_max_docs=2).load()
52
+ return {"wiki_results": "\n\n".join(p.page_content for p in pages)}
53
+
54
+ @tool
55
+ def transcribe_audio(path: str) -> dict:
56
+ """Transcribe a local audio file."""
57
+ import os
58
+ abs_path = os.path.abspath(path)
59
+ print(f"DEBUG: Checking for file at {abs_path}")
60
+ print(f"DEBUG: File exists? {os.path.isfile(abs_path)}")
61
+ print(f"DEBUG: Directory listing: {os.listdir(os.path.dirname(abs_path))}")
62
+ try:
63
+ import subprocess
64
+ subprocess.run(["ffmpeg", "-version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
65
+ model = whisper.load_model("base")
66
+ result = model.transcribe(abs_path)
67
+ return {"transcript": result["text"]}
68
+ except FileNotFoundError:
69
+ return {"transcript": "Transcription failed due to missing ffmpeg. Please install ffmpeg and ensure it is in your PATH."}
70
+ except Exception as e:
71
+ return {"transcript": f"Error during transcription: {e}"}
72
+
73
+ @tool
74
+ def read_excel(path: str, sheet_name: str = None, sample_rows: int = 5) -> dict:
75
+ """Return a summary of an Excel file for the LLM to query."""
76
+ df = pd.read_excel(path, sheet_name=sheet_name or 0)
77
+ sample = df.head(sample_rows)
78
+ summary = {
79
+ "columns": list(df.columns),
80
+ "types": {c: str(df[c].dtype) for c in df.columns},
81
+ "sample_csv": sample.to_csv(index=False),
82
+ "row_count": len(df)
83
+ }
84
+ return {"excel_summary": summary}
85
+
86
+ # ─────────────────────────────────────────────────────────────────────────────
87
+ # RETRIEVER TOOL (Supabase vector store)
88
+ # ─────────────────────────────────────────────────────────────────────────────
89
+ emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
90
+ supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_SERVICE_KEY"])
91
+ vector_store = SupabaseVectorStore(
92
+ client=supabase,
93
+ embedding=emb,
94
+ table_name="documents",
95
+ query_name="match_documents_langchain",
96
+ )
97
+ retriever_tool = create_retriever_tool(
98
+ retriever=vector_store.as_retriever(),
99
+ name="question_search",
100
+ description="Retrieve similar QA pairs from the documents table."
101
+ )
102
+
103
+ TOOLS = [web_search, wiki_search, transcribe_audio, read_excel, retriever_tool]
104
+
105
+ # ─────────────────────────────────────────────────────────────────────────────
106
+ # AGENT & GRAPH SETUP
107
+ # ─────────────────────────────────────────────────────────────────────────────
108
+ llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
109
+ llm_with_tools = llm.bind_tools(TOOLS)
110
+
111
+ builder = StateGraph(MessagesState)
112
+
113
+ def assistant_node(state: dict) -> dict:
114
+ msgs = state.get("messages", [])
115
+ if not msgs or not isinstance(msgs[0], SystemMessage):
116
+ msgs = [SYSTEM] + msgs
117
+
118
+ out: AIMessage = llm_with_tools.invoke(msgs)
119
+
120
+ # Check if the LLM wants to use a tool
121
+ if out.tool_calls:
122
+ # If it's a tool call, return the message as is for the graph to handle
123
+ return {"messages": msgs + [out]}
124
+ else:
125
+ # If it's a direct answer, apply the formatting
126
+ answer_content = out.content.strip()
127
+
128
+ # Post-processing to ensure "one bare line" and remove XML-like tags
129
+ # The SYSTEM prompt already strongly discourages XML, but this is a safeguard.
130
+ answer_content = re.sub(r'<[^>]+>(.*?)</[^>]+>', r'\1', answer_content) # for <tag>content</tag>
131
+ answer_content = re.sub(r'<[^>]+/>', '', answer_content) # for <tag/>
132
+ answer_content = re.sub(r'<[^>]+>', '', answer_content) # for unmatched <tag>
133
+
134
+ # Ensure it's a single line and remove trailing period if any
135
+ answer_content = answer_content.split('\n')[0].strip().rstrip('.')
136
+
137
+ return {"messages": msgs + [AIMessage(content=answer_content)]}
138
+
139
+ builder.add_node("assistant", assistant_node)
140
+ builder.add_node("tools", ToolNode(TOOLS))
141
+
142
+ builder.add_edge(START, "assistant")
143
+ builder.add_conditional_edges(
144
+ "assistant",
145
+ tools_condition,
146
+ {"tools": "tools", END: END}
147
+ )
148
+ builder.add_edge("tools", "assistant")
149
+
150
+ graph = builder.compile()
151
+
152
+ # ─────────────────────────────────────────────────────────────────────────────
153
+ # CLI SMOKE TESTS
154
+ # ─────────────────────────────────────────────────────────────────────────────
155
+ if __name__ == "__main__":
156
+ print("🔍 Graph Mermaid:")
157
+ print(graph.get_graph().draw_mermaid())
158
+
159
+ print("\n🔹 Smoke‑testing agent")
160
+ tests = [
161
+ "How much is 2 + 2?",
162
+ "What is the capital of France?",
163
+ "List only the vegetables from: broccoli, apple, carrot. Alphabetize, comma‑separated.",
164
+ "Given the Excel file at test_sales.xlsx, what were total sales for food? Express in USD with two decimals.",
165
+ "Examine the video at ./test.wav. What is its transcript?",
166
+ "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
167
+ """ Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec. What does Teal'c say in response to the question "Isn't that hot?" """
168
+ ]
169
+ for q in tests:
170
+ res = graph.invoke({"messages":[HumanMessage(content=q)]})
171
+ ans = res["messages"][-1].content.strip().rstrip(".")
172
+ print(f"Q: {q}\n→ A: {ans!r}\n")
requirements.txt CHANGED
@@ -42,3 +42,5 @@ openai-whisper
42
  openpyxl
43
  supabase
44
  ffmpeg-python
 
 
 
42
  openpyxl
43
  supabase
44
  ffmpeg-python
45
+ datasets
46
+ youtube
supabase_fill_table.py DELETED
@@ -1,104 +0,0 @@
1
- import os
2
- import requests
3
- import difflib
4
- from supabase import create_client
5
- from sentence_transformers import SentenceTransformer
6
-
7
- from dotenv import load_dotenv
8
-
9
- # Load environment variables from .env file
10
- load_dotenv()
11
-
12
- # Load environment variables
13
- SUPABASE_URL = os.getenv("SUPABASE_URL")
14
- SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_KEY")
15
-
16
- if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
17
- raise RuntimeError("Please set SUPABASE_URL and SUPABASE_SERVICE_KEY in env")
18
-
19
- GAIA_API = "https://agents-course-unit4-scoring.hf.space"
20
-
21
-
22
-
23
- # Initialize Supabase client and SentenceTransformer model
24
- supabase = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)
25
- model = SentenceTransformer("all-mpnet-base-v2")
26
-
27
- # Local ground-truth mapping
28
- GROUND_TRUTH = {
29
- "What is the capital of Italy?": "Rome",
30
- "How much is 2 + 2?": "4",
31
- "List only the vegetables from: milk, eggs, broccoli, carrot. Alphabetize.": "broccoli, carrot",
32
- "What country had the least number of athletes at the 1928 Summer Olympics? Give IOC code.": "LUX",
33
- "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Last names only, comma-separated.": "Lynn, Gilbert",
34
- "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?": "Claus",
35
- "Given the Excel file at 'test_sales.xlsx', what were total sales for food (not drinks)? Express in USD with two decimal places.": "45.00",
36
- "In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?": "270",
37
- "What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials?": "Louvrier",
38
- "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? First name only.": "Wojciech",
39
- }
40
-
41
- def fetch_gaia_examples():
42
- """Fetch GAIA questions from API and pair with ground-truth answers."""
43
- try:
44
- response = requests.get(f"{GAIA_API}/questions")
45
- response.raise_for_status()
46
- questions = response.json() # Assuming the API returns a JSON array of dicts with a 'question' key
47
- except requests.RequestException as e:
48
- raise RuntimeError(f"Failed to fetch questions from GAIA API: {e}")
49
-
50
- # Show the first 5 questions from the API
51
- print("First 5 questions from API:")
52
- for q_obj in questions[:5]:
53
- question_text = q_obj["question"] if isinstance(q_obj, dict) and "question" in q_obj else q_obj
54
- print("-", question_text)
55
-
56
- examples = []
57
- for q_obj in questions:
58
- # Extract the question string from the dict
59
- question_text = q_obj["question"] if isinstance(q_obj, dict) and "question" in q_obj else q_obj
60
- # Try exact match first
61
- answer = GROUND_TRUTH.get(question_text)
62
- # If not found, try fuzzy match
63
- if not answer:
64
- match = difflib.get_close_matches(question_text, GROUND_TRUTH.keys(), n=1, cutoff=0.8)
65
- if match:
66
- answer = GROUND_TRUTH[match[0]]
67
- if answer:
68
- examples.append((question_text, answer))
69
- else:
70
- print(f"Warning: No ground-truth answer found for question: {question_text}")
71
-
72
- return examples
73
-
74
- def main():
75
- # Optionally: fetch and print API questions for inspection
76
- try:
77
- response = requests.get(f"{GAIA_API}/questions")
78
- response.raise_for_status()
79
- questions = response.json()
80
- print("First 5 questions from API:")
81
- for q_obj in questions[:5]:
82
- question_text = q_obj["question"] if isinstance(q_obj, dict) and "question" in q_obj else q_obj
83
- print("-", question_text)
84
- except requests.RequestException as e:
85
- print(f"Warning: Could not fetch questions from GAIA API: {e}")
86
-
87
- # Insert all ground-truth examples
88
- to_insert = []
89
- for q, a in GROUND_TRUTH.items():
90
- qa = f"Q: {q} A: {a}"
91
- emb = model.encode(qa).tolist()
92
- to_insert.append({
93
- "page_content": qa,
94
- "embedding": emb
95
- })
96
-
97
- res = supabase.table("documents").insert(to_insert).execute()
98
- if res.data:
99
- print(f"Inserted {len(to_insert)} GAIA examples from GROUND_TRUTH.")
100
- else:
101
- print("Error inserting:", res)
102
-
103
- if __name__ == "__main__":
104
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supabase_fill_table2.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from supabase import create_client
3
+ from sentence_transformers import SentenceTransformer
4
+ from huggingface_hub import hf_hub_download
5
+ from datasets import load_dataset
6
+ from dotenv import load_dotenv
7
+
8
+ # -----------------------------------------------------------------------------
9
+ # Load env vars
10
+ # -----------------------------------------------------------------------------
11
+ load_dotenv()
12
+ SUPABASE_URL = os.getenv("SUPABASE_URL")
13
+ SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_KEY")
14
+ HF_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
15
+
16
+ if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
17
+ raise RuntimeError("Please set SUPABASE_URL and SUPABASE_SERVICE_KEY in your .env")
18
+
19
+ if not HF_TOKEN:
20
+ raise RuntimeError(
21
+ "Please set HUGGINGFACE_API_TOKEN in your .env and ensure you've been granted access to the GAIA dataset."
22
+ )
23
+
24
+ # -----------------------------------------------------------------------------
25
+ # Init clients & models
26
+ # -----------------------------------------------------------------------------
27
+ supabase = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)
28
+ model = SentenceTransformer("all-mpnet-base-v2")
29
+
30
+ # -----------------------------------------------------------------------------
31
+ # GAIA metadata location on HF
32
+ # -----------------------------------------------------------------------------
33
+ GAIA_REPO_ID = "gaia-benchmark/GAIA"
34
+ GAIA_METADATA_FILE = "2023/validation/metadata.jsonl"
35
+
36
+ def fetch_gaia_validation_examples():
37
+ print("🔄 Downloading GAIA metadata.jsonl …")
38
+ metadata_path = hf_hub_download(
39
+ repo_id = GAIA_REPO_ID,
40
+ filename = GAIA_METADATA_FILE,
41
+ token = HF_TOKEN,
42
+ repo_type = "dataset",
43
+ )
44
+ print(f"✅ Downloaded to {metadata_path!r}")
45
+
46
+ print("🔄 Loading JSONL via Datasets …")
47
+ ds = load_dataset(
48
+ "json",
49
+ data_files = metadata_path,
50
+ split = "train",
51
+ )
52
+ print("Columns in your JSONL:", ds.column_names)
53
+
54
+ QUESTION_FIELD = "Question"
55
+ ANSWER_FIELD = "Final answer"
56
+
57
+ qa = []
58
+ for row in ds:
59
+ q = row.get(QUESTION_FIELD)
60
+ a = row.get(ANSWER_FIELD)
61
+ if q and a:
62
+ qa.append((q, a))
63
+
64
+ print(f"✅ Found {len(qa)} (Question, Final answer) pairs.")
65
+ return qa
66
+
67
+ def main():
68
+ qa_pairs = fetch_gaia_validation_examples()
69
+ if not qa_pairs:
70
+ print("⚠️ No QA pairs—abort.")
71
+ return
72
+
73
+ to_insert = []
74
+ for q, a in qa_pairs:
75
+ text = f"Q: {q} A: {a}"
76
+ emb = model.encode(text).tolist()
77
+ to_insert.append({"page_content": text, "embedding": emb})
78
+
79
+ print(f"🚀 Inserting {len(to_insert)} records into Supabase…")
80
+ res = supabase.table("documents").insert(to_insert).execute()
81
+ if res.data:
82
+ print(f"🎉 Successfully inserted {len(to_insert)} GAIA examples.")
83
+ else:
84
+ print("❌ Insert appeared to fail. Response:")
85
+ print(res)
86
+
87
+ if __name__ == "__main__":
88
+ main()