Raj989898 committed on
Commit
ffec227
·
verified ·
1 Parent(s): cc820c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -64
app.py CHANGED
@@ -1,64 +1,49 @@
1
"""Evaluate a small text2text model against the GAIA scoring questions.

Fetches the question set from the scoring API, looks up ground-truth
answers in the GAIA level-1 validation split, and reports exact-match
accuracy (case-insensitive, whitespace-trimmed).
"""

import requests
from datasets import load_dataset
from transformers import pipeline

# ---------------------------
# CONFIG
# ---------------------------
SCORING_API = "https://agents-course-unit4-scoring.hf.space"
MODEL_NAME = "google/flan-t5-base"

# ---------------------------
# Load model
# ---------------------------
print("Loading model...")
qa = pipeline("text2text-generation", model=MODEL_NAME, max_new_tokens=64)

# ---------------------------
# Fetch the questions
# ---------------------------
print("Fetching GAIA questions...")
# Timeout + status check: fail loudly on a hung connection or an error
# page instead of crashing later with an opaque JSON decode error.
response = requests.get(f"{SCORING_API}/questions", timeout=30)
response.raise_for_status()
questions = response.json()

# Set for O(1) membership tests in the comprehension below.
task_ids = {q["task_id"] for q in questions}

# ---------------------------
# Load GAIA validation dataset
# ---------------------------
print("Loading GAIA validation set...")
dataset = load_dataset(
    "gaia-benchmark/GAIA",
    "2023_level1",
    split="validation",
)

# Map task_id -> correct answer, restricted to the fetched questions.
ground_truth = {
    item["task_id"]: item["Final answer"]
    for item in dataset
    if item["task_id"] in task_ids
}

# ---------------------------
# Evaluate (case-insensitive exact match)
# ---------------------------
correct = 0

for q in questions:
    task_id = q["task_id"]
    question = q["question"]
    # Missing ground truth falls back to "" so the loop never raises.
    true_answer = ground_truth.get(task_id, "").strip().lower()

    model_output = qa(question)[0]["generated_text"].strip().lower()

    match = model_output == true_answer
    correct += int(match)

    print("\n" + "=" * 80)
    print(f"QUESTION:\n{question}")
    print(f"\nEXPECTED:\n{true_answer}")
    print(f"\nMODEL:\n{model_output}")
    print(f"\nMATCH: {'✅' if match else '❌'}")

print("\n" + "=" * 80)
# Report against the actual number of questions instead of hard-coding 20.
print(f"FINAL SCORE: {correct}/{len(questions)}")
 
1
class BasicAgent:
    """GAIA agent wrapping a smolagents CodeAgent.

    Calling the instance with a question string runs the agent and
    returns a cleaned, single-line answer suitable for exact-match
    grading; returns "" on failure or when the agent produces nothing.
    """

    # Boilerplate markers the model tends to prepend to its answer.
    # Matched case-insensitively and only at the START of the text:
    # the previous chained `.replace()` calls deleted these substrings
    # anywhere in the answer, which could corrupt a legitimate answer
    # that merely contains e.g. "Answer:" in its body.
    _ANSWER_PREFIXES = ("final answer:", "answer:", "the answer is")

    def __init__(self):
        """Build the underlying CodeAgent (web search + Python tools)."""
        print("Initializing GAIA Agent")

        # Imported lazily so the class can be defined without smolagents
        # installed (e.g. when unit-testing the answer cleaning alone).
        from smolagents import CodeAgent
        from smolagents import DuckDuckGoSearchTool
        from smolagents import InferenceClientModel
        from smolagents import PythonInterpreterTool

        model = InferenceClientModel(
            model_id="meta-llama/Meta-Llama-3-8B-Instruct"
        )

        self.agent = CodeAgent(
            tools=[
                DuckDuckGoSearchTool(),
                PythonInterpreterTool(),
            ],
            model=model,
            max_steps=6,  # keep each question's run bounded
        )

    @staticmethod
    def _clean_answer(raw: str) -> str:
        """Normalize raw model output for exact-match grading.

        Strips known answer prefixes (case-insensitive, start-of-string
        only, repeated in case they are stacked) and returns only the
        first line, whitespace-trimmed.
        """
        answer = raw.strip()
        stripped = True
        while stripped:
            stripped = False
            lowered = answer.lower()
            for prefix in BasicAgent._ANSWER_PREFIXES:
                if lowered.startswith(prefix):
                    answer = answer[len(prefix):].lstrip()
                    stripped = True
                    break
        return answer.split("\n")[0].strip()

    def __call__(self, question: str) -> str:
        """Run the agent on *question*; return "" on error or no result."""
        try:
            print("Question:", question)

            result = self.agent.run(question)
            if result is None:
                return ""

            return self._clean_answer(str(result))
        except Exception as e:
            # Boundary catch: grading expects a string, never an exception.
            print("Agent error:", e)
            return ""