Annessha18 committed on
Commit
e39d8d6
·
verified ·
1 Parent(s): 49ad7de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -33
app.py CHANGED
@@ -1,54 +1,60 @@
1
  import gradio as gr
2
- import torch
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
- from gaia import run_gaia_evaluation
5
 
6
# Hugging Face checkpoint to load; the base-size model is used for stability.
MODEL_NAME = "google/flan-t5-base"

# Tokenizer and seq2seq model are loaded once at import time and shared.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Use the GPU when torch reports one as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
13
 
 
 
 
 
 
 
 
 
 
 
14
 
15
def answer_question(question: str) -> str:
    """Generate a concise answer to *question* with the module-level model.

    The question is wrapped in a fixed instruction prompt, tokenized with
    truncation enabled and moved to ``device``. The model then generates at
    most 64 new tokens with sampling disabled. The first generated sequence
    is decoded without special tokens and returned with surrounding
    whitespace stripped.
    """
    prompt = f"Answer the following question concisely:\n{question}"

    encoded = tokenizer(prompt, return_tensors="pt", truncation=True)
    encoded = encoded.to(device)

    generation_options = {"max_new_tokens": 64, "do_sample": False}
    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = model.generate(**encoded, **generation_options)

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.strip()
29
 
30
 
31
def run_evaluation():
    """
    Run the GAIA evaluation using ``answer_question`` as the solver.

    IMPORTANT:
    - This function MUST return
    - Must NOT print
    - Must NOT loop forever
    """
    # Hand the solver callable to the evaluation helper and pass its result
    # straight back to the caller.
    return run_gaia_evaluation(answer_question)
40
 
41
 
42
# Single-page UI: a heading, one button, and a JSON panel for the result.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Unit 4 – Basic Agent Runner")

    run_btn = gr.Button("Run Evaluation & Submit")
    output = gr.JSON(label="Submission Result")

    # The handler takes no inputs; its return value is rendered in the JSON panel.
    run_btn.click(fn=run_evaluation, inputs=[], outputs=output)

demo.launch()
 
1
  import gradio as gr
2
+ import json
3
+ import os
4
+ from transformers import pipeline
5
 
6
# Load model (lightweight & allowed): a text2text-generation pipeline on
# FLAN-T5 base, built once at import time and capped at 64 new tokens per reply.
qa_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
    max_new_tokens=64,
)
12
 
13
# Location of the GAIA validation questions (a JSON file). It can be overridden
# with the GAIA_DATA_PATH environment variable; when that is unset, the original
# fixed path is used.
DATA_PATH = os.environ.get("GAIA_DATA_PATH", "/data/gaia_validation_questions.json")
 
14
 
 
 
15
 
16
def solve_question(question: str) -> str:
    """Answer *question* with the very simple baseline solver.

    The question is passed unchanged to the module-level ``qa_pipeline``.
    The ``generated_text`` of the first result is returned with surrounding
    whitespace stripped.

    Returns:
        The generated answer, or the literal string ``"unknown"`` when the
        question is empty or blank, or when the pipeline call fails for any
        reason. Failures are logged instead of raised so that one bad
        question cannot abort a whole evaluation run.
    """
    import logging

    # Nothing to ask the model: skip the call entirely.
    if not question or not question.strip():
        return "unknown"

    try:
        result = qa_pipeline(question)
        return result[0]["generated_text"].strip()
    except Exception:
        # Deliberate best-effort boundary: keep the fallback answer, but
        # record the failure instead of swallowing it silently.
        logging.getLogger(__name__).exception(
            "Pipeline failed for question: %r", question
        )
        return "unknown"
26
 
 
 
27
 
28
def run_evaluation():
    """Answer every question in the GAIA validation file.

    Reads the JSON document at ``DATA_PATH``, iterates over its records and
    runs ``solve_question`` on each record's ``"question"`` value.

    Returns:
        A dict mapping each record's ``"id"`` to the answer string. If the
        same id occurs more than once, the last record wins.

    Raises:
        OSError: if the file at ``DATA_PATH`` cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
        KeyError: if a record (a dict) lacks ``"id"`` or ``"question"``.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(DATA_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)

    return {item["id"]: solve_question(item["question"]) for item in data}
 
43
 
44
 
45
def submit():
    """
    Submission entry point; the original notes mark it as REQUIRED by GAIA.

    Delegates to ``run_evaluation`` and hands its result back unchanged.
    It must return a dict of {question_id: answer}.
    """
    answers = run_evaluation()
    return answers
 
51
 
52
 
53
# Single-page UI: a heading, one button, and a JSON panel for the result.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Unit 4 – Basic Agent Runner")

    run_btn = gr.Button("Run Evaluation & Submit")
    output = gr.JSON(label="Submission Result")

    # Clicking the button calls submit() and renders its return value as JSON.
    run_btn.click(
        fn=submit,
        outputs=output,
    )

# Listen on all interfaces at port 7860.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
)