victor-johnson committed on
Commit
97b958d
·
verified ·
1 Parent(s): 8679e88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -85
app.py CHANGED
@@ -1,164 +1,208 @@
1
- import os
2
  import requests
3
  import pandas as pd
4
  import re
 
5
  import textwrap
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
  import torch
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
11
 
12
- # --- Local LLM Agent ---
13
# --- Local LLM Agent ---
class BasicAgent:
    """
    Loads and runs a small LLM *locally* instead of using the remote API.
    The model answers concisely (number, word, or phrase only).
    """

    def __init__(self):
        """Load tokenizer and weights: fp16 when CUDA is available, else fp32 on CPU."""
        model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
        print(f"🚀 Loading model locally: {model_id}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            # fp16 only helps on GPU; CPU inference stays in fp32.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto",
        )
        print("✅ Local model ready.")

    def _clean(self, raw: str) -> str:
        """Clean raw model output to return just the concise final value.

        Keeps the last non-empty line, strips an "answer:"-style prefix,
        wrapping quotes/backticks, and trailing punctuation, then caps the
        result at 200 characters.
        """
        txt = raw.strip()
        lines = [l.strip() for l in txt.splitlines() if l.strip()]
        if lines:
            txt = lines[-1]
        txt = re.sub(r"^(final answer|answer|prediction)\s*[:\-]\s*", "", txt, flags=re.I)
        txt = txt.strip("`'\" \t\n\r")
        txt = re.sub(r"[ \t]*[.;,:-]+$", "", txt)
        return txt[:200]

    def __call__(self, question: str) -> str:
        """Answer one question; returns the cleaned, concise model output."""
        print(f"🧠 Question: {question[:100]}...")

        prompt = textwrap.dedent(f"""
            You must answer the question with a single, concise value
            (number, word, date, or short phrase) and nothing else.

            Question: {question}
            Final answer:
        """).strip()

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=50,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens, not the echoed prompt.
        generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
        raw_answer = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
        clean_ans = self._clean(raw_answer)
        print(f"💡 Cleaned answer: {clean_ans}")
        return clean_ans
64
 
 
 
 
 
 
 
 
 
 
 
65
 
66
# --- Fetch Questions from API ---
def fetch_questions() -> list[dict]:
    """
    Calls the /questions endpoint to retrieve evaluation questions.
    Returns a list of dicts with keys: task_id, question.
    Best-effort: returns [] on any network or format failure.
    """
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=60)
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:
        print(f" Failed to fetch questions: {e}")
        return []
    if isinstance(data, list):
        print(f"✅ Retrieved {len(data)} questions from API.")
        return data
    print("⚠️ Unexpected response format from /questions.")
    return []
85
-
86
-
87
# --- Submit Answers ---
def submit_answers(answers: list[dict], username: str, code_link: str, token: str) -> dict:
    """
    Submit answers to the scoring API.
    answers = [{"task_id": ..., "submitted_answer": ...}]

    Returns the API's JSON response on success, or
    {"success": False, "message": <error>} on any failure.
    """
    payload = {
        "username": username,
        "agent_code": code_link,
        "answers": answers,
    }

    # Only attach the auth header when a token was actually supplied.
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    try:
        resp = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, headers=headers, timeout=60)
        resp.raise_for_status()
        print("✅ Submission successful!")
        return resp.json()
    except Exception as e:
        return {"success": False, "message": str(e)}
107
 
108
 
109
# --- Run & Submit ---
def run_and_submit_all(username: str, code_link: str, token: str):
    """
    Fetch questions, run the local agent on each, and submit all answers.

    Returns a (status_message, results_dataframe) tuple.
    """
    if not username or not code_link or not token:
        return "❌ Missing username, code link, or token.", pd.DataFrame()

    questions = fetch_questions()
    if not questions:
        return "❌ Could not fetch questions from API.", pd.DataFrame()

    # Initialize model
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"❌ Failed to load model: {e}", pd.DataFrame()

    # Run through all questions; a per-question failure is recorded, not fatal.
    results = []
    for q in questions:
        task_id = q.get("task_id")
        question_text = q.get("question", "")
        try:
            answer = agent(question_text)
        except Exception as e:
            answer = f"[Error: {e}]"
        results.append({"task_id": task_id, "question": question_text, "submitted_answer": answer})

    df = pd.DataFrame(results)

    # Prepare submission payload
    answers_payload = [{"task_id": r["task_id"], "submitted_answer": r["submitted_answer"]} for r in results]

    # Submit
    result = submit_answers(answers_payload, username, code_link, token)
    success = result.get("success", False)
    message = result.get("message", "No response message.")

    if success:
        return f"✅ Submission successful: {message}", df
    else:
        return f"❌ Submission failed: {message}", df
149
 
150
 
151
# --- CLI Entry Point ---
if __name__ == "__main__":
    print("\n" + "-" * 30 + " CLI MODE " + "-" * 30 + "\n")
    # Prefer environment variables; fall back to interactive prompts.
    token = os.getenv("HF_TOKEN") or input("🔑 Enter your Hugging Face token: ").strip()
    username = os.getenv("HF_USERNAME") or input("👤 Enter your Hugging Face username: ").strip()
    code_link = os.getenv("CODE_LINK") or input("🔗 Enter your Hugging Face Space repo link (.../tree/main): ").strip()

    print("\n⚙️ Running agent evaluation and submission...\n")
    status, df = run_and_submit_all(username, code_link, token)

    print("\n" + "=" * 80)
    print(status)
    print("=" * 80)
    print(df)
 
 
1
import os
import re
import textwrap

import requests
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
DOCS_URL = f"{DEFAULT_API_URL}/docs"  # or perhaps /openapi.json if exposed
12
+
13
 
 
14
class BasicAgent:
    """
    Loads and runs a small LLM *locally* inside the Hugging Face Space
    instead of calling the Hugging Face Inference API (which might be blocked).
    """

    def __init__(self):
        """Load tokenizer and weights: fp16 when CUDA is available, else fp32 on CPU."""
        model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
        print(f"🚀 Loading model locally: {model_id}")

        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            # NOTE(review): these kwargs were truncated in the diff rendering;
            # restored from the previous revision's unchanged context.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto",
        )
        print("✅ Local model ready.")

    def _clean(self, raw: str) -> str:
        """Reduce raw generation output to a single concise value."""
        txt = raw.strip()
        lines = [l.strip() for l in txt.splitlines() if l.strip()]
        if lines:
            txt = lines[-1]
        # Drop "final answer:"-style prefixes, wrapping quotes, trailing punctuation.
        txt = re.sub(r"^(final answer|answer|prediction)\s*[:\-]\s*", "", txt, flags=re.I)
        txt = txt.strip("`'\" \t\n\r")
        txt = re.sub(r"[ \t]*[.;,:-]+$", "", txt)
        return txt[:200]

    def __call__(self, question: str) -> str:
        """Answer one question; returns the cleaned, concise model output."""
        print(f"🧠 Agent received question: {question[:120]}...")

        prompt = textwrap.dedent(f"""
            You must answer the question with a single, concise value
            (number, word, date, or short phrase) and nothing else.

            Question: {question}
            Final answer:
        """).strip()

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=50,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens, not the echoed prompt.
        generated_ids = outputs[0][inputs["input_ids"].shape[1]:]
        raw_answer = self.tokenizer.decode(generated_ids, skip_special_tokens=True)
        clean_ans = self._clean(raw_answer)
        print(f"💡 Agent raw: '{raw_answer[:80]}' → clean: '{clean_ans}'")
        return clean_ans
62
 
63
def fetch_questions_from_docs() -> list[dict]:
    """
    Try to fetch question & expected answer pairs from the API docs / spec.
    Returns a list of dicts: {"question": ..., "expected": ...} if available.
    Best-effort: returns [] when the OpenAPI spec cannot be retrieved.
    """
    # Try to fetch OpenAPI spec
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/openapi.json", timeout=30)
        resp.raise_for_status()
        spec = resp.json()
    except Exception as e:
        print(f"⚠️ Failed to fetch OpenAPI spec: {e}")
        return []

    questions = []
    # This part depends heavily on how the spec is structured.
    # For instance, spec["components"]["schemas"]["QuestionAnswer"]["example"] might exist.
    comp = spec.get("components", {}).get("schemas", {})
    qa_schema = comp.get("QuestionAnswer")
    if qa_schema:
        example = qa_schema.get("example") or qa_schema.get("examples")
        if example:
            # The example may be a list of pairs, a wrapper dict, or a single pair.
            if isinstance(example, list):
                for ex in example:
                    questions.append({"question": ex.get("question", ""), "expected": ex.get("expected", "")})
            elif isinstance(example, dict):
                if "questions" in example and isinstance(example["questions"], list):
                    for ex in example["questions"]:
                        questions.append({"question": ex.get("question", ""), "expected": ex.get("expected", "")})
                else:
                    questions.append({"question": example.get("question", ""), "expected": example.get("expected", "")})

    # Fallback: look for paths that look like "/questions" or similar
    for path, methods in spec.get("paths", {}).items():
        if "questions" in path.lower():
            get_op = methods.get("get")
            if get_op and "responses" in get_op:
                # attempt to fetch via the actual endpoint
                try:
                    resp2 = requests.get(DEFAULT_API_URL + path, timeout=30)
                    resp2.raise_for_status()
                    data = resp2.json()
                    # assume list of {question, expected}
                    if isinstance(data, list):
                        for q in data:
                            questions.append({"question": q.get("question", ""), "expected": q.get("expected", "")})
                except Exception as e:
                    print(f"⚠️ Failed to fetch questions from path {path}: {e}")
    return questions
125
+
126
def submit_answers(answers: list, token: str) -> dict:
    """
    POST the collected answers to the scoring API's /submit endpoint.

    Returns the API's JSON response on success, or
    {"success": False, "message": <error>} on any failure.
    """
    try:
        # Space identity (when running inside a HF Space) travels with the payload.
        space_host = os.getenv("SPACE_HOST", "")
        space_id = os.getenv("SPACE_ID", "")

        payload = {
            "answers": answers,
            "space_host": space_host,
            "space_id": space_id,
        }

        headers = {"Authorization": f"Bearer {token}"}
        # NOTE(review): the request call was truncated in the diff rendering;
        # body/headers/timeout and result handling restored below.
        resp = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json=payload,
            headers=headers,
            timeout=60,
        )
        resp.raise_for_status()
        return resp.json()
    except Exception as e:
        return {"success": False, "message": str(e)}
142
 
143
 
144
def run_and_submit_all(token: str):
    """
    Fetch questions, answer each with the local agent, and submit the answers.

    Returns a (status_message, results_dataframe) tuple.
    """
    if not token:
        return "❌ You must provide a valid Hugging Face token.", pd.DataFrame()

    questions = fetch_questions_from_docs()
    if not questions:
        # fallback to some default or error
        return "❌ Could not fetch questions from docs/spec.", pd.DataFrame()

    try:
        agent = BasicAgent()
    except Exception as e:
        return f"❌ Error instantiating agent: {e}", pd.DataFrame()

    # Per-question failures are recorded in the results, not fatal.
    results = []
    for qa in questions:
        q = qa.get("question", "")
        expected = qa.get("expected", "")
        try:
            answer = agent(q)
        except Exception as e:
            answer = f"[Error: {e}]"
        results.append({"question": q, "answer": answer, "expected": expected})

    df = pd.DataFrame(results)

    answers_list = [r["answer"] for r in results]
    submission_result = submit_answers(answers_list, token)

    msg = submission_result.get("message", "Unknown error")
    # Bug fix: the success branch was missing, so this function reported
    # "Submission failed" even when the API accepted the answers.
    if submission_result.get("success", False):
        return f"✅ Submission successful: {msg}", df
    return f"❌ Submission failed: {msg}", df
179
 
 
 
 
 
180
 
181
 
 
182
if __name__ == "__main__":
    print("\n" + "-" * 30 + " CLI Mode " + "-" * 30 + "\n")

    # Report the Space runtime identity when available (purely informational).
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        print(f"✅ SPACE_HOST found: {space_host}")
        print(f"   Runtime URL should be: https://{space_host}.hf.space")
    else:
        print("ℹ️ SPACE_HOST not found.")

    if space_id:
        print(f"✅ SPACE_ID found: {space_id}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id}/tree/main")
    else:
        print("ℹ️ SPACE_ID not found.")

    print("-" * (60 + len(" CLI Mode ")) + "\n")

    token = os.getenv("HF_TOKEN") or input("🔑 Enter your Hugging Face token: ").strip()
    status, df = run_and_submit_all(token)

    print("\n" + "=" * 80)
    print(status)
    print("=" * 80)
    # Bug fix: the results DataFrame was computed but never shown.
    print(df)