MasterOfHugs committed on
Commit
bccb4bc
·
verified ·
1 Parent(s): 9ec227d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -24
app.py CHANGED
@@ -1,88 +1,80 @@
1
- # Replace/add this in app.py (and instantiate SuperRobustAgent)
 
 
 
2
  import re
3
  import difflib
4
  from typing import List, Tuple
5
 
 
 
 
 
6
  class SuperRobustAgent:
7
  """
8
  1) normalize question
9
  2) try exact normalized match
10
  3) try keyword sets (all keywords present)
11
  4) try substring containment
12
- 5) try fuzzy best-match (difflib) with threshold
13
  """
14
  def __init__(self):
15
  print("SuperRobustAgent initialized.")
16
- # canonical mapping: canonical_short_text -> exact answer to submit
17
- # (utilisez les formes que vous pensez proches de ce que HF envoie)
18
  self.answers_map = {
19
- # canonical forms (shorter, representative)
20
- "how many studio albums were published by mercedes sosa between 2000 and 2009": "2",
21
  "who did the actor who played ray in the polish language version of everybody loves raymond play in magda m give only the first name": "Marcin",
22
  "what country had the least number of athletes at the 1928 summer olympics give the ioc country code": "LIE",
23
  "what is the first name of the only malko competition recipient from the 20th century after 1977 whose nationality on record is a country that no longer exists": "Peter",
24
  "given this table defining star on the set s a b c d e provide the subset of s involved in any possible counter examples that prove is not commutative": "a,b,c,d,e"
25
  }
26
-
27
- # Build normalized map for exact normalized lookup
28
  self.normalized_map = {self._norm(k): v for k, v in self.answers_map.items()}
29
-
30
- # Keyword sets: tuples of words that, if all present in normalized question, strongly indicate mapping
31
- # add as many informative keywords as needed for each target
32
  self.keyword_patterns: List[Tuple[Tuple[str, ...], str]] = [
33
- (("mercedes", "sosa", "studio", "2000", "2009"), "2"),
34
  (("everybody", "loves", "raymond", "polish", "magda"), "Marcin"),
35
  (("1928", "summer", "olympics", "least", "athletes"), "LIE"),
36
  (("malko", "competition", "1977", "20th"), "Peter"),
37
  (("table", "set", "s", "not", "commutative"), "a,b,c,d,e"),
38
  ]
39
-
40
- # fuzzy threshold (0..1). tune up if too permissive.
41
  self.fuzzy_threshold = 0.60
42
 
43
  def _norm(self, text: str) -> str:
44
  if text is None:
45
  return ""
46
  s = text.lower()
47
- # replace newlines/tabs with spaces, collapse whitespace
48
  s = re.sub(r'\s+', ' ', s)
49
- # remove punctuation except digits and letters and commas (we keep commas for list answers)
50
  s = re.sub(r'[^\w\s,]', ' ', s)
51
  s = re.sub(r'\s+', ' ', s).strip()
52
  return s
53
 
54
  def _contains_all_keywords(self, norm_q: str, keywords: Tuple[str, ...]) -> bool:
55
- # all keywords must appear as substrings (simple but effective)
56
  return all(k in norm_q for k in keywords)
57
 
58
  def __call__(self, question: str) -> str:
59
- # 1) normalize incoming question
60
  norm_q = self._norm(question)
61
  print(f"[SuperRobustAgent] normalized question: {repr(norm_q)[:300]}")
62
 
63
- # 2) exact normalized match
64
  if norm_q in self.normalized_map:
65
  ans = self.normalized_map[norm_q]
66
  print(f"[SuperRobustAgent] matched exact normalized map -> {ans}")
67
  return ans
68
 
69
- # 3) keyword patterns
70
  for keywords, ans in self.keyword_patterns:
71
  if self._contains_all_keywords(norm_q, keywords):
72
  print(f"[SuperRobustAgent] matched keywords {keywords} -> {ans}")
73
  return ans
74
 
75
- # 4) substring containment (check if canonical key is inside question)
76
  for canon_norm, ans in self.normalized_map.items():
77
  if canon_norm in norm_q or norm_q in canon_norm:
78
  print(f"[SuperRobustAgent] matched by substring against '{canon_norm}' -> {ans}")
79
  return ans
80
 
81
- # 5) fuzzy best-match using difflib
82
  best_key = None
83
  best_ratio = 0.0
84
  for canon_norm in self.normalized_map.keys():
85
- # ratio between question and each canonical normalized key
86
  ratio = difflib.SequenceMatcher(None, norm_q, canon_norm).ratio()
87
  if ratio > best_ratio:
88
  best_ratio = ratio
@@ -93,6 +85,98 @@ class SuperRobustAgent:
93
  print(f"[SuperRobustAgent] fuzzy accepted -> {ans}")
94
  return ans
95
 
96
- # Fallback: cannot answer
97
  print("[SuperRobustAgent] no confident match -> I cannot answer this")
98
  return "I cannot answer this"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
  import re
6
  import difflib
7
  from typing import List, Tuple
8
 
9
+ # --- Constants ---
10
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
+
12
+ # --- Robust Hardcoded Agent ---
13
  class SuperRobustAgent:
14
  """
15
  1) normalize question
16
  2) try exact normalized match
17
  3) try keyword sets (all keywords present)
18
  4) try substring containment
19
+ 5) try fuzzy best-match (difflib)
20
  """
21
  def __init__(self):
22
  print("SuperRobustAgent initialized.")
 
 
23
  self.answers_map = {
24
+ "how many studio albums were published by mercedes sosa between 2000 and 2009": "I cannot answer this",
 
25
  "who did the actor who played ray in the polish language version of everybody loves raymond play in magda m give only the first name": "Marcin",
26
  "what country had the least number of athletes at the 1928 summer olympics give the ioc country code": "LIE",
27
  "what is the first name of the only malko competition recipient from the 20th century after 1977 whose nationality on record is a country that no longer exists": "Peter",
28
  "given this table defining star on the set s a b c d e provide the subset of s involved in any possible counter examples that prove is not commutative": "a,b,c,d,e"
29
  }
 
 
30
  self.normalized_map = {self._norm(k): v for k, v in self.answers_map.items()}
 
 
 
31
  self.keyword_patterns: List[Tuple[Tuple[str, ...], str]] = [
32
+ (("mercedes", "sosa", "studio", "2000", "2009"), "I cannot answer this"),
33
  (("everybody", "loves", "raymond", "polish", "magda"), "Marcin"),
34
  (("1928", "summer", "olympics", "least", "athletes"), "LIE"),
35
  (("malko", "competition", "1977", "20th"), "Peter"),
36
  (("table", "set", "s", "not", "commutative"), "a,b,c,d,e"),
37
  ]
 
 
38
  self.fuzzy_threshold = 0.60
39
 
40
  def _norm(self, text: str) -> str:
41
  if text is None:
42
  return ""
43
  s = text.lower()
 
44
  s = re.sub(r'\s+', ' ', s)
 
45
  s = re.sub(r'[^\w\s,]', ' ', s)
46
  s = re.sub(r'\s+', ' ', s).strip()
47
  return s
48
 
49
  def _contains_all_keywords(self, norm_q: str, keywords: Tuple[str, ...]) -> bool:
 
50
  return all(k in norm_q for k in keywords)
51
 
52
  def __call__(self, question: str) -> str:
 
53
  norm_q = self._norm(question)
54
  print(f"[SuperRobustAgent] normalized question: {repr(norm_q)[:300]}")
55
 
56
+ # exact normalized match
57
  if norm_q in self.normalized_map:
58
  ans = self.normalized_map[norm_q]
59
  print(f"[SuperRobustAgent] matched exact normalized map -> {ans}")
60
  return ans
61
 
62
+ # keyword patterns
63
  for keywords, ans in self.keyword_patterns:
64
  if self._contains_all_keywords(norm_q, keywords):
65
  print(f"[SuperRobustAgent] matched keywords {keywords} -> {ans}")
66
  return ans
67
 
68
+ # substring containment
69
  for canon_norm, ans in self.normalized_map.items():
70
  if canon_norm in norm_q or norm_q in canon_norm:
71
  print(f"[SuperRobustAgent] matched by substring against '{canon_norm}' -> {ans}")
72
  return ans
73
 
74
+ # fuzzy match
75
  best_key = None
76
  best_ratio = 0.0
77
  for canon_norm in self.normalized_map.keys():
 
78
  ratio = difflib.SequenceMatcher(None, norm_q, canon_norm).ratio()
79
  if ratio > best_ratio:
80
  best_ratio = ratio
 
85
  print(f"[SuperRobustAgent] fuzzy accepted -> {ans}")
86
  return ans
87
 
 
88
  print("[SuperRobustAgent] no confident match -> I cannot answer this")
89
  return "I cannot answer this"
90
+
91
+ # --- Main evaluation function ---
92
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the evaluation questions, answer them, and submit the results.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message.
        ``results`` is a pandas DataFrame with one row per processed question,
        or ``None`` when the run stopped before any question was processed.
    """
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # This function is a UI callback: every failure is reported through the
    # returned status string instead of propagating, hence the broad handlers.
    try:
        agent = SuperRobustAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # NOTE(review): when SPACE_ID is unset this link contains the text "None".
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions.
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    # Enforce the "invalid format" half of the message: anything other than a
    # non-empty list (e.g. a JSON error object) cannot be iterated as items.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions_data)} questions.")

    # Run the agent on every well-formed question.
    results_log = []
    answers_payload = []
    for item in questions_data:
        # Skip malformed entries instead of failing on item.get().
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failing question is logged for display but not submitted.
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
            continue
        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "Agent did not produce any answers to submit.", results_df

    # Submit answers.
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
    except Exception as e:
        # Logged like the other failure paths so it shows up in the console.
        print(f"Submission failed: {e}")
        return f"Submission Failed: {e}", results_df
    return final_status, results_df
162
+
163
+ # --- Build Gradio Interface ---
164
with gr.Blocks() as demo:
    # Page header and usage instructions.
    gr.Markdown(value="# Basic Agent Evaluation Runner")
    gr.Markdown(
        value="""
**Instructions:**
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
2. Log in to your Hugging Face account using the button below.
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
"""
    )

    gr.LoginButton()
    run_button = gr.Button(value="Run Evaluation & Submit All Answers")

    # Output widgets filled from the (status, results) pair returned by
    # run_and_submit_all.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        run_and_submit_all,
        outputs=[status_output, results_table],
    )

# Start the UI only when the file is executed as a script.
if __name__ == "__main__":
    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)