Files changed (1) hide show
  1. demo.py +51 -156
demo.py CHANGED
@@ -1,197 +1,92 @@
1
  #!/usr/bin/env python3
2
- """
3
- Demo script for Code Output Assessment Environment.
4
-
5
- This script demonstrates how to interact with the environment both locally
6
- and from a deployed Hugging Face Space.
7
- """
8
 
9
  import asyncio
10
  from code_assessment_env import CodeAssessmentAction, CodeAssessmentEnv
11
 
12
 
13
  async def demo_local():
14
- """Demo using local Docker container."""
15
  print("=" * 60)
16
- print("DEMO: Code Output Assessment Environment (Local)")
17
  print("=" * 60)
18
-
19
- # Connect to local Docker container
20
  env = await CodeAssessmentEnv.from_docker_image("code_assessment_env:latest")
21
-
22
  try:
23
- # Reset environment
24
  result = await env.reset()
25
  obs = result.observation
26
-
27
- print(f"\n📝 Problem: {obs.problem_description}")
28
- print(f"🎚️ Difficulty: {obs.difficulty}")
29
- print(f"🔢 Test Input: {obs.test_case_input}")
30
- print(f"💬 Feedback: {obs.feedback}")
31
-
32
- # Example solutions for common problems
33
- solutions = {
34
- "Add two numbers": lambda inp: str(sum(map(int, inp.split(',')))),
35
- "Reverse a string": lambda inp: inp[::-1],
36
- "Count vowels": lambda inp: str(sum(1 for c in inp.lower() if c in 'aeiou')),
37
- "Find maximum": lambda inp: str(max(map(int, inp.split(',')))),
38
- }
39
-
40
- # Try to solve a few problems
41
- for step in range(1, 6):
42
- # Simple solver logic
43
- answer = "0" # Default answer
44
- for problem_name, solver in solutions.items():
45
- if problem_name in obs.problem_description:
46
- try:
47
- answer = solver(obs.test_case_input)
48
- except:
49
- answer = "0"
50
- break
51
-
52
- # Submit answer
53
- result = await env.step(FirstRlProjAction(answer=answer))
54
  obs = result.observation
55
-
56
  print(f"\n{'=' * 60}")
57
- print(f"Step {step}:")
58
- print(f" Answer submitted: {answer}")
59
- print(f" Correct: {'✓' if obs.is_correct else '✗'}")
60
- print(f" Grader Score: {obs.partial_credit:.2f}")
61
- print(f" Reward: {result.reward:.2f}")
62
- print(f" Feedback: {obs.feedback}")
63
- print(f" Problems Solved: {obs.problems_solved}")
64
- print(f" Current Streak: {obs.current_streak}")
65
-
66
  if result.done:
67
- print("\n🏁 Episode complete!")
68
  break
69
-
70
- # Show next problem
71
- print(f"\n📝 Next Problem: {obs.problem_description}")
72
- print(f"🎚️ Difficulty: {obs.difficulty}")
73
- print(f"🔢 Test Input: {obs.test_case_input}")
74
-
75
- # Get final state
76
- state = await env.state()
77
- print(f"\n{'=' * 60}")
78
- print(f"Final Stats:")
79
- print(f" Episode ID: {state.episode_id}")
80
- print(f" Total Steps: {state.step_count}")
81
- print(f" Problems Solved: {obs.problems_solved}")
82
-
83
  finally:
84
  await env.close()
85
- print("\n✅ Demo complete!\n")
86
 
87
 
88
  async def demo_remote():
89
- """Demo using deployed Hugging Face Space."""
90
  print("=" * 60)
91
- print("DEMO: Code Output Assessment Environment (Remote)")
92
  print("=" * 60)
93
-
94
- # Connect to HF Space (replace with your actual space URL)
95
  env = CodeAssessmentEnv(base_url="https://TulasiSankar-code-assessment-env.hf.space")
96
-
97
  try:
98
  result = await env.reset()
99
  obs = result.observation
100
-
101
- print(f"\n📝 Problem: {obs.problem_description}")
102
- print(f"🎚️ Difficulty: {obs.difficulty}")
103
- print(f"🔢 Test Input: {obs.test_case_input}")
104
-
105
- # Submit a simple answer
106
- result = await env.step(CodeAssessmentAction(answer="8"))
107
  obs = result.observation
108
-
109
- print(f"\nAnswer submitted: '8'")
110
- print(f"Correct: {'✓' if obs.is_correct else '✗'}")
111
- print(f"Reward: {result.reward:.2f}")
112
  print(f"Feedback: {obs.feedback}")
113
-
114
- finally:
115
- await env.close()
116
- print("\n✅ Remote demo complete!\n")
117
-
118
 
119
- async def demo_interactive():
120
- """Interactive demo where you can input answers."""
121
- print("=" * 60)
122
- print("INTERACTIVE DEMO: Solve Problems Yourself!")
123
- print("=" * 60)
124
-
125
- env = await FirstRlProjEnv.from_docker_image("first_rl_proj:latest")
126
-
127
- try:
128
- result = await env.reset()
129
- obs = result.observation
130
-
131
- print(f"\n📚 Starting Code Assessment Challenge!")
132
- print(f"You have 15 steps to solve as many problems as possible.\n")
133
-
134
- for step in range(1, 16):
135
- print(f"\n{'=' * 60}")
136
- print(f"Step {step}/15 | Difficulty: {obs.difficulty.upper()}")
137
- print(f"Problems Solved: {obs.problems_solved} | Streak: {obs.current_streak}")
138
- print(f"\n📝 Problem: {obs.problem_description}")
139
- print(f"🔢 Test Input: {obs.test_case_input}")
140
-
141
- # Get user input
142
- answer = input("\n💡 Your answer: ").strip()
143
-
144
- if answer.lower() in ['quit', 'exit', 'q']:
145
- print("Exiting...")
146
- break
147
-
148
- # Submit answer
149
- result = await env.step(FirstRlProjAction(answer=answer))
150
- obs = result.observation
151
-
152
- # Show results
153
- print(f"\n{'✓ CORRECT!' if obs.is_correct else '✗ Incorrect'}")
154
- print(f"Grader Score: {obs.partial_credit:.2f}/1.0")
155
- print(f"Reward: {result.reward:+.2f}")
156
- print(f"Feedback: {obs.feedback}")
157
-
158
- if result.done:
159
- print("\n🏁 Episode complete!")
160
- break
161
-
162
- print(f"\n{'=' * 60}")
163
- print(f"Final Score:")
164
- print(f" Problems Solved: {obs.problems_solved}")
165
- print(f" Best Streak: {obs.current_streak}")
166
-
167
  finally:
168
  await env.close()
169
- print("\n✅ Thanks for playing!\n")
170
 
171
 
172
  async def main():
173
- """Run all demos."""
174
  import sys
175
-
176
- if len(sys.argv) > 1:
177
- mode = sys.argv[1]
178
- if mode == "local":
179
- await demo_local()
180
- elif mode == "remote":
181
- await demo_remote()
182
- elif mode == "interactive":
183
- await demo_interactive()
184
- else:
185
- print(f"Unknown mode: {mode}")
186
- print("Usage: python demo.py [local|remote|interactive]")
187
- else:
188
- print("\nAvailable demos:")
189
- print(" python demo.py local - Run automated local demo")
190
- print(" python demo.py remote - Test HF Space deployment")
191
- print(" python demo.py interactive - Play interactively")
192
- print("\nRunning local demo...\n")
193
  await demo_local()
 
 
 
 
194
 
195
 
196
  if __name__ == "__main__":
197
- asyncio.run(main())
 
1
  #!/usr/bin/env python3
2
+ """Demo for AI Response Evaluation Environment."""
 
 
 
 
 
3
 
4
  import asyncio
5
  from code_assessment_env import CodeAssessmentAction, CodeAssessmentEnv
6
 
7
 
8
  async def demo_local():
 
9
  print("=" * 60)
10
+ print("DEMO: AI Response Evaluation Environment")
11
  print("=" * 60)
12
+
 
13
  env = await CodeAssessmentEnv.from_docker_image("code_assessment_env:latest")
14
+
15
  try:
 
16
  result = await env.reset()
17
  obs = result.observation
18
+
19
+ print(f"\nTask: {obs.task_type} | Difficulty: {obs.difficulty}")
20
+ if obs.user_age:
21
+ print(f"User: age={obs.user_age}, mood={obs.user_mood}, context={obs.user_context}")
22
+ print(f"\nScenario:\n{obs.test_case_input}")
23
+
24
+ demo_answers = [
25
+ "incorrect, factual-error",
26
+ "correct, none",
27
+ "incorrect, instruction-violation",
28
+ "partially-correct, factual-error",
29
+ "needs-adjustment, too-technical, age-inappropriate",
30
+ "inappropriate, insensitive, tone-mismatch",
31
+ "correctness=7, tone=2, empathy=1, safety=7",
32
+ "correctness=9, tone=10, empathy=7, safety=10",
33
+ ]
34
+
35
+ for step in range(1, 8):
36
+ answer = demo_answers[step - 1] if step <= len(demo_answers) else "unknown"
37
+ result = await env.step(CodeAssessmentAction(answer=answer))
 
 
 
 
 
 
 
 
38
  obs = result.observation
39
+
40
  print(f"\n{'=' * 60}")
41
+ print(f"Step {step}: '{answer}'")
42
+ print(f" Correct: {'Y' if obs.is_correct else 'N'} | Credit: {obs.partial_credit:.2f} | Reward: {result.reward:.2f}")
43
+ print(f" Feedback: {obs.feedback[:120]}")
44
+ print(f" Solved: {obs.problems_solved} | Streak: {obs.current_streak}")
45
+
 
 
 
 
46
  if result.done:
 
47
  break
48
+
49
+ print(f"\n Next: {obs.task_type} ({obs.difficulty})")
50
+ if obs.user_age:
51
+ print(f" User: age={obs.user_age}, mood={obs.user_mood}, context={obs.user_context}")
52
+
 
 
 
 
 
 
 
 
 
53
  finally:
54
  await env.close()
55
+ print("\nDemo complete.\n")
56
 
57
 
58
  async def demo_remote():
 
59
  print("=" * 60)
60
+ print("DEMO: Remote HF Space")
61
  print("=" * 60)
62
+
 
63
  env = CodeAssessmentEnv(base_url="https://TulasiSankar-code-assessment-env.hf.space")
64
+
65
  try:
66
  result = await env.reset()
67
  obs = result.observation
68
+ print(f"\nTask: {obs.task_type} | Difficulty: {obs.difficulty}")
69
+
70
+ result = await env.step(CodeAssessmentAction(answer="incorrect, factual-error"))
 
 
 
 
71
  obs = result.observation
72
+ print(f"Correct: {'Y' if obs.is_correct else 'N'} | Reward: {result.reward:.2f}")
 
 
 
73
  print(f"Feedback: {obs.feedback}")
 
 
 
 
 
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  finally:
76
  await env.close()
77
+ print("\nRemote demo complete.\n")
78
 
79
 
80
  async def main():
 
81
  import sys
82
+ mode = sys.argv[1] if len(sys.argv) > 1 else "local"
83
+ if mode == "local":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  await demo_local()
85
+ elif mode == "remote":
86
+ await demo_remote()
87
+ else:
88
+ print("Usage: python demo.py [local|remote]")
89
 
90
 
91
  if __name__ == "__main__":
92
+ asyncio.run(main())