Jibrann committed on
Commit
b0c62b4
·
verified ·
1 Parent(s): 09d185e

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. grader.py +61 -124
  2. inference.py +1 -1
  3. server/app.py +17 -23
  4. server/app_environment.py +30 -1
  5. utils.py +9 -7
  6. uv.lock +0 -0
grader.py CHANGED
@@ -1,124 +1,61 @@
1
- from sklearn.preprocessing import MinMaxScaler
2
- import os
3
- from dotenv import load_dotenv
4
- from openai import OpenAI
5
- import json
6
- from json import JSONDecodeError
7
- from numpy import average
8
-
9
-
10
- load_dotenv()
11
-
12
- API_URL = os.getenv("API_BASE_URL")
13
- MODEL = os.getenv("MODEL_NAME")
14
- API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
15
-
16
- SYSTEM_PROMPT_GRADING = """
17
-
18
- You are a professional object sorter who works at the industry level and
19
- has a good knowledge about how and where things are to be places, you shall receieve the
20
- list of feedbacks from an accomplice hired, you shall rate the feedback on the scale of 0.0 to 1.0 ONLY.
21
-
22
- Rules:
23
- - You shall rate the feedback on the scale of 0.0 to 1.0 ONLY AND also provide a one-line feedback
24
- - You WILL STRICTLY ABIDE BY THIS JSON FORMAT:
25
- {
26
- "grade": float,
27
- "feedback": str,
28
- }
29
- """.strip()
30
-
31
- TEMPERATURE = 0.2
32
-
33
-
34
- def _feed_llm(input):
35
- if not API_URL or not MODEL or not API_KEY:
36
- missing = [
37
- name
38
- for name, value in (
39
- ("API_BASE_URL", API_URL),
40
- ("MODEL_NAME", MODEL),
41
- ("API_KEY/HF_TOKEN", API_KEY),
42
- )
43
- if not value
44
- ]
45
- raise RuntimeError(
46
- f"Missing required environment variables: {', '.join(missing)}"
47
- )
48
-
49
- client = OpenAI(
50
- base_url=API_URL,
51
- api_key=API_KEY,
52
- )
53
-
54
- llm_output = client.chat.completions.create(
55
- model=MODEL,
56
- messages=[
57
- {"role": "system", "content": SYSTEM_PROMPT_GRADING},
58
- {"role": "user", "content": f"{input}"},
59
- ],
60
- temperature=TEMPERATURE,
61
- )
62
-
63
- return llm_output.choices[0].message.content or ""
64
-
65
-
66
- def _extract_json_payload(output_str: str):
67
- output_str = output_str.strip()
68
-
69
- if output_str.startswith("```"):
70
- lines = output_str.splitlines()
71
- if len(lines) >= 3:
72
- output_str = "\n".join(lines[1:-1]).strip()
73
-
74
- start = output_str.find("{")
75
- end = output_str.rfind("}")
76
-
77
- if start == -1 or end == -1 or end < start:
78
- raise JSONDecodeError("No JSON object found in model output", output_str, 0)
79
-
80
- return output_str[start : end + 1]
81
-
82
-
83
- def parse_output(output_str):
84
- data = json.loads(_extract_json_payload(output_str))
85
- return data
86
-
87
-
88
- def grade_segmentation(appObs):
89
- scaler = MinMaxScaler()
90
- reward = appObs.rewardListSegment
91
- feedback = appObs.rewardFeedbackSegment
92
- scaler.fit(reward)
93
- grade = average(scaler.transform(reward))
94
- llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
95
- outputFeedback = llmOutput.get("feedback", "")
96
- outputGrade = llmOutput.get("grade", 0.0)
97
- cumulativeGrade = (grade + outputGrade) / 2.0
98
- return (grade, outputGrade, cumulativeGrade, outputFeedback)
99
-
100
-
101
- def grade_placement(appObs):
102
- scaler = MinMaxScaler()
103
- reward = appObs.rewardListPlace
104
- feedback = appObs.rewardFeedbackPlace
105
- scaler.fit(reward)
106
- grade = average(scaler.transform(reward))
107
- llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
108
- outputFeedback = llmOutput.get("feedback", "")
109
- outputGrade = llmOutput.get("grade", 0.0)
110
- cumulativeGrade = (grade + outputGrade) / 2.0
111
- return (grade, outputGrade, cumulativeGrade, outputFeedback)
112
-
113
-
114
- def grade_segmentation(appObs):
115
- scaler = MinMaxScaler()
116
- reward = appObs.rewardListAdjust
117
- feedback = appObs.rewardFeedbackAdjust
118
- scaler.fit(reward)
119
- grade = average(scaler.transform(reward))
120
- llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
121
- outputFeedback = llmOutput.get("feedback", "")
122
- outputGrade = llmOutput.get("grade", 0.0)
123
- cumulativeGrade = (grade + outputGrade) / 2.0
124
- return (grade, outputGrade, cumulativeGrade, outputFeedback)
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable
4
+
5
+ try:
6
+ from models import AppObservation
7
+ except ImportError:
8
+ from app.models import AppObservation
9
+
10
+
11
+ def _clamp_score(value):
12
+ return max(0.0, min(1.0, float(value)))
13
+
14
+ def _safe_ratio(numerator, denominator):
15
+ if denominator <= 0:
16
+ return 0.0
17
+ return _clamp_score(numerator / denominator)
18
+
19
+
20
+ def _attempt_quality(rewards):
21
+ rewards = list(rewards)
22
+ if not rewards:
23
+ return 0.0
24
+
25
+ positive_attempts = sum(1 for reward in rewards if reward > 0)
26
+ return _safe_ratio(positive_attempts, len(rewards))
27
+
28
+
29
+ def _feedback(score, category):
30
+ if score >= 0.9:
31
+ return f"{category} performance is excellent and highly reliable."
32
+ if score >= 0.7:
33
+ return f"{category} performance is solid with minor room for improvement."
34
+ if score >= 0.4:
35
+ return f"{category} performance is partial and needs more consistency."
36
+ return f"{category} performance is weak and needs significant improvement."
37
+
38
+
39
+ def grade_segmentation(app_obs: AppObservation):
40
+ total_objects = len(app_obs.objectsFound) + len(app_obs.objectsLeft)
41
+ progress_score = _safe_ratio(len(app_obs.objectsFound), total_objects)
42
+ quality_score = _attempt_quality(app_obs.rewardListSegment)
43
+ score = _clamp_score((progress_score * 0.7) + (quality_score * 0.3))
44
+ return score, _feedback(score, "Segmentation")
45
+
46
+
47
+ def grade_placement(app_obs: AppObservation):
48
+ total_objects = len(app_obs.objectsFound) + len(app_obs.objectsLeft)
49
+ progress_score = _safe_ratio(app_obs.numberPlaced, total_objects)
50
+ quality_score = _attempt_quality(app_obs.rewardListPlace)
51
+ score = _clamp_score((progress_score * 0.7) + (quality_score * 0.3))
52
+ return score, _feedback(score, "Placement")
53
+
54
+
55
+ def grade_adjustment(app_obs: AppObservation):
56
+ rewards = list(app_obs.rewardListAdjust)
57
+ if not rewards:
58
+ return 0.0, "Adjustment was not attempted."
59
+
60
+ score = _attempt_quality(rewards)
61
+ return score, _feedback(score, "Adjustment")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
inference.py CHANGED
@@ -186,7 +186,7 @@ def main() -> None:
186
 
187
  if observation.isDone:
188
  break
189
-
190
  print(HISTORY)
191
 
192
 
 
186
 
187
  if observation.isDone:
188
  break
189
+
190
  print(HISTORY)
191
 
192
 
server/app.py CHANGED
@@ -22,20 +22,19 @@ app = create_app(
22
  )
23
 
24
 
25
- # @app.get("/health")
26
- # def health() -> dict[str, str]:
27
- # return {"status": "ok"}
28
- #
29
- #
30
- # @app.get("/")
31
- # def root() -> dict[str, str]:
32
- # return {
33
- # "message": "Object Placer API is running",
34
- # "health": "/health",
35
- # }
36
 
37
 
38
- def main(host: str = "0.0.0.0", port: int = 8000):
 
 
 
 
 
 
 
39
  """
40
  Entry point for direct execution via uv run or python -m.
41
 
@@ -44,23 +43,18 @@ def main(host: str = "0.0.0.0", port: int = 8000):
44
  uv run --project . server --port 8001
45
  python -m app.server.app
46
 
47
- Args:
48
- host: Host address to bind to (default: "0.0.0.0")
49
- port: Port number to listen on (default: 8000)
50
-
51
  For production deployments, consider using uvicorn directly with
52
  multiple workers:
53
  uvicorn app.server.app:app --workers 4
54
  """
55
- import uvicorn
56
-
57
- uvicorn.run(app, host=host, port=port)
58
-
59
-
60
- if __name__ == "__main__":
61
  import argparse
62
 
63
  parser = argparse.ArgumentParser()
 
64
  parser.add_argument("--port", type=int, default=8000)
65
  args = parser.parse_args()
66
- main(port=args.port)
 
 
 
 
 
22
  )
23
 
24
 
25
+ @app.get("/health")
26
+ def health() -> dict[str, str]:
27
+ return {"status": "ok"}
 
 
 
 
 
 
 
 
28
 
29
 
30
+ def _run_server(host: str = "0.0.0.0", port: int = 8000):
31
+ """Start the FastAPI app with uvicorn."""
32
+ import uvicorn
33
+
34
+ uvicorn.run(app, host=host, port=port)
35
+
36
+
37
+ def main():
38
  """
39
  Entry point for direct execution via uv run or python -m.
40
 
 
43
  uv run --project . server --port 8001
44
  python -m app.server.app
45
 
 
 
 
 
46
  For production deployments, consider using uvicorn directly with
47
  multiple workers:
48
  uvicorn app.server.app:app --workers 4
49
  """
 
 
 
 
 
 
50
  import argparse
51
 
52
  parser = argparse.ArgumentParser()
53
+ parser.add_argument("--host", default="0.0.0.0")
54
  parser.add_argument("--port", type=int, default=8000)
55
  args = parser.parse_args()
56
+ _run_server(host=args.host, port=args.port)
57
+
58
+
59
+ if __name__ == "__main__":
60
+ main()
server/app_environment.py CHANGED
@@ -16,6 +16,7 @@ except ImportError:
16
  class AppEnvironment(Environment):
17
 
18
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
 
19
 
20
  def __init__(self):
21
  self._state = self._new_state()
@@ -84,6 +85,26 @@ class AppEnvironment(Environment):
84
  def step(self, action: AppAction) -> AppObservation:
85
  state = self._coerce_state()
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  if isinstance(action, dict):
88
  action = AppAction(**action)
89
 
@@ -152,8 +173,16 @@ class AppEnvironment(Environment):
152
  appendRewardFeedback(
153
  state, "segment", "All objects found. Episode completed!", reward
154
  )
 
 
 
 
 
 
 
 
155
 
156
- state.reward += reward / (10**state.step_count)
157
 
158
  return AppObservation(
159
  currentGrid=state.currentGrid,
 
16
  class AppEnvironment(Environment):
17
 
18
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
19
+ MAX_STEPS: int = 20
20
 
21
  def __init__(self):
22
  self._state = self._new_state()
 
85
  def step(self, action: AppAction) -> AppObservation:
86
  state = self._coerce_state()
87
 
88
+ if state.isDone:
89
+ return AppObservation(
90
+ currentGrid=state.currentGrid,
91
+ positions=state.ObjectsPresent,
92
+ objectsLeft=state.objectsLeft,
93
+ objectsFound=state.objectsFound,
94
+ reward=state.reward,
95
+ isDone=state.isDone,
96
+ rewardFeedback=state.rewardFeedback,
97
+ rewardList=state.rewardList,
98
+ numberPlaced=state.numberPlaced,
99
+ ObjectsPlaced=state.ObjectsPlaced,
100
+ rewardListSegment=state.rewardListSegment,
101
+ rewardFeedbackSegment=state.rewardFeedbackSegment,
102
+ rewardListPlace=state.rewardListPlace,
103
+ rewardFeedbackPlace=state.rewardFeedbackPlace,
104
+ rewardListAdjust=state.rewardListAdjust,
105
+ rewardFeedbackAdjust=state.rewardFeedbackAdjust,
106
+ )
107
+
108
  if isinstance(action, dict):
109
  action = AppAction(**action)
110
 
 
173
  appendRewardFeedback(
174
  state, "segment", "All objects found. Episode completed!", reward
175
  )
176
+ elif state.step_count >= self.MAX_STEPS:
177
+ state.isDone = True
178
+ appendRewardFeedback(
179
+ state,
180
+ "",
181
+ f"Maximum step limit of {self.MAX_STEPS} reached. Episode ended.",
182
+ reward,
183
+ )
184
 
185
+ state.reward += reward
186
 
187
  return AppObservation(
188
  currentGrid=state.currentGrid,
utils.py CHANGED
@@ -182,11 +182,13 @@ def place(segment, objects, state):
182
  totalObjs = len(objects)
183
  reward_per_obj_placed = 45.0 / totalObjs
184
 
 
 
185
  if segment:
186
  appendRewardFeedback(
187
  state, "place", "Placing objects with segmentation is not allowed.", -60.0
188
  )
189
- return -60.0
190
 
191
  for obj_name, pos in objects.items():
192
 
@@ -202,8 +204,6 @@ def place(segment, objects, state):
202
  continue
203
 
204
  objGrid = initDimentions(obj)
205
- placement_failed = False
206
-
207
  for i in range(len(objGrid)):
208
  for j in range(len(objGrid[0])):
209
  for k in range(len(objGrid[0][0])):
@@ -320,11 +320,11 @@ def findobject(segment, objects, state):
320
  )
321
  return -60.0
322
 
323
- if state.ObjectsPresent == state.objectsFound:
324
  appendRewardFeedback(
325
  state,
326
  "segment",
327
- "No point in finding more objects as all are already found Make the IsSegement attribute false and execute the place method.",
328
  -60.0,
329
  )
330
  return -60.0
@@ -381,8 +381,10 @@ def findobject(segment, objects, state):
381
  )
382
 
383
  for obj in objs:
384
- state.objectsLeft.remove(obj)
385
- state.objectsFound.append(obj)
 
 
386
 
387
  return reward
388
 
 
182
  totalObjs = len(objects)
183
  reward_per_obj_placed = 45.0 / totalObjs
184
 
185
+ placement_failed = False
186
+
187
  if segment:
188
  appendRewardFeedback(
189
  state, "place", "Placing objects with segmentation is not allowed.", -60.0
190
  )
191
+ return (-60.0, True)
192
 
193
  for obj_name, pos in objects.items():
194
 
 
204
  continue
205
 
206
  objGrid = initDimentions(obj)
 
 
207
  for i in range(len(objGrid)):
208
  for j in range(len(objGrid[0])):
209
  for k in range(len(objGrid[0][0])):
 
320
  )
321
  return -60.0
322
 
323
+ if set(state.objectsFound) == set(state.ObjectsPresent.keys()):
324
  appendRewardFeedback(
325
  state,
326
  "segment",
327
+ "No point in finding more objects as all are already found. Make the isSegement attribute false and execute the place method.",
328
  -60.0,
329
  )
330
  return -60.0
 
381
  )
382
 
383
  for obj in objs:
384
+ if obj in state.objectsLeft:
385
+ state.objectsLeft.remove(obj)
386
+ if obj not in state.objectsFound:
387
+ state.objectsFound.append(obj)
388
 
389
  return reward
390
 
uv.lock ADDED
The diff for this file is too large to render. See raw diff