Jibrann committed on
Commit
b0c62b4
·
verified ·
1 Parent(s): 09d185e

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. grader.py +61 -124
  2. inference.py +1 -1
  3. server/app.py +17 -23
  4. server/app_environment.py +30 -1
  5. utils.py +9 -7
  6. uv.lock +0 -0
grader.py CHANGED
@@ -1,124 +1,61 @@
1
- from sklearn.preprocessing import MinMaxScaler
2
- import os
3
- from dotenv import load_dotenv
4
- from openai import OpenAI
5
- import json
6
- from json import JSONDecodeError
7
- from numpy import average
8
-
9
-
10
- load_dotenv()
11
-
12
- API_URL = os.getenv("API_BASE_URL")
13
- MODEL = os.getenv("MODEL_NAME")
14
- API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
15
-
16
- SYSTEM_PROMPT_GRADING = """
17
-
18
- You are a professional object sorter who works at the industry level and
19
- has a good knowledge about how and where things are to be places, you shall receieve the
20
- list of feedbacks from an accomplice hired, you shall rate the feedback on the scale of 0.0 to 1.0 ONLY.
21
-
22
- Rules:
23
- - You shall rate the feedback on the scale of 0.0 to 1.0 ONLY AND also provide a one-line feedback
24
- - You WILL STRICTLY ABIDE BY THIS JSON FORMAT:
25
- {
26
- "grade": float,
27
- "feedback": str,
28
- }
29
- """.strip()
30
-
31
- TEMPERATURE = 0.2
32
-
33
-
34
- def _feed_llm(input):
35
- if not API_URL or not MODEL or not API_KEY:
36
- missing = [
37
- name
38
- for name, value in (
39
- ("API_BASE_URL", API_URL),
40
- ("MODEL_NAME", MODEL),
41
- ("API_KEY/HF_TOKEN", API_KEY),
42
- )
43
- if not value
44
- ]
45
- raise RuntimeError(
46
- f"Missing required environment variables: {', '.join(missing)}"
47
- )
48
-
49
- client = OpenAI(
50
- base_url=API_URL,
51
- api_key=API_KEY,
52
- )
53
-
54
- llm_output = client.chat.completions.create(
55
- model=MODEL,
56
- messages=[
57
- {"role": "system", "content": SYSTEM_PROMPT_GRADING},
58
- {"role": "user", "content": f"{input}"},
59
- ],
60
- temperature=TEMPERATURE,
61
- )
62
-
63
- return llm_output.choices[0].message.content or ""
64
-
65
-
66
- def _extract_json_payload(output_str: str):
67
- output_str = output_str.strip()
68
-
69
- if output_str.startswith("```"):
70
- lines = output_str.splitlines()
71
- if len(lines) >= 3:
72
- output_str = "\n".join(lines[1:-1]).strip()
73
-
74
- start = output_str.find("{")
75
- end = output_str.rfind("}")
76
-
77
- if start == -1 or end == -1 or end < start:
78
- raise JSONDecodeError("No JSON object found in model output", output_str, 0)
79
-
80
- return output_str[start : end + 1]
81
-
82
-
83
- def parse_output(output_str):
84
- data = json.loads(_extract_json_payload(output_str))
85
- return data
86
-
87
-
88
- def grade_segmentation(appObs):
89
- scaler = MinMaxScaler()
90
- reward = appObs.rewardListSegment
91
- feedback = appObs.rewardFeedbackSegment
92
- scaler.fit(reward)
93
- grade = average(scaler.transform(reward))
94
- llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
95
- outputFeedback = llmOutput.get("feedback", "")
96
- outputGrade = llmOutput.get("grade", 0.0)
97
- cumulativeGrade = (grade + outputGrade) / 2.0
98
- return (grade, outputGrade, cumulativeGrade, outputFeedback)
99
-
100
-
101
- def grade_placement(appObs):
102
- scaler = MinMaxScaler()
103
- reward = appObs.rewardListPlace
104
- feedback = appObs.rewardFeedbackPlace
105
- scaler.fit(reward)
106
- grade = average(scaler.transform(reward))
107
- llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
108
- outputFeedback = llmOutput.get("feedback", "")
109
- outputGrade = llmOutput.get("grade", 0.0)
110
- cumulativeGrade = (grade + outputGrade) / 2.0
111
- return (grade, outputGrade, cumulativeGrade, outputFeedback)
112
-
113
-
114
- def grade_segmentation(appObs):
115
- scaler = MinMaxScaler()
116
- reward = appObs.rewardListAdjust
117
- feedback = appObs.rewardFeedbackAdjust
118
- scaler.fit(reward)
119
- grade = average(scaler.transform(reward))
120
- llmOutput = parse_output(_feed_llm(f"Feedback: {feedback}, Reward: {reward}"))
121
- outputFeedback = llmOutput.get("feedback", "")
122
- outputGrade = llmOutput.get("grade", 0.0)
123
- cumulativeGrade = (grade + outputGrade) / 2.0
124
- return (grade, outputGrade, cumulativeGrade, outputFeedback)
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable
4
+
5
+ try:
6
+ from models import AppObservation
7
+ except ImportError:
8
+ from app.models import AppObservation
9
+
10
+
11
+ def _clamp_score(value):
12
+ return max(0.0, min(1.0, float(value)))
13
+
14
+ def _safe_ratio(numerator, denominator):
15
+ if denominator <= 0:
16
+ return 0.0
17
+ return _clamp_score(numerator / denominator)
18
+
19
+
20
+ def _attempt_quality(rewards):
21
+ rewards = list(rewards)
22
+ if not rewards:
23
+ return 0.0
24
+
25
+ positive_attempts = sum(1 for reward in rewards if reward > 0)
26
+ return _safe_ratio(positive_attempts, len(rewards))
27
+
28
+
29
+ def _feedback(score, category):
30
+ if score >= 0.9:
31
+ return f"{category} performance is excellent and highly reliable."
32
+ if score >= 0.7:
33
+ return f"{category} performance is solid with minor room for improvement."
34
+ if score >= 0.4:
35
+ return f"{category} performance is partial and needs more consistency."
36
+ return f"{category} performance is weak and needs significant improvement."
37
+
38
+
39
+ def grade_segmentation(app_obs: AppObservation):
40
+ total_objects = len(app_obs.objectsFound) + len(app_obs.objectsLeft)
41
+ progress_score = _safe_ratio(len(app_obs.objectsFound), total_objects)
42
+ quality_score = _attempt_quality(app_obs.rewardListSegment)
43
+ score = _clamp_score((progress_score * 0.7) + (quality_score * 0.3))
44
+ return score, _feedback(score, "Segmentation")
45
+
46
+
47
+ def grade_placement(app_obs: AppObservation):
48
+ total_objects = len(app_obs.objectsFound) + len(app_obs.objectsLeft)
49
+ progress_score = _safe_ratio(app_obs.numberPlaced, total_objects)
50
+ quality_score = _attempt_quality(app_obs.rewardListPlace)
51
+ score = _clamp_score((progress_score * 0.7) + (quality_score * 0.3))
52
+ return score, _feedback(score, "Placement")
53
+
54
+
55
+ def grade_adjustment(app_obs: AppObservation):
56
+ rewards = list(app_obs.rewardListAdjust)
57
+ if not rewards:
58
+ return 0.0, "Adjustment was not attempted."
59
+
60
+ score = _attempt_quality(rewards)
61
+ return score, _feedback(score, "Adjustment")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
inference.py CHANGED
@@ -186,7 +186,7 @@ def main() -> None:
186
 
187
  if observation.isDone:
188
  break
189
-
190
  print(HISTORY)
191
 
192
 
 
186
 
187
  if observation.isDone:
188
  break
189
+
190
  print(HISTORY)
191
 
192
 
server/app.py CHANGED
@@ -22,20 +22,19 @@ app = create_app(
22
  )
23
 
24
 
25
- # @app.get("/health")
26
- # def health() -> dict[str, str]:
27
- # return {"status": "ok"}
28
- #
29
- #
30
- # @app.get("/")
31
- # def root() -> dict[str, str]:
32
- # return {
33
- # "message": "Object Placer API is running",
34
- # "health": "/health",
35
- # }
36
 
37
 
38
- def main(host: str = "0.0.0.0", port: int = 8000):
 
 
 
 
 
 
 
39
  """
40
  Entry point for direct execution via uv run or python -m.
41
 
@@ -44,23 +43,18 @@ def main(host: str = "0.0.0.0", port: int = 8000):
44
  uv run --project . server --port 8001
45
  python -m app.server.app
46
 
47
- Args:
48
- host: Host address to bind to (default: "0.0.0.0")
49
- port: Port number to listen on (default: 8000)
50
-
51
  For production deployments, consider using uvicorn directly with
52
  multiple workers:
53
  uvicorn app.server.app:app --workers 4
54
  """
55
- import uvicorn
56
-
57
- uvicorn.run(app, host=host, port=port)
58
-
59
-
60
- if __name__ == "__main__":
61
  import argparse
62
 
63
  parser = argparse.ArgumentParser()
 
64
  parser.add_argument("--port", type=int, default=8000)
65
  args = parser.parse_args()
66
- main(port=args.port)
 
 
 
 
 
22
  )
23
 
24
 
25
+ @app.get("/health")
26
+ def health() -> dict[str, str]:
27
+ return {"status": "ok"}
 
 
 
 
 
 
 
 
28
 
29
 
30
+ def _run_server(host: str = "0.0.0.0", port: int = 8000):
31
+ """Start the FastAPI app with uvicorn."""
32
+ import uvicorn
33
+
34
+ uvicorn.run(app, host=host, port=port)
35
+
36
+
37
+ def main():
38
  """
39
  Entry point for direct execution via uv run or python -m.
40
 
 
43
  uv run --project . server --port 8001
44
  python -m app.server.app
45
 
 
 
 
 
46
  For production deployments, consider using uvicorn directly with
47
  multiple workers:
48
  uvicorn app.server.app:app --workers 4
49
  """
 
 
 
 
 
 
50
  import argparse
51
 
52
  parser = argparse.ArgumentParser()
53
+ parser.add_argument("--host", default="0.0.0.0")
54
  parser.add_argument("--port", type=int, default=8000)
55
  args = parser.parse_args()
56
+ _run_server(host=args.host, port=args.port)
57
+
58
+
59
+ if __name__ == "__main__":
60
+ main()
server/app_environment.py CHANGED
@@ -16,6 +16,7 @@ except ImportError:
16
  class AppEnvironment(Environment):
17
 
18
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
 
19
 
20
  def __init__(self):
21
  self._state = self._new_state()
@@ -84,6 +85,26 @@ class AppEnvironment(Environment):
84
  def step(self, action: AppAction) -> AppObservation:
85
  state = self._coerce_state()
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  if isinstance(action, dict):
88
  action = AppAction(**action)
89
 
@@ -152,8 +173,16 @@ class AppEnvironment(Environment):
152
  appendRewardFeedback(
153
  state, "segment", "All objects found. Episode completed!", reward
154
  )
 
 
 
 
 
 
 
 
155
 
156
- state.reward += reward / (10**state.step_count)
157
 
158
  return AppObservation(
159
  currentGrid=state.currentGrid,
 
16
  class AppEnvironment(Environment):
17
 
18
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
19
+ MAX_STEPS: int = 20
20
 
21
  def __init__(self):
22
  self._state = self._new_state()
 
85
  def step(self, action: AppAction) -> AppObservation:
86
  state = self._coerce_state()
87
 
88
+ if state.isDone:
89
+ return AppObservation(
90
+ currentGrid=state.currentGrid,
91
+ positions=state.ObjectsPresent,
92
+ objectsLeft=state.objectsLeft,
93
+ objectsFound=state.objectsFound,
94
+ reward=state.reward,
95
+ isDone=state.isDone,
96
+ rewardFeedback=state.rewardFeedback,
97
+ rewardList=state.rewardList,
98
+ numberPlaced=state.numberPlaced,
99
+ ObjectsPlaced=state.ObjectsPlaced,
100
+ rewardListSegment=state.rewardListSegment,
101
+ rewardFeedbackSegment=state.rewardFeedbackSegment,
102
+ rewardListPlace=state.rewardListPlace,
103
+ rewardFeedbackPlace=state.rewardFeedbackPlace,
104
+ rewardListAdjust=state.rewardListAdjust,
105
+ rewardFeedbackAdjust=state.rewardFeedbackAdjust,
106
+ )
107
+
108
  if isinstance(action, dict):
109
  action = AppAction(**action)
110
 
 
173
  appendRewardFeedback(
174
  state, "segment", "All objects found. Episode completed!", reward
175
  )
176
+ elif state.step_count >= self.MAX_STEPS:
177
+ state.isDone = True
178
+ appendRewardFeedback(
179
+ state,
180
+ "",
181
+ f"Maximum step limit of {self.MAX_STEPS} reached. Episode ended.",
182
+ reward,
183
+ )
184
 
185
+ state.reward += reward
186
 
187
  return AppObservation(
188
  currentGrid=state.currentGrid,
utils.py CHANGED
@@ -182,11 +182,13 @@ def place(segment, objects, state):
182
  totalObjs = len(objects)
183
  reward_per_obj_placed = 45.0 / totalObjs
184
 
 
 
185
  if segment:
186
  appendRewardFeedback(
187
  state, "place", "Placing objects with segmentation is not allowed.", -60.0
188
  )
189
- return -60.0
190
 
191
  for obj_name, pos in objects.items():
192
 
@@ -202,8 +204,6 @@ def place(segment, objects, state):
202
  continue
203
 
204
  objGrid = initDimentions(obj)
205
- placement_failed = False
206
-
207
  for i in range(len(objGrid)):
208
  for j in range(len(objGrid[0])):
209
  for k in range(len(objGrid[0][0])):
@@ -320,11 +320,11 @@ def findobject(segment, objects, state):
320
  )
321
  return -60.0
322
 
323
- if state.ObjectsPresent == state.objectsFound:
324
  appendRewardFeedback(
325
  state,
326
  "segment",
327
- "No point in finding more objects as all are already found Make the IsSegement attribute false and execute the place method.",
328
  -60.0,
329
  )
330
  return -60.0
@@ -381,8 +381,10 @@ def findobject(segment, objects, state):
381
  )
382
 
383
  for obj in objs:
384
- state.objectsLeft.remove(obj)
385
- state.objectsFound.append(obj)
 
 
386
 
387
  return reward
388
 
 
182
  totalObjs = len(objects)
183
  reward_per_obj_placed = 45.0 / totalObjs
184
 
185
+ placement_failed = False
186
+
187
  if segment:
188
  appendRewardFeedback(
189
  state, "place", "Placing objects with segmentation is not allowed.", -60.0
190
  )
191
+ return (-60.0, True)
192
 
193
  for obj_name, pos in objects.items():
194
 
 
204
  continue
205
 
206
  objGrid = initDimentions(obj)
 
 
207
  for i in range(len(objGrid)):
208
  for j in range(len(objGrid[0])):
209
  for k in range(len(objGrid[0][0])):
 
320
  )
321
  return -60.0
322
 
323
+ if set(state.objectsFound) == set(state.ObjectsPresent.keys()):
324
  appendRewardFeedback(
325
  state,
326
  "segment",
327
+ "No point in finding more objects as all are already found. Make the isSegement attribute false and execute the place method.",
328
  -60.0,
329
  )
330
  return -60.0
 
381
  )
382
 
383
  for obj in objs:
384
+ if obj in state.objectsLeft:
385
+ state.objectsLeft.remove(obj)
386
+ if obj not in state.objectsFound:
387
+ state.objectsFound.append(obj)
388
 
389
  return reward
390
 
uv.lock ADDED
The diff for this file is too large to render. See raw diff