Spaces:

TechAvenger
/

TrashCollector

Build error

Mihir Mithani

Sync Hub-enabled code to Space (no weights)

a8d4cdf about 2 months ago

9.17 kB

	"""
	FastAPI server for the Garbage Collecting Robot OpenEnv environment.
	Exposes reset / step / state / tasks / grade / policy / configure endpoints.

	Fix applied:
	- /policy BFS fallback now uses env.get_observation().dict() instead of
	a hand-built incomplete dict (which was missing robot_mode, home_position,
	unload_station, current_storage_load, storage_capacity, distance_from_home).
	- Static files and /ui route added so the HTML dashboard is served from the
	same origin — required for HuggingFace Spaces deployment.
	"""

	import os
	import sys
	sys.path.insert(0, os.path.dirname(__file__))

	from typing import List
	from pydantic import BaseModel
	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.staticfiles import StaticFiles
	from fastapi.responses import FileResponse

	from environment import GarbageRobotEnv
	from models import (
	Action, StepOutput, ResetInput, ResetOutput, CustomResetInput, State, Task,
	)

	# Human-readable summary published in the generated OpenAPI document.
	_API_DESCRIPTION = (
	    "An OpenEnv-compliant robotics environment for garbage collection. "
	    "AI agents must navigate a grid room to pick up garbage while managing battery constraints."
	)

	app = FastAPI(
	    title="Garbage Collecting Robot — OpenEnv",
	    description=_API_DESCRIPTION,
	    version="1.0.0",
	)

	# Wildcard CORS policy: every origin, method and header is accepted.
	app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

	# A single environment instance is created at import time; every endpoint
	# in this module reads and mutates this same object.
	env = GarbageRobotEnv()

	# One row per published task: (id, name, description, difficulty).
	_TASK_SPECS = (
	    (
	        "task_easy",
	        "Small Room Clean",
	        "Navigate a small 5x5 grid to collect 1 piece of garbage.",
	        "easy",
	    ),
	    (
	        "task_medium",
	        "Medium Room with Obstacles",
	        "Navigate a 7x7 grid to collect 3 pieces of garbage with limited battery.",
	        "medium",
	    ),
	    (
	        "task_hard",
	        "Large Maze Cleanup",
	        "Navigate a 10x10 maze avoiding obstacles to collect 5 pieces of garbage with strict battery usage.",
	        "hard",
	    ),
	)

	# Task catalogue returned by GET /tasks. Every task advertises the same
	# [0.0, 1.0] reward range; each Task receives its own list object.
	TASKS = [
	    Task(
	        id=task_id,
	        name=task_name,
	        description=task_description,
	        difficulty=task_difficulty,
	        reward_range=[0.0, 1.0],
	    )
	    for task_id, task_name, task_description, task_difficulty in _TASK_SPECS
	]

	# Set of accepted task ids, used by the endpoints to validate requests.
	VALID_IDS = {task.id for task in TASKS}

	@app.get("/", tags=["health"])
	def health():
	    """Report that the service is up and name the environment it hosts."""
	    payload = {"status": "ok", "env": "garbage-collecting-robot"}
	    return payload

	@app.post("/reset", response_model=ResetOutput, tags=["openenv"])
	def reset(body: ResetInput = ResetInput()):
	    """
	    Reset the shared environment to the start of one of the known tasks.

	    The request body is optional; when it is omitted a default-constructed
	    ResetInput is used.

	    Returns:
	        {"observation": <dict form of env.get_observation()>}

	    Raises:
	        HTTPException: 400 when body.task_id is not in VALID_IDS.
	    """
	    if body.task_id not in VALID_IDS:
	        raise HTTPException(400, f"task_id must be one of {sorted(VALID_IDS)}")
	    # The value returned by env.reset() is intentionally not kept: the
	    # response is built from a fresh observation instead, and binding it to
	    # a local called `state` only shadowed the /state endpoint's name.
	    env.reset(task_id=body.task_id)
	    return {"observation": env.get_observation().dict()}

	@app.post("/reset_custom", response_model=ResetOutput, tags=["openenv"])
	def reset_custom(body: CustomResetInput):
	    """
	    Dynamic reset endpoint. Lets callers specify garbage positions,
	    obstacle positions, robot start, grid size and battery at runtime.
	    Any omitted field falls back to the base scenario's value.

	    Every field of the request body is forwarded unchanged to
	    env.reset_custom(); the response carries the resulting observation.
	    """
	    # NOTE(review): unlike /reset, task_id is not checked against VALID_IDS
	    # here — confirm env.reset_custom() rejects unknown ids itself.
	    overrides = {
	        "task_id": body.task_id,
	        "grid_size": body.grid_size,
	        "robot_start": body.robot_start,
	        "garbage_positions": body.garbage_positions,
	        "obstacle_positions": body.obstacle_positions,
	        "max_battery": body.max_battery,
	        "storage_capacity": body.storage_capacity,
	        "home_position": body.home_position,
	        "unload_station": body.unload_station,
	    }
	    env.reset_custom(**overrides)
	    observation = env.get_observation()
	    return {"observation": observation.dict()}

	@app.post("/step", response_model=StepOutput, tags=["openenv"])
	def step(body: Action):
	    """Apply body.command to the shared environment and return what env.step() produced."""
	    return env.step(command=body.command)

	@app.get("/state", response_model=State, tags=["openenv"])
	def state():
	    """Return the shared environment's current state as reported by env.state()."""
	    snapshot = env.state()
	    return snapshot

	@app.get("/tasks", response_model=list[Task], tags=["openenv"])
	def tasks():
	    """List every task this server offers (the module-level TASKS catalogue)."""
	    catalogue = TASKS
	    return catalogue

	@app.get("/grade/{task_id}", tags=["grading"])
	def grade(task_id: str):
	    """
	    Score the shared environment for the given task.

	    Returns the task id, the value of env.grade(task_id) and the fixed
	    [0.0, 1.0] reward range.

	    Raises:
	        HTTPException: 400 when task_id is not in VALID_IDS.
	    """
	    if task_id not in VALID_IDS:
	        raise HTTPException(400, f"task_id must be one of {sorted(VALID_IDS)}")
	    return {
	        "task_id": task_id,
	        "score": env.grade(task_id),
	        "reward_range": [0.0, 1.0],
	    }


	# ── Policy endpoint (fine-tuned LLM) ──────────────────────────────────────

	# Identifier handed to from_pretrained() in _load_policy(); taken from the
	# LOCAL_MODEL_PATH environment variable when set, otherwise the default below.
	LOCAL_MODEL_PATH = os.getenv("LOCAL_MODEL_PATH", "TechAvenger/GarbageBot-Weights")

	# Lazily populated by _load_policy(). Model and tokenizer stay None until a
	# load succeeds; _policy_loaded records that a load has been attempted.
	_policy_model = None
	_policy_tokenizer = None
	_policy_loaded = False

	def _load_policy():
	    """
	    Load the fine-tuned policy model and tokenizer, at most once per process.

	    On success both module globals (_policy_tokenizer, _policy_model) are
	    set together. On any failure — including the ML libraries not being
	    importable — the error is printed and both globals keep their previous
	    value. Either way _policy_loaded is set so the load is not retried.
	    """
	    global _policy_model, _policy_tokenizer, _policy_loaded
	    if _policy_loaded:
	        return
	    try:
	        # Imported here rather than at module level; an ImportError is
	        # handled below like any other load failure.
	        from transformers import AutoModelForCausalLM, AutoTokenizer
	        import torch
	        tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_PATH)
	        model = AutoModelForCausalLM.from_pretrained(
	            LOCAL_MODEL_PATH, torch_dtype=torch.float16, device_map="auto"
	        )
	        model.eval()
	    except Exception as e:
	        print(f"[Policy] Model unavailable: {e}")
	    else:
	        # Publish both objects only after everything succeeded, so a failed
	        # model load can no longer leave the tokenizer global set on its own.
	        _policy_tokenizer, _policy_model = tokenizer, model
	        print(f"[Policy] Fine-tuned model loaded from {LOCAL_MODEL_PATH}")
	    _policy_loaded = True


	class PolicyInput(BaseModel):
	    # Request body for POST /policy.
	    # `message` is the obs.message string from the environment; it is
	    # embedded verbatim in the prompt sent to the model.
	    message: str

	def _first_valid_action(raw, valid):
	    """
	    Return the command from `valid` that appears earliest in `raw`.

	    Matching is by substring. Returns None when no command occurs in `raw`.
	    Ties on position keep the command listed first in `valid`.
	    """
	    hits = [(raw.find(command), command) for command in valid if command in raw]
	    if not hits:
	        return None
	    return min(hits, key=lambda hit: hit[0])[1]


	@app.post("/policy", tags=["openenv"])
	def policy(body: PolicyInput):
	    """
	    Ask the fine-tuned LLM for the next action.
	    Returns {"action": "UP|DOWN|LEFT|RIGHT|COLLECT", "source": "llm|bfs"}

	    Answers that come from the LLM additionally carry the decoded model
	    text under "raw". When the model is not loaded, inference raises, or
	    the decoded text contains none of the valid commands, the action is
	    computed by inference.heuristic_action() from the current observation.
	    """
	    _load_policy()
	    VALID = ["UP", "DOWN", "LEFT", "RIGHT", "COLLECT"]

	    if _policy_model is not None and _policy_tokenizer is not None:
	        try:
	            import torch
	            instruction = (
	                "You are an AI brain controlling a garbage collecting robot.\n"
	                "Reply with EXACTLY ONE of: UP DOWN LEFT RIGHT COLLECT"
	            )
	            prompt = (
	                f"### Instruction:\n{instruction}\n\n"
	                f"### Input:\nENVIRONMENT STATUS:\n{body.message}\n\n"
	                f"### Response:\n"
	            )
	            inputs = _policy_tokenizer(
	                prompt, return_tensors="pt", truncation=True, max_length=512
	            ).to(_policy_model.device)
	            with torch.no_grad():
	                outputs = _policy_model.generate(
	                    **inputs, max_new_tokens=6, do_sample=False,
	                    pad_token_id=_policy_tokenizer.eos_token_id
	                )
	            # Keep only the tokens generated after the prompt.
	            new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
	            raw = _policy_tokenizer.decode(new_tokens, skip_special_tokens=True).strip().upper()
	            # Take the command the model wrote first, not the one that
	            # happens to come first in VALID ("DOWN UP" must yield DOWN).
	            action = _first_valid_action(raw, VALID)
	            if action is not None:
	                return {"action": action, "source": "llm", "raw": raw}
	        except Exception as e:
	            # Best effort: any inference failure falls through to the heuristic.
	            print(f"[Policy] Inference error: {e}")

	    # FIX: use env.get_observation().dict() so heuristic_action() receives
	    # all required fields (robot_mode, home_position, unload_station, etc.)
	    # instead of the previous hand-built incomplete dict.
	    from inference import heuristic_action
	    obs_dict = env.get_observation().dict()
	    obs_dict["message"] = body.message  # use the caller's message for context
	    return {"action": heuristic_action(obs_dict), "source": "bfs"}


	# ── Dynamic garbage placement ──────────────────────────────────────────────

	class ConfigureInput(BaseModel):
	    # Request body for POST /configure.
	    # Task that is reset before the garbage is overridden.
	    task_id: str = "task_easy"
	    # Garbage cells to place, given as [[x, y], ...]; required (no default).
	    garbage_positions: List[List[int]]

	@app.post("/configure", tags=["openenv"])
	def configure(body: ConfigureInput):
	    """
	    Reset the environment for task_id, then override garbage positions
	    with whatever the caller supplies.

	    Each supplied position must be a two-element [x, y] pair that lies
	    inside the grid and is not listed in env.obstacle_positions.

	    Note that the reset happens before the positions are validated, so a
	    request rejected for a bad position still leaves the environment reset
	    to the task's own garbage layout.

	    Raises:
	        HTTPException: 400 for an unknown task_id, a malformed position, an
	            out-of-bounds position, or a position on an obstacle.
	    """
	    if body.task_id not in VALID_IDS:
	        raise HTTPException(400, f"task_id must be one of {sorted(VALID_IDS)}")

	    env.reset(task_id=body.task_id)

	    # Nothing in the loop below changes the grid or the obstacles, so read
	    # them once instead of once per position.
	    gw, gh = env.grid_size
	    obstacles = env.obstacle_positions

	    validated = []
	    for pos in body.garbage_positions:
	        if len(pos) != 2:
	            raise HTTPException(400, f"Each position must be [x, y], got {pos}")
	        x, y = pos
	        if not (0 <= x < gw and 0 <= y < gh):
	            raise HTTPException(400, f"Position {pos} out of bounds for grid {env.grid_size}")
	        # NOTE(review): this membership test compares against [x, y] lists —
	        # confirm env.obstacle_positions stores lists rather than tuples.
	        if [x, y] in obstacles:
	            raise HTTPException(400, f"Position {pos} is an obstacle")
	        validated.append([x, y])

	    # Written straight onto the environment attribute, bypassing env.reset().
	    env.garbage_positions = validated

	    return {"observation": env.get_observation().dict()}


	# ── Serve HTML dashboard ───────────────────────────────────────────────────
	# This makes the frontend accessible at /ui on the same origin as the API,
	# which is required for HuggingFace Spaces (no localhost cross-origin issues).

	@app.get("/ui", include_in_schema=False)
	def ui():
	    """
	    Serve the dashboard HTML.

	    The file is looked up at frontend/index.html relative to the process's
	    working directory.

	    Raises:
	        HTTPException: 404 when the dashboard file does not exist.
	    """
	    index_path = "frontend/index.html"
	    # Check up front: FileResponse would otherwise only fail while the
	    # response is being sent, which reaches the client as a server error
	    # instead of a clear "not found".
	    if not os.path.isfile(index_path):
	        raise HTTPException(404, f"Dashboard file {index_path} not found")
	    return FileResponse(index_path)

	# Mount static assets (style.css, script.js) at /static.
	# The whole frontend directory is exposed, but only when at least one of
	# the two asset files is present at import time.
	if any(
	    os.path.exists(asset_path)
	    for asset_path in ("frontend/style.css", "frontend/script.js")
	):
	    app.mount("/static", StaticFiles(directory="frontend"), name="static")


	if __name__ == "__main__":
	    # Executed only when this file is run as a script: start uvicorn
	    # serving `app` on all interfaces, port 7860.
	    import uvicorn

	    uvicorn.run(
	        app,
	        host="0.0.0.0",
	        port=7860,
	    )