Spaces:
Running on Zero
Running on Zero
Siddharth Ravikumar committed on
Commit ·
7ff4a07
1
Parent(s): 287e21d
Fix chat backend to use chat_engine text module instead of inference_engine vision module
Browse files
app.py
CHANGED
|
@@ -24,7 +24,7 @@ from fastapi.responses import FileResponse
|
|
| 24 |
# ── Backend Imports ────────────────────────────────────────────────────
|
| 25 |
from backend.app.config import settings
|
| 26 |
from backend.app.db.database import db
|
| 27 |
-
from backend.app.core.inference import inference_engine, SCENE_ANALYSIS_PROMPT
|
| 28 |
from backend.app.core.scene_analyzer import SceneAnalyzer
|
| 29 |
from backend.app.core.rule_matcher import RuleMatcher
|
| 30 |
from backend.app.core.fault_deducer import FaultDeducer
|
|
@@ -55,6 +55,15 @@ def gpu_run_inference(image, prompt):
|
|
| 55 |
# Monkey-patch so the entire pipeline uses GPU
|
| 56 |
inference_engine._run_inference = gpu_run_inference
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
# ── Async helpers ──────────────────────────────────────────────────────
|
| 60 |
|
|
@@ -504,22 +513,10 @@ def chat_respond(user_message, history, system_ctx):
|
|
| 504 |
if not user_message or not user_message.strip():
|
| 505 |
return history, "", system_ctx
|
| 506 |
ensure_init()
|
| 507 |
-
if not
|
| 508 |
-
|
| 509 |
try:
|
| 510 |
-
|
| 511 |
-
chat_prompt = f"""You are TraceScene AI assistant helping with accident analysis.
|
| 512 |
-
|
| 513 |
-
CONTEXT:
|
| 514 |
-
{system_ctx}
|
| 515 |
-
|
| 516 |
-
USER QUESTION: {user_message.strip()}
|
| 517 |
-
|
| 518 |
-
Provide a concise, helpful answer based on the context above."""
|
| 519 |
-
# Create a small blank image for the vision model
|
| 520 |
-
from PIL import Image as PILImg
|
| 521 |
-
blank = PILImg.new('RGB', (64, 64), color=(0, 0, 0))
|
| 522 |
-
response = gpu_run_inference(blank, chat_prompt)
|
| 523 |
except Exception as e:
|
| 524 |
response = f"Error: {e}"
|
| 525 |
history = history or []
|
|
|
|
| 24 |
# ── Backend Imports ────────────────────────────────────────────────────
|
| 25 |
from backend.app.config import settings
|
| 26 |
from backend.app.db.database import db
|
| 27 |
+
from backend.app.core.inference import inference_engine, chat_engine, SCENE_ANALYSIS_PROMPT
|
| 28 |
from backend.app.core.scene_analyzer import SceneAnalyzer
|
| 29 |
from backend.app.core.rule_matcher import RuleMatcher
|
| 30 |
from backend.app.core.fault_deducer import FaultDeducer
|
|
|
|
| 55 |
# Monkey-patch so the entire pipeline uses GPU
|
| 56 |
inference_engine._run_inference = gpu_run_inference
|
| 57 |
|
| 58 |
+
_original_chat = chat_engine.chat
|
| 59 |
+
|
| 60 |
+
@spaces.GPU(duration=60)
|
| 61 |
+
def gpu_run_chat(system_context: str, user_message: str):
|
| 62 |
+
"""GPU-accelerated chat inference"""
|
| 63 |
+
return _original_chat(system_context, user_message)
|
| 64 |
+
|
| 65 |
+
chat_engine.chat = gpu_run_chat
|
| 66 |
+
|
| 67 |
|
| 68 |
# ── Async helpers ──────────────────────────────────────────────────────
|
| 69 |
|
|
|
|
| 513 |
if not user_message or not user_message.strip():
|
| 514 |
return history, "", system_ctx
|
| 515 |
ensure_init()
|
| 516 |
+
if not chat_engine.is_loaded:
|
| 517 |
+
chat_engine.load_model()
|
| 518 |
try:
|
| 519 |
+
response = gpu_run_chat(system_ctx, user_message.strip())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
except Exception as e:
|
| 521 |
response = f"Error: {e}"
|
| 522 |
history = history or []
|