rasAli02 committed on
Commit
83d8e9c
·
1 Parent(s): 7e8948c

🚀 Debug: Capture and surface backend errors on Vercel

Browse files
Files changed (1) hide show
  1. backend/app.py +31 -294
backend/app.py CHANGED
@@ -1,168 +1,21 @@
1
  import os
2
- import uuid
3
- import time
4
- import math
5
- import httpx
6
- import json
7
- import tempfile
8
- import asyncio
9
- from datetime import datetime, timezone
10
- from typing import List, Optional
11
-
12
- from typing import List, Optional
13
-
14
  from fastapi import FastAPI, Request
15
- from fastapi.responses import JSONResponse, FileResponse
16
  from fastapi.middleware.cors import CORSMiddleware
17
- from fpdf import FPDF
18
-
19
- # Import our agent pipeline
20
- from agents import run_pipeline, AMD_INFERENCE_URL, AMD_MODEL_NAME, AMD_INFERENCE_TOKEN, generate_social_post
21
-
22
- # ── MONGODB PERSISTENCE (optional, falls back to in-memory) ──────────────────
23
- MONGO_URL = os.getenv("MONGO_URL", "")
24
- _db = None
25
- _inspections_col = None
26
- _journal_col = None
27
-
28
- # In-memory fallback
29
- _mem_inspections: list = []
30
- _mem_journal: list = []
31
-
32
- async def _init_db():
33
- """Attempt to connect to MongoDB; silently fall back to in-memory if unavailable."""
34
- global _db, _inspections_col, _journal_col
35
- if not MONGO_URL:
36
- print("⚠️ MONGO_URL not set – using in-memory storage")
37
- return
38
- try:
39
- from motor.motor_asyncio import AsyncIOMotorClient
40
- import certifi
41
- # Try with standard SSL first
42
- client = AsyncIOMotorClient(
43
- MONGO_URL,
44
- serverSelectionTimeoutMS=5000,
45
- tlsCAFile=certifi.where(),
46
- # Fallback for environments with strict/broken SSL handshakes
47
- tlsAllowInvalidCertificates=True
48
- )
49
- _db = client["forgesight"]
50
- _inspections_col = _db["inspections"]
51
- _journal_col = _db["journal"]
52
- print("✅ MongoDB client initialized (with TLS fallback)")
53
- except Exception as e:
54
- print(f"⚠️ MongoDB unavailable ({e}) – using in-memory storage")
55
-
56
- async def _db_insert_inspection(doc: dict):
57
- if _inspections_col is not None:
58
- await _inspections_col.insert_one({**doc, "_id": doc["id"]})
59
- else:
60
- _mem_inspections.insert(0, doc)
61
-
62
- async def _db_list_inspections(limit=50) -> list:
63
- if _inspections_col is not None:
64
- cursor = _inspections_col.find({}, {"_id": 0}).sort("created_at", -1).limit(limit)
65
- return await cursor.to_list(length=limit)
66
- return _mem_inspections[:limit]
67
-
68
- async def _db_insert_journal(doc: dict):
69
- if _journal_col is not None:
70
- await _journal_col.insert_one({**doc, "_id": doc["id"]})
71
- else:
72
- _mem_journal.insert(0, doc)
73
-
74
- async def _db_list_journal(limit=50) -> list:
75
- if _journal_col is not None:
76
- cursor = _journal_col.find({}, {"_id": 0}).sort("created_at", -1).limit(limit)
77
- return await cursor.to_list(length=limit)
78
- return _mem_journal[:limit]
79
-
80
- # ── HELPERS ───────────────────────────────────────────────────────────────────
81
-
82
- def _now_iso() -> str:
83
- return datetime.now(timezone.utc).isoformat()
84
-
85
- def _summarize(inspection: dict) -> dict:
86
- agents = inspection.get("transcript", {}).get("agents", [])
87
- inspector = next((a for a in agents if a["role"] == "inspector"), None)
88
- reporter = next((a for a in agents if a["role"] == "reporter"), None)
89
- action = next((a for a in agents if a["role"] == "action"), None)
90
-
91
- inspector_out = (inspector or {}).get("output", {}).get("parsed", {}) or {}
92
- reporter_out = (reporter or {}).get("output", {}).get("parsed", {}) or {}
93
- action_out = (action or {}).get("output", {}).get("parsed", {}) or {}
94
-
95
- defects = inspector_out.get("defects") or []
96
- return {
97
- "id": inspection["id"],
98
- "created_at": inspection["created_at"],
99
- "verdict": inspector_out.get("verdict", "warn"),
100
- "confidence": float(inspector_out.get("confidence", 0.0) or 0.0),
101
- "headline": (reporter_out.get("headline") or inspector_out.get("observation", "Inspection complete"))[:60],
102
- "defect_count": len(defects) if isinstance(defects, list) else 0,
103
- "priority": action_out.get("priority", "P2"),
104
- "source": inspection.get("source", "upload"),
105
- }
106
-
107
- async def _seed_journal():
108
- """Seed the journal with initial milestones (instant, no LLM calls)."""
109
- existing = await _db_list_journal(1)
110
- if existing:
111
- return
112
- seeds = [
113
- {
114
- "title": "Kickoff: ForgeSight on AMD Developer Cloud",
115
- "body": "Spun up an MI300X instance on AMD Developer Cloud. First impression: zero CUDA-lock-in, ROCm + PyTorch just worked.",
116
- "tags": ["kickoff", "amd", "rocm"],
117
- "x_post": "🚀 ForgeSight is live! We've officially spun up an AMD Instinct MI300X instance on the Developer Cloud. Zero CUDA-lock-in, just raw ROCm power. #AMDHackathon #ROCm #AIatAMD @lablab @AIatAMD",
118
- "linkedin_post": "We've officially kicked off ForgeSight for the AMD + lablab.ai Hackathon! We're leveraging the massive 192GB VRAM of the MI300X to build a production-ready QC pipeline. #AI #AMD #Engineering",
119
- },
120
- {
121
- "title": "Multi-agent pipeline wired end-to-end",
122
- "body": "Inspector → Diagnostician → Action → Reporter. Each agent produces strict JSON so hand-offs stay auditable.",
123
- "tags": ["agents", "pipeline", "qwen"],
124
- "x_post": "Our 4-agent pipeline is wired! Inspector → Diagnostician → Action → Reporter. Real-time vision reasoning on MI300X. #AIatAMD #AMDHackathon @lablab",
125
- "linkedin_post": "Auditability is key in industrial QC. ForgeSight's multi-agent pipeline ensures every decision is grounded in structured data. #QualityControl #Agents",
126
- },
127
- ]
128
- for s in seeds:
129
- entry = {
130
- "id": str(uuid.uuid4()),
131
- "created_at": _now_iso(),
132
- **s,
133
- }
134
- await _db_insert_journal(entry)
135
-
136
- # ── API LOGIC ─────────────────────────────────────────────────────────────────
137
 
138
- async def api_get_telemetry():
139
- t = time.time()
140
- status = "Connected"
141
- error_msg = None
142
-
143
- # FOR HACKATHON DEMO: Simulated data for premium UI visuals
144
- gpu_util = 65 + 25 * math.sin(t / 4.0)
145
- vram_used = 142.0 + 10 * math.sin(t / 6.0)
146
- tokens_per_sec = int(2700 + 300 * math.sin(t / 3.0))
147
- power_w = int(480 + 50 * math.sin(t / 5.0))
148
 
149
- return {
150
- "gpu_util_pct": round(gpu_util, 1),
151
- "vram_used_gb": round(vram_used, 1),
152
- "vram_total_gb": 192.0,
153
- "temp_c": round(64 + 4 * math.sin(t / 7.0), 1),
154
- "power_watts": power_w,
155
- "tokens_per_sec": tokens_per_sec,
156
- "device": "AMD Instinct MI300X",
157
- "status": status,
158
- "is_simulated": True,
159
- "persistence": "MongoDB" if _inspections_col is not None else "In-Memory",
160
- "ts": _now_iso(),
161
- }
162
 
163
- # ── FASTAPI SETUP ─────────────────────────────────────────────────────────────
164
-
165
- app = FastAPI(title="ForgeSight API")
166
 
167
  app.add_middleware(
168
  CORSMiddleware,
@@ -171,143 +24,27 @@ app.add_middleware(
171
  allow_headers=["*"],
172
  )
173
 
174
- @app.on_event("startup")
175
- async def startup_event():
176
- await _init_db()
177
- await _seed_journal()
 
 
 
 
 
 
 
 
178
 
179
- @app.get("/api")
180
  @app.get("/api/health")
181
- async def handle_health():
182
- return {"status": "online", "service": "forgesight", "db": "connected" if _inspections_col is not None else "memory"}
 
183
 
184
  @app.get("/api/inspections")
185
- async def get_inspections(limit: int = 50):
186
- docs = await _db_list_inspections(limit)
187
- items = [_summarize(doc) for doc in docs]
188
- return {"items": items, "total": len(items)}
189
-
190
- @app.post("/api/inspections")
191
- async def create_inspection(request: Request):
192
- data = await request.json()
193
- image_base64 = data.get("image_base64", "")
194
- notes = data.get("notes", "")
195
- product_spec = data.get("product_spec", "")
196
- source = data.get("source", "upload")
197
-
198
- if image_base64 and "," in image_base64:
199
- image_base64 = image_base64.split(",")[1]
200
-
201
- transcript = await run_pipeline(image_base64, notes, product_spec)
202
-
203
- inspection = {
204
- "id": str(uuid.uuid4()),
205
- "created_at": _now_iso(),
206
- "notes": notes or "",
207
- "product_spec": product_spec or "",
208
- "source": source or "upload",
209
- "transcript": transcript,
210
- }
211
- await _db_insert_inspection(inspection)
212
- return inspection
213
-
214
- @app.get("/api/inspections/{inspection_id}")
215
- async def get_inspection(inspection_id: str):
216
- inspection = None
217
- if _inspections_col is not None:
218
- inspection = await _inspections_col.find_one({"id": inspection_id}, {"_id": 0})
219
- else:
220
- inspection = next((i for i in _mem_inspections if i["id"] == inspection_id), None)
221
-
222
- if not inspection:
223
- return JSONResponse({"detail": "Inspection not found"}, status_code=404)
224
- return inspection
225
-
226
- @app.get("/api/metrics")
227
- async def get_metrics():
228
- docs = await _db_list_inspections(500)
229
- total = len(docs)
230
- verdict_counts = {"pass": 0, "warn": 0, "fail": 0}
231
- defect_type_counts = {}
232
- confidences = []
233
-
234
- for doc in docs:
235
- summary = _summarize(doc)
236
- v = summary["verdict"] if summary["verdict"] in verdict_counts else "warn"
237
- verdict_counts[v] += 1
238
- confidences.append(summary["confidence"])
239
- agents = doc.get("transcript", {}).get("agents", [])
240
- inspector = next((a for a in agents if a["role"] == "inspector"), None)
241
- defects = ((inspector or {}).get("output", {}).get("parsed", {}) or {}).get("defects") or []
242
- if isinstance(defects, list):
243
- for d in defects:
244
- if isinstance(d, dict):
245
- t = (d.get("type") or "unknown").lower()
246
- defect_type_counts[t] = defect_type_counts.get(t, 0) + 1
247
-
248
- avg_conf = sum(confidences) / len(confidences) if confidences else 0.0
249
- top_defects = sorted(defect_type_counts.items(), key=lambda x: x[1], reverse=True)[:6]
250
- quality_score = round(100 * (verdict_counts["pass"] + 0.5 * verdict_counts["warn"]) / total) if total > 0 else 100
251
-
252
- return {
253
- "total_inspections": total,
254
- "verdict_counts": verdict_counts,
255
- "avg_confidence": round(avg_conf, 3),
256
- "top_defects": [{"type": t, "count": c} for t, c in top_defects],
257
- "quality_score": quality_score,
258
- }
259
-
260
- @app.get("/api/telemetry")
261
- async def get_telemetry():
262
- return await api_get_telemetry()
263
-
264
- @app.get("/api/blueprint")
265
- async def get_blueprint():
266
- return {
267
- "stack": [
268
- {"layer": "Hardware", "title": "AMD Instinct MI300X", "detail": "192 GB HBM3 · 5.3 TB/s bandwidth", "why": "Enables massive VRAM pools for multimodal Qwen-VL."},
269
- {"layer": "Runtime", "title": "ROCm 6.2", "detail": "Open compute stack · PyTorch 2.4", "why": "Native AMD acceleration without CUDA lock-in."},
270
- {"layer": "Serving", "title": "vLLM", "detail": "PagedAttention · continuous batching", "why": "High-throughput serving for agentic chains."},
271
- {"layer": "Model", "title": "Qwen2-VL-72B", "detail": "Fine-tuned for structural defects", "why": "Domain-specialized vision reasoning."},
272
- {"layer": "Agents", "title": "Sequential Agentic Chain", "detail": "Structured JSON hand-offs", "why": "Auditability and reliability."},
273
- ]
274
- }
275
-
276
- @app.get("/api/journal")
277
- async def list_journal():
278
- items = await _db_list_journal(50)
279
- if not items:
280
- await _seed_journal()
281
- items = await _db_list_journal(50)
282
- return {"items": items, "total": len(items)}
283
-
284
- @app.post("/api/journal")
285
- async def create_journal(request: Request):
286
- data = await request.json()
287
- title = data.get("title", "")
288
- body = data.get("body", "")
289
- tags = data.get("tags", [])
290
-
291
- try:
292
- social = await generate_social_post(title, body)
293
- except:
294
- social = {"x_post": "", "linkedin_post": ""}
295
-
296
- entry = {
297
- "id": str(uuid.uuid4()),
298
- "created_at": _now_iso(),
299
- "title": title,
300
- "body": body,
301
- "tags": tags,
302
- "x_post": social.get("x_post", ""),
303
- "linkedin_post": social.get("linkedin_post", ""),
304
- }
305
- await _db_insert_journal(entry)
306
- return entry
307
-
308
- # Mount Gradio - REMOVED for Vercel deployment to stay under size limits
309
- # app = gr.mount_gradio_app(app, demo, path="/gradio")
310
 
311
- if __name__ == "__main__":
312
- import uvicorn
313
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import os
2
+ import sys
3
+ import traceback
 
 
 
 
 
 
 
 
 
 
4
  from fastapi import FastAPI, Request
5
+ from fastapi.responses import JSONResponse
6
  from fastapi.middleware.cors import CORSMiddleware
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ # Add this file's own directory to sys.path so sibling modules (e.g. agents) can be imported
9
+ sys.path.append(os.path.dirname(__file__))
 
 
 
 
 
 
 
 
10
 
11
+ # Global error capture for imports
12
+ IMPORT_ERROR = None
13
+ try:
14
+ from agents import run_pipeline, AMD_INFERENCE_URL, AMD_MODEL_NAME, AMD_INFERENCE_TOKEN, generate_social_post
15
+ except Exception as e:
16
+ IMPORT_ERROR = f"Import Error: {str(e)}\n{traceback.format_exc()}"
 
 
 
 
 
 
 
17
 
18
+ app = FastAPI(title="ForgeSight Debug API")
 
 
19
 
20
  app.add_middleware(
21
  CORSMiddleware,
 
24
  allow_headers=["*"],
25
  )
26
 
27
+ @app.middleware("http")
28
+ async def error_logging_middleware(request: Request, call_next):
29
+ if IMPORT_ERROR:
30
+ return JSONResponse({"status": "error", "message": IMPORT_ERROR}, status_code=500)
31
+ try:
32
+ return await call_next(request)
33
+ except Exception as e:
34
+ return JSONResponse({
35
+ "status": "error",
36
+ "message": str(e),
37
+ "traceback": traceback.format_exc()
38
+ }, status_code=500)
39
 
 
40
  @app.get("/api/health")
41
+ @app.get("/_/backend/api/health")
42
+ async def health():
43
+ return {"status": "online", "debug": True, "cwd": os.getcwd(), "path": sys.path}
44
 
45
  @app.get("/api/inspections")
46
+ @app.get("/_/backend/api/inspections")
47
+ async def list_inspections():
48
+ return {"items": [], "total": 0, "note": "Debug mode"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ # ... other minimal routes to avoid crashes ...