Spaces:

focustiki
/

Data_eng_designer

Sleeping

App Files Files Community

Data_eng_designer / app.py

focustiki

Update app.py

a5ba31a verified about 2 months ago

raw

history blame contribute delete

10.6 kB


	"""
	Data Engineering Knowledge Assistant — FastAPI Server
	━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
	Routes:
	POST /api/chat → streaming SSE (with keep-alive to defeat HF proxy buffering)
	POST /api/chat-simple → plain JSON fallback (no streaming)
	POST /api/transcribe → voice → text via Groq Whisper
	GET /api/health → readiness probe
	POST /api/search → raw vector search (debug)
	* / → PWA frontend (static/)
	"""
	from __future__ import annotations

	import os
	import json
	import asyncio
	import tempfile
	from contextlib import asynccontextmanager
	from typing import List, Optional

	from fastapi import FastAPI, HTTPException, UploadFile, File
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import StreamingResponse
	from fastapi.staticfiles import StaticFiles
	from pydantic import BaseModel

	# ──────────────────────────────────────────────────────────────────────────────
	# Global state
	# ──────────────────────────────────────────────────────────────────────────────

	# Process-wide singletons. Both stay None until `lifespan` assigns them at
	# startup; route handlers test them for truthiness before use.
	rag_pipeline = agent = None


	# ──────────────────────────────────────────────────────────────────────────────
	# Lifespan — init on startup
	# ──────────────────────────────────────────────────────────────────────────────

	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """
	    Application lifespan hook: build the RAG pipeline and the agent once,
	    publish them through the module-level `rag_pipeline` / `agent` globals,
	    then hand control to the server until shutdown.

	    Configuration is read from the environment:
	      PDF_PATH      path of the knowledge PDF (defaults to the bundled file)
	      GROQ_API_KEY  Groq credential; a warning is printed when it is missing,
	                    but startup continues regardless.
	    """
	    global rag_pipeline, agent

	    # Project modules are imported here rather than at module top.
	    # NOTE(review): presumably to keep heavy dependencies out of import time — confirm.
	    from rag import DataEngineeringRAG
	    from agent import DataEngineeringAgent

	    env = os.environ
	    pdf_path = env.get("PDF_PATH", "knowledge/data_engineering_patterns.pdf")
	    groq_key = env.get("GROQ_API_KEY", "")

	    # Only the key length is logged, never the key itself.
	    if groq_key:
	        print(f"✅ GROQ_API_KEY detected ({len(groq_key)} chars)")
	    else:
	        print("⚠️ GROQ_API_KEY not set — get a free key at https://console.groq.com")

	    print("🚀 Starting Data Engineering Knowledge Assistant …")
	    pipeline = DataEngineeringRAG(pdf_path=pdf_path, groq_api_key=groq_key)
	    rag_pipeline = pipeline
	    pipeline.initialize()

	    agent = DataEngineeringAgent(rag=pipeline, groq_api_key=groq_key)
	    print("✅ Agent ready — listening for requests")

	    # Everything after the yield runs when the application shuts down.
	    yield
	    print("👋 Shutting down")


	# ──────────────────────────────────────────────────────────────────────────────
	# App
	# ──────────────────────────────────────────────────────────────────────────────

	app = FastAPI(title="DE Knowledge Assistant", version="1.1.0", lifespan=lifespan)

	# CORS: allow any origin, method and header. The wildcard must be the literal
	# "*"; an empty string matches no origin and no method, which makes the
	# middleware reject every cross-origin request and every preflight.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_methods=["*"],
	    allow_headers=["*"],
	)


	# ──────────────────────────────────────────────────────────────────────────────
	# Schemas
	# ──────────────────────────────────────────────────────────────────────────────

	# One prior turn of the conversation. Handlers convert each instance to a
	# plain dict with model_dump() before passing the history to the agent.
	class ChatMessage(BaseModel):
	    # Speaker of the turn. Not validated here — presumably "user"/"assistant";
	    # confirm against the agent implementation.
	    role: str
	    # Text of the turn.
	    content: str


	# Request body shared by POST /api/chat and POST /api/chat-simple.
	class ChatRequest(BaseModel):
	    # The new user message to answer.
	    message: str
	    # Earlier turns, oldest-first ordering is not enforced here.
	    # NOTE: the field is Optional, so an explicit JSON null validates and
	    # reaches the handler as None rather than as an empty list.
	    history: Optional[List[ChatMessage]] = []
	    # Accepted from clients, but not read by any handler visible in this file.
	    stream: bool = True


	# Request body for the debug endpoint POST /api/search.
	class SearchRequest(BaseModel):
	    # Free-text query forwarded to rag_pipeline.search().
	    query: str
	    # Forwarded as the `k` argument of rag_pipeline.search(); no bounds are
	    # enforced here. Presumably the number of results to return — confirm in rag.py.
	    k: int = 5


	# ──────────────────────────────────────────────────────────────────────────────
	# Routes
	# ──────────────────────────────────────────────────────────────────────────────

	@app.get("/api/health")
	async def health():
	    # Readiness probe. Always reports "healthy"; the document count falls back
	    # to 0 while the RAG pipeline has not been created yet.
	    if rag_pipeline:
	        doc_count = rag_pipeline.get_doc_count()
	    else:
	        doc_count = 0

	    # Only whether the key exists is exposed, never its value.
	    key_present = bool(os.environ.get("GROQ_API_KEY"))

	    return {
	        "status": "healthy",
	        "model": "llama-3.1-8b-instant (Groq)",
	        "vectorstore_docs": doc_count,
	        "groq_key_set": key_present,
	        "version": "1.1.0",
	    }


	@app.post("/api/chat")
	async def chat(req: ChatRequest):
	    """
	    Streaming chat endpoint.

	    Critical for HF Spaces: we must flush bytes immediately — Cloudflare/nginx
	    will otherwise buffer the whole response until the generator finishes,
	    defeating streaming entirely. We emit an SSE comment every second as a
	    heartbeat so the proxy flushes the response chunk-by-chunk.

	    Each answer fragment is sent as `data: {"chunk": "..."}` and the stream
	    always ends with `data: [DONE]`. An exception raised by the agent while
	    streaming is reported in-band as one more chunk before `[DONE]`.

	    Raises:
	        HTTPException: 503 when the agent has not been initialised.
	    """
	    if not agent:
	        raise HTTPException(503, "Agent not initialised")

	    # `history` is Optional in ChatRequest, so an explicit JSON null arrives
	    # here as None; treat it as "no history" instead of failing on iteration.
	    history = [m.model_dump() for m in (req.history or [])]

	    async def event_stream():
	        # ── Force the proxy to start sending immediately ──────────────────
	        yield ": keep-alive\n\n"

	        # Producer and heartbeat both write into one queue; this generator
	        # is the only reader, so items reach the client in arrival order.
	        queue: asyncio.Queue = asyncio.Queue()
	        heartbeat_stop = asyncio.Event()

	        # ── Heartbeat task — keeps the connection flowing in slow moments ──
	        async def heartbeat(queue: asyncio.Queue):
	            while not heartbeat_stop.is_set():
	                try:
	                    # Sleep for up to one second, waking early once stopped.
	                    await asyncio.wait_for(heartbeat_stop.wait(), timeout=1.0)
	                except asyncio.TimeoutError:
	                    # A line starting with ":" is an SSE comment.
	                    await queue.put(": ping\n\n")

	        hb_task = asyncio.create_task(heartbeat(queue))

	        async def producer():
	            try:
	                async for chunk in agent.astream(message=req.message, history=history):
	                    await queue.put(f"data: {json.dumps({'chunk': chunk})}\n\n")
	            except Exception as exc:
	                err = json.dumps({"chunk": f"\n\n⚠️ Server error: {type(exc).__name__}: {exc}"})
	                await queue.put(f"data: {err}\n\n")
	            finally:
	                # The terminator is queued on success and on failure alike,
	                # so the consumer loop below always ends.
	                await queue.put("data: [DONE]\n\n")
	                heartbeat_stop.set()

	        prod_task = asyncio.create_task(producer())

	        try:
	            while True:
	                item = await queue.get()
	                yield item
	                if item.startswith("data: [DONE]"):
	                    break
	        finally:
	            # Runs on normal completion and when the generator is closed
	            # early; stop both background tasks in either case.
	            hb_task.cancel()
	            prod_task.cancel()

	    return StreamingResponse(
	        event_stream(),
	        media_type="text/event-stream",
	        headers={
	            "Cache-Control": "no-cache, no-transform",
	            "X-Accel-Buffering": "no",
	            "Connection": "keep-alive",
	        },
	    )


	@app.post("/api/chat-simple")
	async def chat_simple(req: ChatRequest):
	    """
	    Non-streaming fallback. Used by the PWA when SSE is blocked
	    (corporate proxies, some mobile networks, etc.)

	    Returns {"response": "..."} holding the agent's complete answer.

	    Raises:
	        HTTPException: 503 when the agent has not been initialised;
	            500 when the agent raises while producing the answer.
	    """
	    if not agent:
	        raise HTTPException(503, "Agent not initialised")

	    # `history` is Optional in ChatRequest, so an explicit JSON null arrives
	    # here as None; treat it as "no history" instead of failing on iteration.
	    history = [m.model_dump() for m in (req.history or [])]

	    try:
	        # Drain the async generator, then join the pieces into one string.
	        chunks = [c async for c in agent.astream(message=req.message, history=history)]
	        return {"response": "".join(chunks)}
	    except Exception as exc:
	        raise HTTPException(500, f"{type(exc).__name__}: {exc}") from exc


	@app.post("/api/transcribe")
	async def transcribe(audio: UploadFile = File(...)):
	    """
	    Voice-to-text via Groq Whisper (free tier).
	    Accepts webm/ogg/mp3/wav. Returns {"text": "..."}.

	    Why this beats the browser Web Speech API:
	    • Works in Brave / Safari / iOS PWA (Web Speech needs Google's proxy)
	    • Works inside HF Spaces iframe (no cross-origin STT issues)
	    • Whisper accuracy >> webkitSpeechRecognition

	    Raises:
	        HTTPException: 500 when GROQ_API_KEY is not configured or when the
	            transcription request fails for any reason.
	    """
	    groq_key = os.environ.get("GROQ_API_KEY", "")
	    if not groq_key:
	        raise HTTPException(500, "GROQ_API_KEY not configured")

	    try:
	        from groq import Groq
	        client = Groq(api_key=groq_key)

	        # The SDK call takes a (filename, bytes) pair, so the upload is passed
	        # straight from memory. No temporary file is created, hence nothing
	        # can be left behind on disk when the request fails.
	        filename = audio.filename or "audio.webm"
	        payload = await audio.read()

	        def _call_whisper():
	            return client.audio.transcriptions.create(
	                file=(filename, payload),
	                model="whisper-large-v3-turbo",  # fastest, free-tier friendly
	                language="en",
	                temperature=0.0,
	            )

	        # The Groq client call is synchronous; run it on the default executor
	        # so the event loop (and the SSE heartbeats of /api/chat) keeps running.
	        loop = asyncio.get_running_loop()
	        result = await loop.run_in_executor(None, _call_whisper)
	        return {"text": result.text}

	    except Exception as exc:
	        raise HTTPException(500, f"Transcription failed: {type(exc).__name__}: {exc}") from exc


	@app.post("/api/search")
	async def search(req: SearchRequest):
	    # Debug endpoint: run the query against the RAG pipeline and echo the
	    # query back next to whatever rag_pipeline.search() returns.
	    if not rag_pipeline:
	        raise HTTPException(503, "RAG not initialised")

	    hits = rag_pipeline.search(req.query, k=req.k)
	    return {"query": req.query, "results": hits}


	# ──────────────────────────────────────────────────────────────────────────────
	# Static frontend — mount LAST so API routes take priority
	# ──────────────────────────────────────────────────────────────────────────────

	# Serve the frontend from the "static" directory at the site root. The path
	# is relative, so it is resolved against the process working directory.
	# html=True enables Starlette's HTML mode (index.html served for directories).
	app.mount("/", StaticFiles(directory="static", html=True), name="static")


	if __name__ == "__main__":
	    # Direct execution (`python app.py`): start uvicorn on all interfaces.
	    import uvicorn

	    # PORT comes from the environment; 7860 is used when it is unset.
	    raw_port = os.environ.get("PORT", 7860)
	    uvicorn.run("app:app", host="0.0.0.0", port=int(raw_port), log_level="info")