Spaces:

Abhay557
/

code-collab

Running

App Files Files Community

Abhay557 committed on Mar 14

Commit

3e13dcc

verified ·

1 Parent(s): 462230f

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -53

app.py CHANGED Viewed

@@ -1,27 +1,25 @@
 """
-Code Collab AI Backend — Fast code generation API
-Uses Qwen 2.5 Coder 0.5B GGUF for low-latency HTML/CSS/JS generation.
-Model is loaded once at startup and stays in memory forever.
 """
 import os
 import re
 import time
-from contextlib import asynccontextmanager
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from llama_cpp import Llama
 # ─── Config ────────────────────────────────────────────────────────────
-MODEL_PATH = os.path.join(os.path.dirname(__file__), "qwen2.5-coder-0.5b-instruct-q4_k_m.gguf")
-N_CTX = 1536        # Smaller context = faster (free tier)
-N_THREADS = 2       # HF free tier has 2 vCPU
-MAX_TOKENS = 512    # Keep output short for speed
-TEMPERATURE = 0.5   # Lower = faster + more deterministic
-# ─── System prompt (optimized for structured output) ───────────────────
 SYSTEM_PROMPT = """You are a web code generator. Given a user request, output ONLY three fenced code blocks:
 ```html
@@ -39,31 +37,17 @@ SYSTEM_PROMPT = """You are a web code generator. Given a user request, output ON
 Rules:
 - No explanations, no markdown text outside code blocks
 - If a section is not needed, output an empty code block for it
-- Write clean, modern code"""
-# ─── Global model reference (loaded once, stays forever) ──────────────
-llm = None
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    """Load model once at startup. It stays in memory for the entire lifetime."""
-    global llm
-    print(f"🔄 Loading model from {MODEL_PATH}...")
-    llm = Llama(
-        model_path=MODEL_PATH,
-        n_ctx=N_CTX,
-        n_threads=N_THREADS,
-        n_gpu_layers=0,  # CPU only (free HF Spaces)
-        verbose=False,
-    )
-    print("🚀 Model loaded and ready! It will stay in memory forever.")
-    yield
-    # Model stays loaded — never unloaded
 # ─── FastAPI App ───────────────────────────────────────────────────────
-app = FastAPI(title="Code Collab AI", lifespan=lifespan)
 app.add_middleware(
     CORSMiddleware,
@@ -72,13 +56,16 @@ app.add_middleware(
     allow_headers=["*"],
 )
 class GenerateRequest(BaseModel):
-    prompt: str
     max_tokens: int = MAX_TOKENS
     temperature: float = TEMPERATURE
 class GenerateResponse(BaseModel):
     html: str
     css: str
@@ -86,7 +73,6 @@ class GenerateResponse(BaseModel):
     raw: str
     time_ms: int
 def parse_code_blocks(text: str) -> dict:
     """Extract HTML, CSS, JS from fenced code blocks."""
     result = {"html": "", "css": "", "js": ""}
@@ -108,34 +94,43 @@ def parse_code_blocks(text: str) -> dict:
     return result
 # ─── API Endpoints ─────────────────────────────────────────────────────
 @app.get("/")
 def health():
-    return {"status": "ok", "model": "Qwen2.5-Coder-0.5B-Instruct-GGUF", "loaded": llm is not None}
 @app.post("/generate", response_model=GenerateResponse)
 def generate(req: GenerateRequest):
-    if llm is None:
-        raise HTTPException(503, "Model not loaded yet")
-    if not req.prompt.strip():
-        raise HTTPException(400, "Prompt cannot be empty")
     start = time.time()
-    output = llm.create_chat_completion(
-        messages=[
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": req.prompt},
-        ],
-        max_tokens=req.max_tokens,
-        temperature=req.temperature,
-    )
-    raw_text = output["choices"][0]["message"]["content"]
     elapsed_ms = int((time.time() - start) * 1000)
     parsed = parse_code_blocks(raw_text)
@@ -146,4 +141,4 @@ def generate(req: GenerateRequest):
         js=parsed["js"],
         raw=raw_text,
         time_ms=elapsed_ms,
-    )

 """
+Code Collab AI Backend — Fast code generation API (Groq Edition)
+Now with Conversation History Support!
 """
 import os
 import re
 import time
+from typing import List
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+from groq import Groq
 # ─── Config ────────────────────────────────────────────────────────────
+GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+MODEL_NAME = "qwen-2.5-coder-32b"
+MAX_TOKENS = 2048   # Increased slightly for iterative changes
+TEMPERATURE = 0.5
+# ─── System prompt ─────────────────────────────────────────────────────
 SYSTEM_PROMPT = """You are a web code generator. Given a user request, output ONLY three fenced code blocks:
 ```html
 Rules:
 - No explanations, no markdown text outside code blocks
 - If a section is not needed, output an empty code block for it
+- Write clean, modern code
+- When the user asks for edits, rewrite the FULL code blocks with the changes applied."""
+# ─── Groq Client Setup ─────────────────────────────────────────────────
+if not GROQ_API_KEY:
+    print("⚠️ WARNING: GROQ_API_KEY environment variable is not set!")
+client = Groq(api_key=GROQ_API_KEY)
 # ─── FastAPI App ───────────────────────────────────────────────────────
+app = FastAPI(title="Code Collab AI")
 app.add_middleware(
     CORSMiddleware,
     allow_headers=["*"],
 )
+# New Message model to handle history
+class Message(BaseModel):
+    role: str
+    content: str
 class GenerateRequest(BaseModel):
+    messages: List[Message]
     max_tokens: int = MAX_TOKENS
     temperature: float = TEMPERATURE
 class GenerateResponse(BaseModel):
     html: str
     css: str
     raw: str
     time_ms: int
 def parse_code_blocks(text: str) -> dict:
     """Extract HTML, CSS, JS from fenced code blocks."""
     result = {"html": "", "css": "", "js": ""}
     return result
 # ─── API Endpoints ─────────────────────────────────────────────────────
 @app.get("/")
 def health():
+    return {
+        "status": "ok",
+        "provider": "Groq",
+        "model": MODEL_NAME,
+        "api_key_configured": GROQ_API_KEY is not None
+    }
 @app.post("/generate", response_model=GenerateResponse)
 def generate(req: GenerateRequest):
+    if not GROQ_API_KEY:
+        raise HTTPException(status_code=500, detail="Groq API key is missing on the server")
+    if not req.messages:
+        raise HTTPException(status_code=400, detail="Message history cannot be empty")
     start = time.time()
+    # Prepend the system prompt to the conversation history
+    api_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    for msg in req.messages:
+        api_messages.append({"role": msg.role, "content": msg.content})
+    try:
+        output = client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=api_messages,
+            max_tokens=req.max_tokens,
+            temperature=req.temperature,
+        )
+    except Exception as e:
+        raise HTTPException(status_code=502, detail=f"Groq API error: {str(e)}")
+    raw_text = output.choices[0].message.content
     elapsed_ms = int((time.time() - start) * 1000)
     parsed = parse_code_blocks(raw_text)
         js=parsed["js"],
         raw=raw_text,
         time_ms=elapsed_ms,
+    )