Abhay557 committed on
Commit
3e13dcc
·
verified ·
1 Parent(s): 462230f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -53
app.py CHANGED
@@ -1,27 +1,25 @@
1
  """
2
- Code Collab AI Backend — Fast code generation API
3
- Uses Qwen 2.5 Coder 0.5B GGUF for low-latency HTML/CSS/JS generation.
4
- Model is loaded once at startup and stays in memory forever.
5
  """
6
 
7
  import os
8
  import re
9
  import time
10
- from contextlib import asynccontextmanager
11
 
12
  from fastapi import FastAPI, HTTPException
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from pydantic import BaseModel
15
- from llama_cpp import Llama
16
 
17
  # ─── Config ────────────────────────────────────────────────────────────
18
- MODEL_PATH = os.path.join(os.path.dirname(__file__), "qwen2.5-coder-0.5b-instruct-q4_k_m.gguf")
19
- N_CTX = 1536 # Smaller context = faster (free tier)
20
- N_THREADS = 2 # HF free tier has 2 vCPU
21
- MAX_TOKENS = 512 # Keep output short for speed
22
- TEMPERATURE = 0.5 # Lower = faster + more deterministic
23
 
24
- # ─── System prompt (optimized for structured output) ───────────────────
25
  SYSTEM_PROMPT = """You are a web code generator. Given a user request, output ONLY three fenced code blocks:
26
 
27
  ```html
@@ -39,31 +37,17 @@ SYSTEM_PROMPT = """You are a web code generator. Given a user request, output ON
39
  Rules:
40
  - No explanations, no markdown text outside code blocks
41
  - If a section is not needed, output an empty code block for it
42
- - Write clean, modern code"""
 
43
 
44
- # ─── Global model reference (loaded once, stays forever) ──────────────
45
- llm = None
46
-
47
-
48
- @asynccontextmanager
49
- async def lifespan(app: FastAPI):
50
- """Load model once at startup. It stays in memory for the entire lifetime."""
51
- global llm
52
- print(f"🔄 Loading model from {MODEL_PATH}...")
53
- llm = Llama(
54
- model_path=MODEL_PATH,
55
- n_ctx=N_CTX,
56
- n_threads=N_THREADS,
57
- n_gpu_layers=0, # CPU only (free HF Spaces)
58
- verbose=False,
59
- )
60
- print("🚀 Model loaded and ready! It will stay in memory forever.")
61
- yield
62
- # Model stays loaded — never unloaded
63
 
 
64
 
65
  # ─── FastAPI App ───────────────────────────────────────────────────────
66
- app = FastAPI(title="Code Collab AI", lifespan=lifespan)
67
 
68
  app.add_middleware(
69
  CORSMiddleware,
@@ -72,13 +56,16 @@ app.add_middleware(
72
  allow_headers=["*"],
73
  )
74
 
 
 
 
 
75
 
76
  class GenerateRequest(BaseModel):
77
- prompt: str
78
  max_tokens: int = MAX_TOKENS
79
  temperature: float = TEMPERATURE
80
 
81
-
82
  class GenerateResponse(BaseModel):
83
  html: str
84
  css: str
@@ -86,7 +73,6 @@ class GenerateResponse(BaseModel):
86
  raw: str
87
  time_ms: int
88
 
89
-
90
  def parse_code_blocks(text: str) -> dict:
91
  """Extract HTML, CSS, JS from fenced code blocks."""
92
  result = {"html": "", "css": "", "js": ""}
@@ -108,34 +94,43 @@ def parse_code_blocks(text: str) -> dict:
108
 
109
  return result
110
 
111
-
112
  # ─── API Endpoints ─────────────────────────────────────────────────────
113
 
114
  @app.get("/")
115
  def health():
116
- return {"status": "ok", "model": "Qwen2.5-Coder-0.5B-Instruct-GGUF", "loaded": llm is not None}
117
-
 
 
 
 
118
 
119
  @app.post("/generate", response_model=GenerateResponse)
120
  def generate(req: GenerateRequest):
121
- if llm is None:
122
- raise HTTPException(503, "Model not loaded yet")
123
 
124
- if not req.prompt.strip():
125
- raise HTTPException(400, "Prompt cannot be empty")
126
 
127
  start = time.time()
128
 
129
- output = llm.create_chat_completion(
130
- messages=[
131
- {"role": "system", "content": SYSTEM_PROMPT},
132
- {"role": "user", "content": req.prompt},
133
- ],
134
- max_tokens=req.max_tokens,
135
- temperature=req.temperature,
136
- )
137
-
138
- raw_text = output["choices"][0]["message"]["content"]
 
 
 
 
 
 
139
  elapsed_ms = int((time.time() - start) * 1000)
140
 
141
  parsed = parse_code_blocks(raw_text)
@@ -146,4 +141,4 @@ def generate(req: GenerateRequest):
146
  js=parsed["js"],
147
  raw=raw_text,
148
  time_ms=elapsed_ms,
149
- )
 
1
  """
2
+ Code Collab AI Backend — Fast code generation API (Groq Edition)
3
+ Now with Conversation History Support!
 
4
  """
5
 
6
  import os
7
  import re
8
  import time
9
+ from typing import List
10
 
11
  from fastapi import FastAPI, HTTPException
12
  from fastapi.middleware.cors import CORSMiddleware
13
  from pydantic import BaseModel
14
+ from groq import Groq
15
 
16
  # ─── Config ────────────────────────────────────────────────────────────
17
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
18
+ MODEL_NAME = "qwen-2.5-coder-32b"
19
+ MAX_TOKENS = 2048 # Increased slightly for iterative changes
20
+ TEMPERATURE = 0.5
 
21
 
22
+ # ─── System prompt ─────────────────────────────────────────────────────
23
  SYSTEM_PROMPT = """You are a web code generator. Given a user request, output ONLY three fenced code blocks:
24
 
25
  ```html
 
37
  Rules:
38
  - No explanations, no markdown text outside code blocks
39
  - If a section is not needed, output an empty code block for it
40
+ - Write clean, modern code
41
+ - When the user asks for edits, rewrite the FULL code blocks with the changes applied."""
42
 
43
+ # ─── Groq Client Setup ─────────────────────────────────────────────────
44
+ if not GROQ_API_KEY:
45
+ print("⚠️ WARNING: GROQ_API_KEY environment variable is not set!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ client = Groq(api_key=GROQ_API_KEY)
48
 
49
  # ─── FastAPI App ───────────────────────────────────────────────────────
50
+ app = FastAPI(title="Code Collab AI")
51
 
52
  app.add_middleware(
53
  CORSMiddleware,
 
56
  allow_headers=["*"],
57
  )
58
 
59
+ # New Message model to handle history
60
+ class Message(BaseModel):
61
+ role: str
62
+ content: str
63
 
64
  class GenerateRequest(BaseModel):
65
+ messages: List[Message]
66
  max_tokens: int = MAX_TOKENS
67
  temperature: float = TEMPERATURE
68
 
 
69
  class GenerateResponse(BaseModel):
70
  html: str
71
  css: str
 
73
  raw: str
74
  time_ms: int
75
 
 
76
  def parse_code_blocks(text: str) -> dict:
77
  """Extract HTML, CSS, JS from fenced code blocks."""
78
  result = {"html": "", "css": "", "js": ""}
 
94
 
95
  return result
96
 
 
97
  # ─── API Endpoints ─────────────────────────────────────────────────────
98
 
99
  @app.get("/")
100
  def health():
101
+ return {
102
+ "status": "ok",
103
+ "provider": "Groq",
104
+ "model": MODEL_NAME,
105
+ "api_key_configured": GROQ_API_KEY is not None
106
+ }
107
 
108
  @app.post("/generate", response_model=GenerateResponse)
109
  def generate(req: GenerateRequest):
110
+ if not GROQ_API_KEY:
111
+ raise HTTPException(status_code=500, detail="Groq API key is missing on the server")
112
 
113
+ if not req.messages:
114
+ raise HTTPException(status_code=400, detail="Message history cannot be empty")
115
 
116
  start = time.time()
117
 
118
+ # Prepend the system prompt to the conversation history
119
+ api_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
120
+ for msg in req.messages:
121
+ api_messages.append({"role": msg.role, "content": msg.content})
122
+
123
+ try:
124
+ output = client.chat.completions.create(
125
+ model=MODEL_NAME,
126
+ messages=api_messages,
127
+ max_tokens=req.max_tokens,
128
+ temperature=req.temperature,
129
+ )
130
+ except Exception as e:
131
+ raise HTTPException(status_code=502, detail=f"Groq API error: {str(e)}")
132
+
133
+ raw_text = output.choices[0].message.content
134
  elapsed_ms = int((time.time() - start) * 1000)
135
 
136
  parsed = parse_code_blocks(raw_text)
 
141
  js=parsed["js"],
142
  raw=raw_text,
143
  time_ms=elapsed_ms,
144
+ )