SharmaGroups07 committed on
Commit
c7135bb
·
verified ·
1 Parent(s): 48b1cf8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -9
app.py CHANGED
@@ -2,9 +2,14 @@ from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
 
5
 
6
  app = FastAPI()
7
 
 
 
 
 
8
  MODEL_REPO = "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF"
9
  MODEL_FILE = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf"
10
 
@@ -13,33 +18,89 @@ model_path = hf_hub_download(
13
  filename=MODEL_FILE
14
  )
15
 
 
 
 
 
16
  llm = Llama(
17
  model_path=model_path,
18
- n_ctx=2048,
19
- n_threads=2
 
 
 
 
 
 
 
 
 
 
 
 
20
  )
21
 
 
 
 
 
22
  class ChatRequest(BaseModel):
23
  message: str
24
 
 
 
 
 
25
  @app.get("/")
26
  def root():
27
- return {"status": "AI engine running"}
 
 
 
 
28
 
29
  @app.post("/chat")
30
  def chat(req: ChatRequest):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  output = llm(
32
- f"<|user|>{req.message}<|assistant|>",
33
- max_tokens=512,
34
- temperature=0.7,
 
 
 
 
 
 
35
  top_p=0.9,
36
- repeat_penalty=1.1,
 
 
 
37
  stop=["<|end|>"]
38
  )
39
 
40
- return {"reply": output["choices"][0]["text"]}
 
 
 
 
 
 
41
 
42
- # ⭐ THIS PART WAS MISSING
43
  if __name__ == "__main__":
44
  import uvicorn
45
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
+ import multiprocessing
6
 
7
  app = FastAPI()
8
 
9
# ===============================
# MODEL CONFIG
# ===============================

# Hugging Face repo hosting the quantized GGUF build of Qwen2.5-Coder.
MODEL_REPO = "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF"

# Q4_K_M quantization: a good quality/size trade-off for CPU-only inference.
MODEL_FILE = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf"
15
 
 
18
  filename=MODEL_FILE
19
  )
20
 
21
# ===============================
# LLM INITIALIZATION (OPTIMIZED)
# ===============================

llm = Llama(
    model_path=model_path,
    n_ctx=8192,                             # roomy context window for coding tasks
    n_threads=multiprocessing.cpu_count(),  # use every CPU core reported
    n_gpu_layers=0,                         # pure CPU inference, no GPU offload
    n_batch=512,                            # bigger prompt-eval batches = faster ingest
    use_mmap=True,                          # memory-map the weights instead of copying
    use_mlock=True,                         # pin weights in RAM to avoid swap stalls
)
42
 
43
# ===============================
# REQUEST MODEL
# ===============================

class ChatRequest(BaseModel):
    """JSON body for POST /chat: a single user message for the model."""

    # Raw user prompt; forwarded into the LLM prompt template verbatim.
    message: str
49
 
50
# ===============================
# HEALTH CHECK
# ===============================

@app.get("/")
def root():
    """Liveness probe: confirms the API process is up and serving."""
    status_payload = {"status": "Coding AI engine running"}
    return status_payload
57
+
58
# ===============================
# CHAT ENDPOINT
# ===============================

@app.post("/chat")
def chat(req: ChatRequest):
    """Generate a coding-focused reply for the user's message.

    Uses llama-cpp-python's chat-completion API so the chat template
    embedded in the GGUF (Qwen2.5 uses ChatML: <|im_start|>/<|im_end|>)
    is applied automatically. The previous hand-rolled
    "<|user|>...<|assistant|>" prompt with stop token "<|end|>" belongs
    to a different model family; Qwen never emits "<|end|>", so every
    request ran to max_tokens and response quality suffered.

    Returns a JSON object: {"reply": <generated text>}.
    """
    # Coding-specialized system prompt (same instructions as before,
    # minus the wrong special-token wrappers — the template adds its own).
    system_prompt = (
        "You are an elite senior software engineer AI. "
        "Write clean, production-ready code. "
        "Always include comments. "
        "Use best practices, error handling, and optimization. "
        "Format output in proper markdown with code blocks."
    )

    output = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": req.message},
        ],
        max_tokens=800,      # generous budget for code-heavy answers
        temperature=0.4,     # low randomness = more reliable code
        top_p=0.9,           # nucleus sampling for stable generation
        repeat_penalty=1.2,  # damp repetition loops
    )

    # Chat-completion responses carry the text under message.content;
    # guard against a None content before stripping.
    response_text = (output["choices"][0]["message"]["content"] or "").strip()

    return {"reply": response_text}
99
+
100
# ===============================
# LOCAL RUN
# ===============================

if __name__ == "__main__":
    # Lazy import: uvicorn is only needed when launching this file directly.
    import uvicorn

    # 0.0.0.0:7860 is the port Hugging Face Spaces expects to be exposed.
    uvicorn.run(app, host="0.0.0.0", port=7860)