Loomis Green committed
Commit 5c3cb1b · 1 Parent(s): 086c91c

Switch to Qwen2.5-Coder-14B-Instruct-Uncensored GGUF

Files changed (3)
  1. app.py +35 -37
  2. requirements.txt +2 -5
  3. static/index.html +1 -1
app.py CHANGED
@@ -1,22 +1,34 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import FileResponse, JSONResponse
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import torch
+from fastapi.responses import FileResponse
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+import os
 
-# Load model (Switching to Qwen2.5-1.5B-Instruct for significantly better logic/reasoning)
-print("Loading Qwen2.5-1.5B-Instruct Model...")
-checkpoint = "Qwen/Qwen2.5-1.5B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForCausalLM.from_pretrained(checkpoint)
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+# Define Model details
+REPO_ID = "roleplaiapp/Qwen2.5-Coder-14B-Instruct-Uncensored-Q4_K_S-GGUF"
+FILENAME = "Qwen2.5-Coder-14B-Instruct-Uncensored.Q4_K_S.gguf"
+
+print(f"Downloading {FILENAME} from {REPO_ID}...")
+model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
+print(f"Model downloaded to: {model_path}")
+
+print("Loading Llama model...")
+# Initialize Llama model
+# n_ctx=4096: Context window (RAM usage scales with this)
+# n_threads=2: Hugging Face Spaces free tier usually has 2 vCPUs
+llm = Llama(
+    model_path=model_path,
+    n_ctx=4096,
+    n_threads=2,
+    verbose=True
+)
 print("Model Loaded Successfully!")
 
 app = FastAPI()
 
 # Global Conversation History (Simple Server-Side Memory)
-# We initialize with a system prompt that defines the persona clearly.
 DEFAULT_SYSTEM_PROMPT = {
     "role": "system",
     "content": (
@@ -24,6 +36,7 @@ DEFAULT_SYSTEM_PROMPT = {
         "You are chatting with a user named Loomis (unless they tell you otherwise). "
         "Your name is Loomyloo. The user's name is Loomis. "
         "Never confuse your name with the user's name. "
+        "You are running on the powerful Qwen2.5-Coder-14B-Instruct-Uncensored model. "
         "Keep your answers concise, friendly, and helpful."
     )
 }
@@ -55,42 +68,27 @@ def ask(prompt: str):
     conversation_history.append({"role": "user", "content": prompt})
 
     # 2. Prune History (Keep System Prompt + Last 10 exchanges)
-    # This prevents the context from getting too large for the model
    if len(conversation_history) > 21:
        conversation_history = [DEFAULT_SYSTEM_PROMPT] + conversation_history[-20:]
 
    print(f"Current History Length: {len(conversation_history)}")
 
-    # 3. Format Prompt for the Model
-    prompt_text = tokenizer.apply_chat_template(
-        conversation_history,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-
-    # 4. Generate Response
-    result = pipe(
-        prompt_text,
-        max_new_tokens=512,  # Allow for longer, more thoughtful responses
-        do_sample=True,
-        temperature=0.7,
-        top_p=0.9,
-        return_full_text=False
+    # 3. Generate Response using llama-cpp-python chat completion
+    response = llm.create_chat_completion(
+        messages=conversation_history,
+        max_tokens=512,
+        temperature=0.7,
+        top_p=0.9
     )
 
-    generated_text = result[0]['generated_text']
+    # Extract text from response
+    generated_text = response['choices'][0]['message']['content']
 
-    # Clean up (sometimes models output the role label)
-    if generated_text.startswith("assistant"):
-        generated_text = generated_text.replace("assistant", "", 1).strip()
-
-    # 5. Add Assistant Response to History
+    # 4. Add Assistant Response to History
    conversation_history.append({"role": "assistant", "content": generated_text})
 
-    # 6. Return Result
-    # We explicitly update the result dict to include the cleaned text
-    result[0]['generated_text'] = generated_text
-    return result[0]
+    # 5. Return Result (keeping format consistent with previous API)
+    return {"generated_text": generated_text}
 
 # Serve Static Files
 app.mount("/static", StaticFiles(directory="static"), name="static")
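
A note on the new generation call: create_chat_completion as written blocks until all 512 tokens are produced, which can take a while for a 14B Q4 model on two CPU threads. llama-cpp-python also supports stream=True, which yields OpenAI-style chunks as tokens arrive. A minimal sketch, not part of this commit; the helper name stream_reply is hypothetical:

    def stream_reply(llm, messages):
        """Stream a chat completion and return the full reply text."""
        reply = ""
        for chunk in llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            top_p=0.9,
            stream=True,  # yield incremental chunks instead of one blocking result
        ):
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:  # the first chunk carries only the role
                reply += delta["content"]
        return reply

Relatedly, the new file adds import os but never uses it in the hunks shown; if the intent was thread detection, n_threads=os.cpu_count() or 2 would adapt to larger hardware while keeping the two-thread default for the free tier.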
requirements.txt CHANGED
@@ -1,8 +1,5 @@
 fastapi[standard]
 uvicorn
-transformers
-torch
-torchvision
 aiofiles
-sentencepiece
-accelerate
+huggingface_hub
+llama-cpp-python
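
One deployment caveat: pip typically builds llama-cpp-python from source via CMake when no prebuilt wheel matches the platform, so the first build of the Space can take several minutes. A quick smoke test, hypothetical and not part of this commit, to confirm the install before the multi-gigabyte GGUF download starts:

    # Confirm the llama-cpp-python build imports cleanly.
    import llama_cpp
    print("llama_cpp version:", llama_cpp.__version__)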
static/index.html CHANGED
@@ -81,7 +81,7 @@
 <body>
 
 <div id="chat-container">
-    <div class="message ai-message">Hello! I am Loomyloo (v4) (running on Qwen2.5-1.5B). I am much smarter now! How can I help you?</div>
+    <div class="message ai-message">Hello! I am Loomyloo (v5) (running on Qwen2.5-Coder-14B-Instruct-Uncensored GGUF). I am powerful and uncensored! How can I help you?</div>
 </div>
 
 <div id="input-area">