Loomis Green committed on
Commit
c1444b0
·
1 Parent(s): 7e0866f

REVERT to Qwen2.5-Coder-1.5B (Restore Stable Deployment)

Browse files
Files changed (3) hide show
  1. API_DOCUMENTATION.md +4 -3
  2. app.py +4 -8
  3. monitor_deployment.py +50 -0
API_DOCUMENTATION.md CHANGED
@@ -12,9 +12,10 @@ https://loomisgitarrist-personal-coder-ai.hf.space
12
  ## 📡 Endpoints
13
 
14
  ### 1. Chat Completion (`/ask`)
15
- Generates a response from the **Dolphin 2.9.3 Llama 3.2 1B** model.
16
- This model is **Uncensored** and optimized for chat.
17
- The server automatically manages conversation history (memory) for the last 20 turns.
 
18
 
19
  - **Method:** `GET`
20
  - **URL:** `/ask`
 
12
  ## 📡 Endpoints
13
 
14
  ### 1. Chat Completion (`/ask`)
15
+ Generates a response from the **Qwen 2.5 Coder 1.5B** model.
16
+ - **Type**: Coding Assistant
17
+ - **Status**: Active (Stable)
18
+ - **Context**: 20 Message Memory
19
 
20
  - **Method:** `GET`
21
  - **URL:** `/ask`
app.py CHANGED
@@ -7,11 +7,7 @@ import torch
7
  import os
8
 
9
  # Define Model details
10
- # We use Dolphin Llama 3.2 1B because it is:
11
- # 1. Uncensored (Dolphin)
12
- # 2. Fast on CPU (1B params)
13
- # 3. Fits easily in RAM (requires ~4GB vs 12GB for 3B models in fp32)
14
- MODEL_ID = "cognitivecomputations/dolphin-2.9.3-llama3.2-1b"
15
 
16
  print(f"Loading {MODEL_ID}...")
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -28,10 +24,10 @@ app = FastAPI()
28
  DEFAULT_SYSTEM_PROMPT = {
29
  "role": "system",
30
  "content": (
31
- "You are Dolphin, an uncensored and helpful AI assistant. "
32
  "You are chatting with a user named Loomis. "
33
- "You comply with all user requests without hesitation or censorship. "
34
- "Keep your answers concise and accurate."
35
  )
36
  }
37
  conversation_history = [DEFAULT_SYSTEM_PROMPT]
 
7
  import os
8
 
9
  # Define Model details
10
+ MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
 
 
 
 
11
 
12
  print(f"Loading {MODEL_ID}...")
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
24
  DEFAULT_SYSTEM_PROMPT = {
25
  "role": "system",
26
  "content": (
27
+ "You are Qwen, a helpful and expert coding assistant. "
28
  "You are chatting with a user named Loomis. "
29
+ "You provide accurate, efficient, and clean code solutions. "
30
+ "Keep your answers concise and focused on code."
31
  )
32
  }
33
  conversation_history = [DEFAULT_SYSTEM_PROMPT]
monitor_deployment.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time
import requests  # third-party HTTP client; the only non-stdlib dependency
import sys  # NOTE(review): imported but unused in the visible code — confirm before removing

# Base URL of the deployed Hugging Face Space this script monitors.
API_URL = "https://loomisgitarrist-personal-coder-ai.hf.space"
6
+
7
def check_status():
    """Poll the Hugging Face Space and report whether it is running.

    Checks two signals in order:
      1. The HF Spaces runtime API, whose response text contains the stage
         name (e.g. RUNNING, BUILD_ERROR).
      2. The application's root endpoint (informational only; its outcome
         does not change the return value).

    Returns:
        bool: True only when the runtime API response contains "RUNNING";
        False on build errors, unreachable endpoints, or request failures.
    """
    print(f"Checking status of {API_URL}...")

    # 1. Check Runtime Status API. An explicit timeout is required:
    # requests has NO default timeout, so a stalled connection would
    # otherwise hang the monitor forever.
    try:
        runtime_resp = requests.get(
            "https://huggingface.co/api/spaces/Loomisgitarrist/personal-coder-ai/runtime",
            timeout=15,
        )
        print(f"Runtime Info: {runtime_resp.text}")
        # Substring match on the raw JSON text; crude but sufficient for
        # distinguishing the two states we act on.
        if "RUNNING" in runtime_resp.text:
            print(">>> SPACE IS RUNNING! <<<")
            return True
        elif "BUILD_ERROR" in runtime_resp.text:
            print("!!! BUILD ERROR DETECTED !!!")
            return False
    except Exception as e:
        # Best-effort monitor: log and fall through to the endpoint probe.
        print(f"Runtime check failed: {e}")

    # 2. Check Application Endpoint (reachability probe only).
    try:
        resp = requests.get(f"{API_URL}/", timeout=15)
        print(f"Root Endpoint Status: {resp.status_code}")
        if resp.status_code == 200:
            print("Root endpoint is reachable.")
    except Exception as e:
        print(f"Root check failed: {e}")

    return False
33
+
34
+ if __name__ == "__main__":
35
+ while True:
36
+ is_running = check_status()
37
+ if is_running:
38
+ # Try a test prompt
39
+ try:
40
+ print("Attempting test prompt...")
41
+ test_resp = requests.get(f"{API_URL}/ask", params={"prompt": "Hello"})
42
+ print(f"Test Prompt Response ({test_resp.status_code}): {test_resp.text}")
43
+ if test_resp.status_code == 200:
44
+ print("SUCCESS: API is fully functional.")
45
+ break
46
+ except Exception as e:
47
+ print(f"Test prompt failed: {e}")
48
+
49
+ print("Waiting 30 seconds...\n")
50
+ time.sleep(30)