Spaces:
Sleeping
Sleeping
Loomis Green committed on
Commit ·
c1444b0
1
Parent(s): 7e0866f
REVERT to Qwen2.5-Coder-1.5B (Restore Stable Deployment)
Browse files- API_DOCUMENTATION.md +4 -3
- app.py +4 -8
- monitor_deployment.py +50 -0
API_DOCUMENTATION.md
CHANGED
|
@@ -12,9 +12,10 @@ https://loomisgitarrist-personal-coder-ai.hf.space
|
|
| 12 |
## 📡 Endpoints
|
| 13 |
|
| 14 |
### 1. Chat Completion (`/ask`)
|
| 15 |
-
Generates a response from the **
|
| 16 |
-
|
| 17 |
-
|
|
|
|
| 18 |
|
| 19 |
- **Method:** `GET`
|
| 20 |
- **URL:** `/ask`
|
|
|
|
| 12 |
## 📡 Endpoints
|
| 13 |
|
| 14 |
### 1. Chat Completion (`/ask`)
|
| 15 |
+
Generates a response from the **Qwen 2.5 Coder 1.5B** model.
|
| 16 |
+
- **Type**: Coding Assistant
|
| 17 |
+
- **Status**: Active (Stable)
|
| 18 |
+
- **Context**: 20 Message Memory
|
| 19 |
|
| 20 |
- **Method:** `GET`
|
| 21 |
- **URL:** `/ask`
|
app.py
CHANGED
|
@@ -7,11 +7,7 @@ import torch
|
|
| 7 |
import os
|
| 8 |
|
| 9 |
# Define Model details
|
| 10 |
-
|
| 11 |
-
# 1. Uncensored (Dolphin)
|
| 12 |
-
# 2. Fast on CPU (1B params)
|
| 13 |
-
# 3. Fits easily in RAM (requires ~4GB vs 12GB for 3B models in fp32)
|
| 14 |
-
MODEL_ID = "cognitivecomputations/dolphin-2.9.3-llama3.2-1b"
|
| 15 |
|
| 16 |
print(f"Loading {MODEL_ID}...")
|
| 17 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
|
@@ -28,10 +24,10 @@ app = FastAPI()
|
|
| 28 |
DEFAULT_SYSTEM_PROMPT = {
|
| 29 |
"role": "system",
|
| 30 |
"content": (
|
| 31 |
-
"You are
|
| 32 |
"You are chatting with a user named Loomis. "
|
| 33 |
-
"You
|
| 34 |
-
"Keep your answers concise and
|
| 35 |
)
|
| 36 |
}
|
| 37 |
conversation_history = [DEFAULT_SYSTEM_PROMPT]
|
|
|
|
| 7 |
import os
|
| 8 |
|
| 9 |
# Define Model details
|
| 10 |
+
MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
print(f"Loading {MODEL_ID}...")
|
| 13 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
|
|
|
| 24 |
# Persistent system message that seeds every conversation: it pins the
# assistant persona (Qwen, coding-focused) and the user's name.
DEFAULT_SYSTEM_PROMPT = {
    "role": "system",
    "content": (
        "You are Qwen, a helpful and expert coding assistant. "
        "You are chatting with a user named Loomis. "
        "You provide accurate, efficient, and clean code solutions. "
        "Keep your answers concise and focused on code."
    ),
}

# Rolling chat history; always begins with the system prompt above.
conversation_history = [DEFAULT_SYSTEM_PROMPT]
|
monitor_deployment.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import requests
|
| 3 |
+
import sys
|
| 4 |
+
|
| 5 |
+
API_URL = "https://loomisgitarrist-personal-coder-ai.hf.space"
|
| 6 |
+
|
| 7 |
+
def check_status():
    """Probe the deployed Space and report whether it is up.

    Checks the Hugging Face runtime-status API first; if the runtime
    reports RUNNING, returns True immediately. Otherwise also probes the
    application's root endpoint (diagnostic logging only) and returns
    False.

    Returns:
        bool: True only when the runtime API reports RUNNING.
    """
    print(f"Checking status of {API_URL}...")

    # 1. Check Runtime Status API
    try:
        # timeout= keeps the monitor from hanging forever on a stalled
        # connection (requests has no default timeout).
        runtime_resp = requests.get(
            "https://huggingface.co/api/spaces/Loomisgitarrist/personal-coder-ai/runtime",
            timeout=10,
        )
        print(f"Runtime Info: {runtime_resp.text}")
        if "RUNNING" in runtime_resp.text:
            print(">>> SPACE IS RUNNING! <<<")
            return True
        elif "BUILD_ERROR" in runtime_resp.text:
            print("!!! BUILD ERROR DETECTED !!!")
            return False
    except Exception as e:
        # Best-effort monitor: log the failure and fall through to the
        # application-endpoint probe below.
        print(f"Runtime check failed: {e}")

    # 2. Check Application Endpoint (logged for diagnostics; does not
    # change the return value — only the runtime API counts as "running").
    try:
        resp = requests.get(f"{API_URL}/", timeout=10)
        print(f"Root Endpoint Status: {resp.status_code}")
        if resp.status_code == 200:
            print("Root endpoint is reachable.")
    except Exception as e:
        print(f"Root check failed: {e}")

    return False
|
| 33 |
+
|
| 34 |
+
if __name__ == "__main__":
    # Poll until the Space reports RUNNING *and* the /ask endpoint answers
    # a test prompt successfully; retry every 30 seconds otherwise.
    while True:
        is_running = check_status()
        if is_running:
            # Try a test prompt to confirm the model actually serves requests.
            try:
                print("Attempting test prompt...")
                # Generation can be slow on CPU, so allow a generous
                # timeout rather than hanging forever (requests has no
                # default timeout).
                test_resp = requests.get(
                    f"{API_URL}/ask",
                    params={"prompt": "Hello"},
                    timeout=120,
                )
                print(f"Test Prompt Response ({test_resp.status_code}): {test_resp.text}")
                if test_resp.status_code == 200:
                    print("SUCCESS: API is fully functional.")
                    break
            except Exception as e:
                print(f"Test prompt failed: {e}")

        print("Waiting 30 seconds...\n")
        time.sleep(30)