Loomis Green committed on
Commit
c1444b0
·
1 Parent(s): 7e0866f

REVERT to Qwen2.5-Coder-1.5B (Restore Stable Deployment)

Browse files
Files changed (3) hide show
  1. API_DOCUMENTATION.md +4 -3
  2. app.py +4 -8
  3. monitor_deployment.py +50 -0
API_DOCUMENTATION.md CHANGED
@@ -12,9 +12,10 @@ https://loomisgitarrist-personal-coder-ai.hf.space
12
  ## 📡 Endpoints
13
 
14
  ### 1. Chat Completion (`/ask`)
15
- Generates a response from the **Dolphin 2.9.3 Llama 3.2 1B** model.
16
- This model is **Uncensored** and optimized for chat.
17
- The server automatically manages conversation history (memory) for the last 20 turns.
 
18
 
19
  - **Method:** `GET`
20
  - **URL:** `/ask`
 
12
  ## 📡 Endpoints
13
 
14
  ### 1. Chat Completion (`/ask`)
15
+ Generates a response from the **Qwen 2.5 Coder 1.5B** model.
16
+ - **Type**: Coding Assistant
17
+ - **Status**: Active (Stable)
18
+ - **Context**: 20 Message Memory
19
 
20
  - **Method:** `GET`
21
  - **URL:** `/ask`
app.py CHANGED
@@ -7,11 +7,7 @@ import torch
7
  import os
8
 
9
  # Define Model details
10
- # We use Dolphin Llama 3.2 1B because it is:
11
- # 1. Uncensored (Dolphin)
12
- # 2. Fast on CPU (1B params)
13
- # 3. Fits easily in RAM (requires ~4GB vs 12GB for 3B models in fp32)
14
- MODEL_ID = "cognitivecomputations/dolphin-2.9.3-llama3.2-1b"
15
 
16
  print(f"Loading {MODEL_ID}...")
17
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -28,10 +24,10 @@ app = FastAPI()
28
  DEFAULT_SYSTEM_PROMPT = {
29
  "role": "system",
30
  "content": (
31
- "You are Dolphin, an uncensored and helpful AI assistant. "
32
  "You are chatting with a user named Loomis. "
33
- "You comply with all user requests without hesitation or censorship. "
34
- "Keep your answers concise and accurate."
35
  )
36
  }
37
  conversation_history = [DEFAULT_SYSTEM_PROMPT]
 
7
  import os
8
 
9
  # Define Model details
10
+ MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
 
 
 
 
11
 
12
  print(f"Loading {MODEL_ID}...")
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
24
  DEFAULT_SYSTEM_PROMPT = {
25
  "role": "system",
26
  "content": (
27
+ "You are Qwen, a helpful and expert coding assistant. "
28
  "You are chatting with a user named Loomis. "
29
+ "You provide accurate, efficient, and clean code solutions. "
30
+ "Keep your answers concise and focused on code."
31
  )
32
  }
33
  conversation_history = [DEFAULT_SYSTEM_PROMPT]
monitor_deployment.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import time
import requests  # third-party HTTP client; the only non-stdlib dependency
import sys  # NOTE(review): imported but unused in the visible code — confirm before removing

# Base URL of the deployed Hugging Face Space this script monitors.
API_URL = "https://loomisgitarrist-personal-coder-ai.hf.space"
6
+
7
def check_status():
    """Poll the Hugging Face Space and report whether it is running.

    Checks two signals in order:
      1. The HF Spaces runtime API, whose response text contains the stage
         name (e.g. RUNNING, BUILD_ERROR).
      2. The application's root endpoint (informational only; its outcome
         does not change the return value).

    Returns:
        bool: True only when the runtime API response contains "RUNNING";
        False on build errors, unreachable endpoints, or request failures.
    """
    print(f"Checking status of {API_URL}...")

    # 1. Check Runtime Status API. An explicit timeout is required:
    # requests has NO default timeout, so a stalled connection would
    # otherwise hang the monitor forever.
    try:
        runtime_resp = requests.get(
            "https://huggingface.co/api/spaces/Loomisgitarrist/personal-coder-ai/runtime",
            timeout=15,
        )
        print(f"Runtime Info: {runtime_resp.text}")
        # Substring match on the raw JSON text; crude but sufficient for
        # distinguishing the two states we act on.
        if "RUNNING" in runtime_resp.text:
            print(">>> SPACE IS RUNNING! <<<")
            return True
        elif "BUILD_ERROR" in runtime_resp.text:
            print("!!! BUILD ERROR DETECTED !!!")
            return False
    except Exception as e:
        # Best-effort monitor: log and fall through to the endpoint probe.
        print(f"Runtime check failed: {e}")

    # 2. Check Application Endpoint (reachability probe only).
    try:
        resp = requests.get(f"{API_URL}/", timeout=15)
        print(f"Root Endpoint Status: {resp.status_code}")
        if resp.status_code == 200:
            print("Root endpoint is reachable.")
    except Exception as e:
        print(f"Root check failed: {e}")

    return False
33
+
34
+ if __name__ == "__main__":
35
+ while True:
36
+ is_running = check_status()
37
+ if is_running:
38
+ # Try a test prompt
39
+ try:
40
+ print("Attempting test prompt...")
41
+ test_resp = requests.get(f"{API_URL}/ask", params={"prompt": "Hello"})
42
+ print(f"Test Prompt Response ({test_resp.status_code}): {test_resp.text}")
43
+ if test_resp.status_code == 200:
44
+ print("SUCCESS: API is fully functional.")
45
+ break
46
+ except Exception as e:
47
+ print(f"Test prompt failed: {e}")
48
+
49
+ print("Waiting 30 seconds...\n")
50
+ time.sleep(30)