AJ STUDIOZ committed on
Commit
e4b755c
·
1 Parent(s): 761e525

Switch to Qwen2.5-Coder-0.5B with simplified prompts for reliability

Browse files
Files changed (1) hide show
  1. app.py +22 -45
app.py CHANGED
@@ -14,11 +14,10 @@ from datetime import datetime
14
 
15
  # Hugging Face API configuration
16
  HF_TOKEN = os.getenv("HF_TOKEN", "")
17
- HF_API_URL = "https://api-inference.huggingface.co/models/"
18
 
19
- # Use a powerful free model - Microsoft Phi-3 is excellent and fast
20
- MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
21
- API_URL = HF_API_URL + MODEL_NAME
22
 
23
  def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
24
  """Query Hugging Face Inference API"""
@@ -29,19 +28,24 @@ def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7
29
  if HF_TOKEN:
30
  headers["Authorization"] = f"Bearer {HF_TOKEN}"
31
 
 
32
  payload = {
33
  "inputs": prompt,
34
  "parameters": {
35
- "max_new_tokens": max_tokens,
36
  "temperature": temperature,
37
  "return_full_text": False,
38
- "do_sample": True,
39
  "top_p": 0.9
 
 
 
 
40
  }
41
  }
42
 
43
  try:
44
- response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
45
  return response
46
  except Exception as e:
47
  # Create a mock response for error handling
@@ -100,7 +104,7 @@ async def root():
100
  return {
101
  "service": "AJ STUDIOZ API",
102
  "version": "1.0",
103
- "model": "AJ-Mini v1.0 (powered by Phi-3 Mini)",
104
  "status": "online",
105
  "provider": "AJ STUDIOZ",
106
  "website": "https://ajstudioz.co.in",
@@ -198,15 +202,8 @@ async def anthropic_messages(
198
  prompt_parts.append("Assistant:")
199
  full_prompt = "\n\n".join(prompt_parts)
200
 
201
- # Format for Phi-3
202
- phi_prompt = f"""<|system|>
203
- {prompt_parts[0]}<|end|>
204
- <|user|>
205
- {prompt_parts[1] if len(prompt_parts) > 1 else 'Hello'}<|end|>
206
- <|assistant|>
207
- """
208
-
209
- response = query_hf_model(phi_prompt, max_tokens, temperature)
210
 
211
  if response.status_code == 200:
212
  result = response.json()
@@ -285,13 +282,8 @@ async def list_models(authorization: Optional[str] = Header(None)):
285
  async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
286
  """Generator for streaming responses using Hugging Face Inference API"""
287
  try:
288
- # Format prompt for Phi-3
289
- full_prompt = f"""<|system|>
290
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
291
- <|user|>
292
- {prompt}<|end|>
293
- <|assistant|>
294
- """
295
 
296
  response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
297
 
@@ -396,12 +388,7 @@ async def chat_completions(request: Request, authorization: Optional[str] = Head
396
  )
397
 
398
  # Non-streaming response
399
- full_prompt = f"""<|system|>
400
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
401
- <|user|>
402
- {prompt}<|end|>
403
- <|assistant|>
404
- """
405
 
406
  response = query_hf_model(full_prompt, max_tokens, temperature)
407
 
@@ -460,13 +447,8 @@ async def completions(request: Request, authorization: Optional[str] = Header(No
460
  if not prompt:
461
  raise HTTPException(status_code=400, detail="Prompt is required")
462
 
463
- # Call Hugging Face Inference API
464
- full_prompt = f"""<|system|>
465
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|end|>
466
- <|user|>
467
- {prompt}<|end|>
468
- <|assistant|>
469
- """
470
 
471
  response = query_hf_model(full_prompt, max_tokens, temperature)
472
 
@@ -514,14 +496,9 @@ async def chat(request: Request):
514
  return JSONResponse({"error": "Message is required"}, status_code=400)
515
 
516
  # Call Hugging Face Inference API
517
- full_message = f"""<|system|>
518
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|end|>
519
- <|user|>
520
- {message}<|end|>
521
- <|assistant|>
522
- """
523
-
524
- response = query_hf_model(full_message, 1000, 0.7)
525
 
526
  if response.status_code == 200:
527
  result = response.json()
 
14
 
15
  # Hugging Face API configuration
16
  HF_TOKEN = os.getenv("HF_TOKEN", "")
 
17
 
18
+ # Use Qwen2.5-Coder - Excellent for coding and general tasks
19
+ MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
20
+ API_URL = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"
21
 
22
  def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
23
  """Query Hugging Face Inference API"""
 
28
  if HF_TOKEN:
29
  headers["Authorization"] = f"Bearer {HF_TOKEN}"
30
 
31
+ # Use text-generation parameters
32
  payload = {
33
  "inputs": prompt,
34
  "parameters": {
35
+ "max_new_tokens": min(max_tokens, 500), # Limit for faster response
36
  "temperature": temperature,
37
  "return_full_text": False,
38
+ "do_sample": temperature > 0,
39
  "top_p": 0.9
40
+ },
41
+ "options": {
42
+ "wait_for_model": True,
43
+ "use_cache": False
44
  }
45
  }
46
 
47
  try:
48
+ response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
49
  return response
50
  except Exception as e:
51
  # Create a mock response for error handling
 
104
  return {
105
  "service": "AJ STUDIOZ API",
106
  "version": "1.0",
107
+ "model": "AJ-Mini v1.0 (Qwen2.5-Coder-0.5B)",
108
  "status": "online",
109
  "provider": "AJ STUDIOZ",
110
  "website": "https://ajstudioz.co.in",
 
202
  prompt_parts.append("Assistant:")
203
  full_prompt = "\n\n".join(prompt_parts)
204
 
205
+ # Simple prompt format (works with most models)
206
+ response = query_hf_model(full_prompt, max_tokens, temperature)
 
 
 
 
 
 
 
207
 
208
  if response.status_code == 200:
209
  result = response.json()
 
282
  async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
283
  """Generator for streaming responses using Hugging Face Inference API"""
284
  try:
285
+ # Simple prompt format
286
+ full_prompt = f"You are AJ, a professional AI assistant created by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
 
 
 
 
 
287
 
288
  response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
289
 
 
388
  )
389
 
390
  # Non-streaming response
391
+ full_prompt = f"You are AJ, a professional AI assistant created by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
 
 
 
 
 
392
 
393
  response = query_hf_model(full_prompt, max_tokens, temperature)
394
 
 
447
  if not prompt:
448
  raise HTTPException(status_code=400, detail="Prompt is required")
449
 
450
+ # Call Hugging Face Inference API
451
+ full_prompt = f"You are AJ, a professional AI assistant by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
 
 
 
 
 
452
 
453
  response = query_hf_model(full_prompt, max_tokens, temperature)
454
 
 
496
  return JSONResponse({"error": "Message is required"}, status_code=400)
497
 
498
  # Call Hugging Face Inference API
499
+ full_message = f"You are AJ, a helpful AI assistant by AJ STUDIOZ.\n\nUser: {message}\n\nAssistant:"
500
+
501
+ response = query_hf_model(full_message, 500, 0.7)
 
 
 
 
 
502
 
503
  if response.status_code == 200:
504
  result = response.json()