AJ STUDIOZ committed on
Commit
761e525
·
1 Parent(s): d49710e

Update to Phi-3 model with proper prompt formatting

Browse files
Files changed (1) hide show
  1. app.py +55 -28
app.py CHANGED
@@ -16,13 +16,18 @@ from datetime import datetime
16
  HF_TOKEN = os.getenv("HF_TOKEN", "")
17
  HF_API_URL = "https://api-inference.huggingface.co/models/"
18
 
19
- # Use a powerful model good for coding - Meta Llama is free and excellent
20
- MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
21
  API_URL = HF_API_URL + MODEL_NAME
22
 
23
  def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
24
  """Query Hugging Face Inference API"""
25
- headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
 
 
 
 
26
 
27
  payload = {
28
  "inputs": prompt,
@@ -30,15 +35,22 @@ def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7
30
  "max_new_tokens": max_tokens,
31
  "temperature": temperature,
32
  "return_full_text": False,
33
- "do_sample": True
 
34
  }
35
  }
36
 
37
- if stream:
38
- payload["stream"] = True
39
-
40
- response = requests.post(API_URL, headers=headers, json=payload, stream=stream)
41
- return response
 
 
 
 
 
 
42
 
43
  # Simple API key validation for AJ format
44
  VALID_API_KEY_PREFIX = "aj_"
@@ -88,7 +100,7 @@ async def root():
88
  return {
89
  "service": "AJ STUDIOZ API",
90
  "version": "1.0",
91
- "model": "AJ-Mini v1.0 (powered by Llama 3.2 3B)",
92
  "status": "online",
93
  "provider": "AJ STUDIOZ",
94
  "website": "https://ajstudioz.co.in",
@@ -186,12 +198,15 @@ async def anthropic_messages(
186
  prompt_parts.append("Assistant:")
187
  full_prompt = "\n\n".join(prompt_parts)
188
 
189
- # Format for Llama
190
- llama_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
191
- {prompt_parts[0]}<|eot_id|><|start_header_id|>user<|end_header_id|>
192
- {prompt_parts[1] if len(prompt_parts) > 1 else 'Hello'}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
193
 
194
- response = query_hf_model(llama_prompt, max_tokens, temperature)
195
 
196
  if response.status_code == 200:
197
  result = response.json()
@@ -270,10 +285,13 @@ async def list_models(authorization: Optional[str] = Header(None)):
270
  async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
271
  """Generator for streaming responses using Hugging Face Inference API"""
272
  try:
273
- # Format prompt for Llama
274
- full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
275
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
276
- {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
277
 
278
  response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
279
 
@@ -378,9 +396,12 @@ async def chat_completions(request: Request, authorization: Optional[str] = Head
378
  )
379
 
380
  # Non-streaming response
381
- full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
382
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
383
- {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
384
 
385
  response = query_hf_model(full_prompt, max_tokens, temperature)
386
 
@@ -440,9 +461,12 @@ async def completions(request: Request, authorization: Optional[str] = Header(No
440
  raise HTTPException(status_code=400, detail="Prompt is required")
441
 
442
  # Call Hugging Face Inference API
443
- full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
444
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
445
- {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
446
 
447
  response = query_hf_model(full_prompt, max_tokens, temperature)
448
 
@@ -490,9 +514,12 @@ async def chat(request: Request):
490
  return JSONResponse({"error": "Message is required"}, status_code=400)
491
 
492
  # Call Hugging Face Inference API
493
- full_message = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
494
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
495
- {message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
496
 
497
  response = query_hf_model(full_message, 1000, 0.7)
498
 
 
16
  HF_TOKEN = os.getenv("HF_TOKEN", "")
17
  HF_API_URL = "https://api-inference.huggingface.co/models/"
18
 
19
+ # Use a powerful free model - Microsoft Phi-3 is excellent and fast
20
+ MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
21
  API_URL = HF_API_URL + MODEL_NAME
22
 
23
def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
    """Query the Hugging Face Inference API and return a Response-like object.

    Args:
        prompt: Fully formatted model prompt (callers build a Phi-3 chat
            template string before passing it in).
        max_tokens: Cap on generated tokens ("max_new_tokens" in the payload).
        temperature: Sampling temperature forwarded to the model.
        stream: When True, ask for a streamed generation and keep the HTTP
            connection open so callers can iterate over response chunks.

    Returns:
        The `requests.Response` on success, or a minimal duck-typed object
        exposing ``status_code == 500``, ``json()`` and ``text`` describing
        the failure, so callers can handle errors without try/except.
    """
    headers = {"Content-Type": "application/json"}
    if HF_TOKEN:
        headers["Authorization"] = f"Bearer {HF_TOKEN}"

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "return_full_text": False,
            "do_sample": True,
            "top_p": 0.9
        }
    }

    # BUG FIX: `stream` was accepted but ignored, so stream_chat_response
    # (which passes stream=True) silently received a fully-buffered,
    # non-streaming response. Propagate the flag to both the payload and
    # requests.post, as the pre-Phi-3 version did.
    if stream:
        payload["stream"] = True

    try:
        return requests.post(API_URL, headers=headers, json=payload, timeout=30, stream=stream)
    except Exception as e:
        # Duck-typed stand-in for requests.Response so call sites can check
        # status_code / json() uniformly instead of wrapping every call.
        class ErrorResponse:
            status_code = 500
            text = str(e)

            def json(self):
                return {"error": str(e)}

        return ErrorResponse()
54
 
55
  # Simple API key validation for AJ format
56
  VALID_API_KEY_PREFIX = "aj_"
 
100
  return {
101
  "service": "AJ STUDIOZ API",
102
  "version": "1.0",
103
+ "model": "AJ-Mini v1.0 (powered by Phi-3 Mini)",
104
  "status": "online",
105
  "provider": "AJ STUDIOZ",
106
  "website": "https://ajstudioz.co.in",
 
198
  prompt_parts.append("Assistant:")
199
  full_prompt = "\n\n".join(prompt_parts)
200
 
201
+ # Format for Phi-3
202
+ phi_prompt = f"""<|system|>
203
+ {prompt_parts[0]}<|end|>
204
+ <|user|>
205
+ {prompt_parts[1] if len(prompt_parts) > 1 else 'Hello'}<|end|>
206
+ <|assistant|>
207
+ """
208
 
209
+ response = query_hf_model(phi_prompt, max_tokens, temperature)
210
 
211
  if response.status_code == 200:
212
  result = response.json()
 
285
  async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
286
  """Generator for streaming responses using Hugging Face Inference API"""
287
  try:
288
+ # Format prompt for Phi-3
289
+ full_prompt = f"""<|system|>
290
+ You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
291
+ <|user|>
292
+ {prompt}<|end|>
293
+ <|assistant|>
294
+ """
295
 
296
  response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
297
 
 
396
  )
397
 
398
  # Non-streaming response
399
+ full_prompt = f"""<|system|>
400
+ You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
401
+ <|user|>
402
+ {prompt}<|end|>
403
+ <|assistant|>
404
+ """
405
 
406
  response = query_hf_model(full_prompt, max_tokens, temperature)
407
 
 
461
  raise HTTPException(status_code=400, detail="Prompt is required")
462
 
463
  # Call Hugging Face Inference API
464
+ full_prompt = f"""<|system|>
465
+ You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|end|>
466
+ <|user|>
467
+ {prompt}<|end|>
468
+ <|assistant|>
469
+ """
470
 
471
  response = query_hf_model(full_prompt, max_tokens, temperature)
472
 
 
514
  return JSONResponse({"error": "Message is required"}, status_code=400)
515
 
516
  # Call Hugging Face Inference API
517
+ full_message = f"""<|system|>
518
+ You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|end|>
519
+ <|user|>
520
+ {message}<|end|>
521
+ <|assistant|>
522
+ """
523
 
524
  response = query_hf_model(full_message, 1000, 0.7)
525