Spaces:
Sleeping
Sleeping
AJ STUDIOZ committed on
Commit ·
e4b755c
1
Parent(s): 761e525
Switch to Qwen2.5-Coder-0.5B with simplified prompts for reliability
Browse files
app.py
CHANGED
|
@@ -14,11 +14,10 @@ from datetime import datetime
|
|
| 14 |
|
| 15 |
# Hugging Face API configuration
|
| 16 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 17 |
-
HF_API_URL = "https://api-inference.huggingface.co/models/"
|
| 18 |
|
| 19 |
-
# Use
|
| 20 |
-
MODEL_NAME = "
|
| 21 |
-
API_URL =
|
| 22 |
|
| 23 |
def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
|
| 24 |
"""Query Hugging Face Inference API"""
|
|
@@ -29,19 +28,24 @@ def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7
|
|
| 29 |
if HF_TOKEN:
|
| 30 |
headers["Authorization"] = f"Bearer {HF_TOKEN}"
|
| 31 |
|
|
|
|
| 32 |
payload = {
|
| 33 |
"inputs": prompt,
|
| 34 |
"parameters": {
|
| 35 |
-
"max_new_tokens": max_tokens,
|
| 36 |
"temperature": temperature,
|
| 37 |
"return_full_text": False,
|
| 38 |
-
"do_sample":
|
| 39 |
"top_p": 0.9
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
}
|
| 41 |
}
|
| 42 |
|
| 43 |
try:
|
| 44 |
-
response = requests.post(API_URL, headers=headers, json=payload, timeout=
|
| 45 |
return response
|
| 46 |
except Exception as e:
|
| 47 |
# Create a mock response for error handling
|
|
@@ -100,7 +104,7 @@ async def root():
|
|
| 100 |
return {
|
| 101 |
"service": "AJ STUDIOZ API",
|
| 102 |
"version": "1.0",
|
| 103 |
-
"model": "AJ-Mini v1.0 (
|
| 104 |
"status": "online",
|
| 105 |
"provider": "AJ STUDIOZ",
|
| 106 |
"website": "https://ajstudioz.co.in",
|
|
@@ -198,15 +202,8 @@ async def anthropic_messages(
|
|
| 198 |
prompt_parts.append("Assistant:")
|
| 199 |
full_prompt = "\n\n".join(prompt_parts)
|
| 200 |
|
| 201 |
-
#
|
| 202 |
-
|
| 203 |
-
{prompt_parts[0]}<|end|>
|
| 204 |
-
<|user|>
|
| 205 |
-
{prompt_parts[1] if len(prompt_parts) > 1 else 'Hello'}<|end|>
|
| 206 |
-
<|assistant|>
|
| 207 |
-
"""
|
| 208 |
-
|
| 209 |
-
response = query_hf_model(phi_prompt, max_tokens, temperature)
|
| 210 |
|
| 211 |
if response.status_code == 200:
|
| 212 |
result = response.json()
|
|
@@ -285,13 +282,8 @@ async def list_models(authorization: Optional[str] = Header(None)):
|
|
| 285 |
async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
|
| 286 |
"""Generator for streaming responses using Hugging Face Inference API"""
|
| 287 |
try:
|
| 288 |
-
#
|
| 289 |
-
full_prompt = f""
|
| 290 |
-
You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
|
| 291 |
-
<|user|>
|
| 292 |
-
{prompt}<|end|>
|
| 293 |
-
<|assistant|>
|
| 294 |
-
"""
|
| 295 |
|
| 296 |
response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
|
| 297 |
|
|
@@ -396,12 +388,7 @@ async def chat_completions(request: Request, authorization: Optional[str] = Head
|
|
| 396 |
)
|
| 397 |
|
| 398 |
# Non-streaming response
|
| 399 |
-
full_prompt = f""
|
| 400 |
-
You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
|
| 401 |
-
<|user|>
|
| 402 |
-
{prompt}<|end|>
|
| 403 |
-
<|assistant|>
|
| 404 |
-
"""
|
| 405 |
|
| 406 |
response = query_hf_model(full_prompt, max_tokens, temperature)
|
| 407 |
|
|
@@ -460,13 +447,8 @@ async def completions(request: Request, authorization: Optional[str] = Header(No
|
|
| 460 |
if not prompt:
|
| 461 |
raise HTTPException(status_code=400, detail="Prompt is required")
|
| 462 |
|
| 463 |
-
# Call Hugging Face Inference API
|
| 464 |
-
full_prompt = f""
|
| 465 |
-
You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|end|>
|
| 466 |
-
<|user|>
|
| 467 |
-
{prompt}<|end|>
|
| 468 |
-
<|assistant|>
|
| 469 |
-
"""
|
| 470 |
|
| 471 |
response = query_hf_model(full_prompt, max_tokens, temperature)
|
| 472 |
|
|
@@ -514,14 +496,9 @@ async def chat(request: Request):
|
|
| 514 |
return JSONResponse({"error": "Message is required"}, status_code=400)
|
| 515 |
|
| 516 |
# Call Hugging Face Inference API
|
| 517 |
-
full_message = f""
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
{message}<|end|>
|
| 521 |
-
<|assistant|>
|
| 522 |
-
"""
|
| 523 |
-
|
| 524 |
-
response = query_hf_model(full_message, 1000, 0.7)
|
| 525 |
|
| 526 |
if response.status_code == 200:
|
| 527 |
result = response.json()
|
|
|
|
| 14 |
|
| 15 |
# Hugging Face API configuration
|
| 16 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
|
|
|
| 17 |
|
| 18 |
+
# Use Qwen2.5-Coder - Excellent for coding and general tasks
|
| 19 |
+
MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
|
| 20 |
+
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"
|
| 21 |
|
| 22 |
def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
|
| 23 |
"""Query Hugging Face Inference API"""
|
|
|
|
| 28 |
if HF_TOKEN:
|
| 29 |
headers["Authorization"] = f"Bearer {HF_TOKEN}"
|
| 30 |
|
| 31 |
+
# Use text-generation parameters
|
| 32 |
payload = {
|
| 33 |
"inputs": prompt,
|
| 34 |
"parameters": {
|
| 35 |
+
"max_new_tokens": min(max_tokens, 500), # Limit for faster response
|
| 36 |
"temperature": temperature,
|
| 37 |
"return_full_text": False,
|
| 38 |
+
"do_sample": temperature > 0,
|
| 39 |
"top_p": 0.9
|
| 40 |
+
},
|
| 41 |
+
"options": {
|
| 42 |
+
"wait_for_model": True,
|
| 43 |
+
"use_cache": False
|
| 44 |
}
|
| 45 |
}
|
| 46 |
|
| 47 |
try:
|
| 48 |
+
response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
|
| 49 |
return response
|
| 50 |
except Exception as e:
|
| 51 |
# Create a mock response for error handling
|
|
|
|
| 104 |
return {
|
| 105 |
"service": "AJ STUDIOZ API",
|
| 106 |
"version": "1.0",
|
| 107 |
+
"model": "AJ-Mini v1.0 (Qwen2.5-Coder-0.5B)",
|
| 108 |
"status": "online",
|
| 109 |
"provider": "AJ STUDIOZ",
|
| 110 |
"website": "https://ajstudioz.co.in",
|
|
|
|
| 202 |
prompt_parts.append("Assistant:")
|
| 203 |
full_prompt = "\n\n".join(prompt_parts)
|
| 204 |
|
| 205 |
+
# Simple prompt format (works with most models)
|
| 206 |
+
response = query_hf_model(full_prompt, max_tokens, temperature)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
if response.status_code == 200:
|
| 209 |
result = response.json()
|
|
|
|
| 282 |
async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
|
| 283 |
"""Generator for streaming responses using Hugging Face Inference API"""
|
| 284 |
try:
|
| 285 |
+
# Simple prompt format
|
| 286 |
+
full_prompt = f"You are AJ, a professional AI assistant created by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
|
| 289 |
|
|
|
|
| 388 |
)
|
| 389 |
|
| 390 |
# Non-streaming response
|
| 391 |
+
full_prompt = f"You are AJ, a professional AI assistant created by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
|
| 393 |
response = query_hf_model(full_prompt, max_tokens, temperature)
|
| 394 |
|
|
|
|
| 447 |
if not prompt:
|
| 448 |
raise HTTPException(status_code=400, detail="Prompt is required")
|
| 449 |
|
| 450 |
+
# Call Hugging Face Inference API
|
| 451 |
+
full_prompt = f"You are AJ, a professional AI assistant by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
|
| 453 |
response = query_hf_model(full_prompt, max_tokens, temperature)
|
| 454 |
|
|
|
|
| 496 |
return JSONResponse({"error": "Message is required"}, status_code=400)
|
| 497 |
|
| 498 |
# Call Hugging Face Inference API
|
| 499 |
+
full_message = f"You are AJ, a helpful AI assistant by AJ STUDIOZ.\n\nUser: {message}\n\nAssistant:"
|
| 500 |
+
|
| 501 |
+
response = query_hf_model(full_message, 500, 0.7)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
|
| 503 |
if response.status_code == 200:
|
| 504 |
result = response.json()
|