Spaces:
Sleeping
Sleeping
AJ STUDIOZ committed on
Commit ·
e4b755c
1
Parent(s): 761e525
Switch to Qwen2.5-Coder-0.5B with simplified prompts for reliability
Browse files
app.py
CHANGED
|
@@ -14,11 +14,10 @@ from datetime import datetime
|
|
| 14 |
|
| 15 |
# Hugging Face API configuration
|
| 16 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 17 |
-
HF_API_URL = "https://api-inference.huggingface.co/models/"
|
| 18 |
|
| 19 |
-
# Use
|
| 20 |
-
MODEL_NAME = "
|
| 21 |
-
API_URL =
|
| 22 |
|
| 23 |
def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
|
| 24 |
"""Query Hugging Face Inference API"""
|
|
@@ -29,19 +28,24 @@ def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7
|
|
| 29 |
if HF_TOKEN:
|
| 30 |
headers["Authorization"] = f"Bearer {HF_TOKEN}"
|
| 31 |
|
|
|
|
| 32 |
payload = {
|
| 33 |
"inputs": prompt,
|
| 34 |
"parameters": {
|
| 35 |
-
"max_new_tokens": max_tokens,
|
| 36 |
"temperature": temperature,
|
| 37 |
"return_full_text": False,
|
| 38 |
-
"do_sample":
|
| 39 |
"top_p": 0.9
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
}
|
| 41 |
}
|
| 42 |
|
| 43 |
try:
|
| 44 |
-
response = requests.post(API_URL, headers=headers, json=payload, timeout=
|
| 45 |
return response
|
| 46 |
except Exception as e:
|
| 47 |
# Create a mock response for error handling
|
|
@@ -100,7 +104,7 @@ async def root():
|
|
| 100 |
return {
|
| 101 |
"service": "AJ STUDIOZ API",
|
| 102 |
"version": "1.0",
|
| 103 |
-
"model": "AJ-Mini v1.0 (
|
| 104 |
"status": "online",
|
| 105 |
"provider": "AJ STUDIOZ",
|
| 106 |
"website": "https://ajstudioz.co.in",
|
|
@@ -198,15 +202,8 @@ async def anthropic_messages(
|
|
| 198 |
prompt_parts.append("Assistant:")
|
| 199 |
full_prompt = "\n\n".join(prompt_parts)
|
| 200 |
|
| 201 |
-
#
|
| 202 |
-
|
| 203 |
-
{prompt_parts[0]}<|end|>
|
| 204 |
-
<|user|>
|
| 205 |
-
{prompt_parts[1] if len(prompt_parts) > 1 else 'Hello'}<|end|>
|
| 206 |
-
<|assistant|>
|
| 207 |
-
"""
|
| 208 |
-
|
| 209 |
-
response = query_hf_model(phi_prompt, max_tokens, temperature)
|
| 210 |
|
| 211 |
if response.status_code == 200:
|
| 212 |
result = response.json()
|
|
@@ -285,13 +282,8 @@ async def list_models(authorization: Optional[str] = Header(None)):
|
|
| 285 |
async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
|
| 286 |
"""Generator for streaming responses using Hugging Face Inference API"""
|
| 287 |
try:
|
| 288 |
-
#
|
| 289 |
-
full_prompt = f""
|
| 290 |
-
You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
|
| 291 |
-
<|user|>
|
| 292 |
-
{prompt}<|end|>
|
| 293 |
-
<|assistant|>
|
| 294 |
-
"""
|
| 295 |
|
| 296 |
response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
|
| 297 |
|
|
@@ -396,12 +388,7 @@ async def chat_completions(request: Request, authorization: Optional[str] = Head
|
|
| 396 |
)
|
| 397 |
|
| 398 |
# Non-streaming response
|
| 399 |
-
full_prompt = f""
|
| 400 |
-
You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
|
| 401 |
-
<|user|>
|
| 402 |
-
{prompt}<|end|>
|
| 403 |
-
<|assistant|>
|
| 404 |
-
"""
|
| 405 |
|
| 406 |
response = query_hf_model(full_prompt, max_tokens, temperature)
|
| 407 |
|
|
@@ -460,13 +447,8 @@ async def completions(request: Request, authorization: Optional[str] = Header(No
|
|
| 460 |
if not prompt:
|
| 461 |
raise HTTPException(status_code=400, detail="Prompt is required")
|
| 462 |
|
| 463 |
-
# Call Hugging Face Inference API
|
| 464 |
-
full_prompt = f""
|
| 465 |
-
You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|end|>
|
| 466 |
-
<|user|>
|
| 467 |
-
{prompt}<|end|>
|
| 468 |
-
<|assistant|>
|
| 469 |
-
"""
|
| 470 |
|
| 471 |
response = query_hf_model(full_prompt, max_tokens, temperature)
|
| 472 |
|
|
@@ -514,14 +496,9 @@ async def chat(request: Request):
|
|
| 514 |
return JSONResponse({"error": "Message is required"}, status_code=400)
|
| 515 |
|
| 516 |
# Call Hugging Face Inference API
|
| 517 |
-
full_message = f""
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
{message}<|end|>
|
| 521 |
-
<|assistant|>
|
| 522 |
-
"""
|
| 523 |
-
|
| 524 |
-
response = query_hf_model(full_message, 1000, 0.7)
|
| 525 |
|
| 526 |
if response.status_code == 200:
|
| 527 |
result = response.json()
|
|
|
|
| 14 |
|
| 15 |
# Hugging Face API configuration
|
| 16 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
|
|
|
| 17 |
|
| 18 |
+
# Use Qwen2.5-Coder - Excellent for coding and general tasks
|
| 19 |
+
MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
|
| 20 |
+
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"
|
| 21 |
|
| 22 |
def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
|
| 23 |
"""Query Hugging Face Inference API"""
|
|
|
|
| 28 |
if HF_TOKEN:
|
| 29 |
headers["Authorization"] = f"Bearer {HF_TOKEN}"
|
| 30 |
|
| 31 |
+
# Use text-generation parameters
|
| 32 |
payload = {
|
| 33 |
"inputs": prompt,
|
| 34 |
"parameters": {
|
| 35 |
+
"max_new_tokens": min(max_tokens, 500), # Limit for faster response
|
| 36 |
"temperature": temperature,
|
| 37 |
"return_full_text": False,
|
| 38 |
+
"do_sample": temperature > 0,
|
| 39 |
"top_p": 0.9
|
| 40 |
+
},
|
| 41 |
+
"options": {
|
| 42 |
+
"wait_for_model": True,
|
| 43 |
+
"use_cache": False
|
| 44 |
}
|
| 45 |
}
|
| 46 |
|
| 47 |
try:
|
| 48 |
+
response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
|
| 49 |
return response
|
| 50 |
except Exception as e:
|
| 51 |
# Create a mock response for error handling
|
|
|
|
| 104 |
return {
|
| 105 |
"service": "AJ STUDIOZ API",
|
| 106 |
"version": "1.0",
|
| 107 |
+
"model": "AJ-Mini v1.0 (Qwen2.5-Coder-0.5B)",
|
| 108 |
"status": "online",
|
| 109 |
"provider": "AJ STUDIOZ",
|
| 110 |
"website": "https://ajstudioz.co.in",
|
|
|
|
| 202 |
prompt_parts.append("Assistant:")
|
| 203 |
full_prompt = "\n\n".join(prompt_parts)
|
| 204 |
|
| 205 |
+
# Simple prompt format (works with most models)
|
| 206 |
+
response = query_hf_model(full_prompt, max_tokens, temperature)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
if response.status_code == 200:
|
| 209 |
result = response.json()
|
|
|
|
| 282 |
async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
|
| 283 |
"""Generator for streaming responses using Hugging Face Inference API"""
|
| 284 |
try:
|
| 285 |
+
# Simple prompt format
|
| 286 |
+
full_prompt = f"You are AJ, a professional AI assistant created by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
|
| 289 |
|
|
|
|
| 388 |
)
|
| 389 |
|
| 390 |
# Non-streaming response
|
| 391 |
+
full_prompt = f"You are AJ, a professional AI assistant created by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
|
| 393 |
response = query_hf_model(full_prompt, max_tokens, temperature)
|
| 394 |
|
|
|
|
| 447 |
if not prompt:
|
| 448 |
raise HTTPException(status_code=400, detail="Prompt is required")
|
| 449 |
|
| 450 |
+
# Call Hugging Face Inference API
|
| 451 |
+
full_prompt = f"You are AJ, a professional AI assistant by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
|
| 453 |
response = query_hf_model(full_prompt, max_tokens, temperature)
|
| 454 |
|
|
|
|
| 496 |
return JSONResponse({"error": "Message is required"}, status_code=400)
|
| 497 |
|
| 498 |
# Call Hugging Face Inference API
|
| 499 |
+
full_message = f"You are AJ, a helpful AI assistant by AJ STUDIOZ.\n\nUser: {message}\n\nAssistant:"
|
| 500 |
+
|
| 501 |
+
response = query_hf_model(full_message, 500, 0.7)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
|
| 503 |
if response.status_code == 200:
|
| 504 |
result = response.json()
|