AJ STUDIOZ committed on
Commit
761e525
·
1 Parent(s): d49710e

Update to Phi-3 model with proper prompt formatting

Browse files
Files changed (1) hide show
  1. app.py +55 -28
app.py CHANGED
@@ -16,13 +16,18 @@ from datetime import datetime
16
  HF_TOKEN = os.getenv("HF_TOKEN", "")
17
  HF_API_URL = "https://api-inference.huggingface.co/models/"
18
 
19
- # Use a powerful model good for coding - Meta Llama is free and excellent
20
- MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
21
  API_URL = HF_API_URL + MODEL_NAME
22
 
23
  def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
24
  """Query Hugging Face Inference API"""
25
- headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
 
 
 
 
26
 
27
  payload = {
28
  "inputs": prompt,
@@ -30,15 +35,22 @@ def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7
30
  "max_new_tokens": max_tokens,
31
  "temperature": temperature,
32
  "return_full_text": False,
33
- "do_sample": True
 
34
  }
35
  }
36
 
37
- if stream:
38
- payload["stream"] = True
39
-
40
- response = requests.post(API_URL, headers=headers, json=payload, stream=stream)
41
- return response
 
 
 
 
 
 
42
 
43
  # Simple API key validation for AJ format
44
  VALID_API_KEY_PREFIX = "aj_"
@@ -88,7 +100,7 @@ async def root():
88
  return {
89
  "service": "AJ STUDIOZ API",
90
  "version": "1.0",
91
- "model": "AJ-Mini v1.0 (powered by Llama 3.2 3B)",
92
  "status": "online",
93
  "provider": "AJ STUDIOZ",
94
  "website": "https://ajstudioz.co.in",
@@ -186,12 +198,15 @@ async def anthropic_messages(
186
  prompt_parts.append("Assistant:")
187
  full_prompt = "\n\n".join(prompt_parts)
188
 
189
- # Format for Llama
190
- llama_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
191
- {prompt_parts[0]}<|eot_id|><|start_header_id|>user<|end_header_id|>
192
- {prompt_parts[1] if len(prompt_parts) > 1 else 'Hello'}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
193
 
194
- response = query_hf_model(llama_prompt, max_tokens, temperature)
195
 
196
  if response.status_code == 200:
197
  result = response.json()
@@ -270,10 +285,13 @@ async def list_models(authorization: Optional[str] = Header(None)):
270
  async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
271
  """Generator for streaming responses using Hugging Face Inference API"""
272
  try:
273
- # Format prompt for Llama
274
- full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
275
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
276
- {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
277
 
278
  response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
279
 
@@ -378,9 +396,12 @@ async def chat_completions(request: Request, authorization: Optional[str] = Head
378
  )
379
 
380
  # Non-streaming response
381
- full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
382
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
383
- {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
384
 
385
  response = query_hf_model(full_prompt, max_tokens, temperature)
386
 
@@ -440,9 +461,12 @@ async def completions(request: Request, authorization: Optional[str] = Header(No
440
  raise HTTPException(status_code=400, detail="Prompt is required")
441
 
442
  # Call Hugging Face Inference API
443
- full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
444
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
445
- {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
446
 
447
  response = query_hf_model(full_prompt, max_tokens, temperature)
448
 
@@ -490,9 +514,12 @@ async def chat(request: Request):
490
  return JSONResponse({"error": "Message is required"}, status_code=400)
491
 
492
  # Call Hugging Face Inference API
493
- full_message = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
494
- You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|eot_id|><|start_header_id|>user<|end_header_id|>
495
- {message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
 
 
 
496
 
497
  response = query_hf_model(full_message, 1000, 0.7)
498
 
 
16
  HF_TOKEN = os.getenv("HF_TOKEN", "")
17
  HF_API_URL = "https://api-inference.huggingface.co/models/"
18
 
19
+ # Use a powerful free model - Microsoft Phi-3 is excellent and fast
20
+ MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
21
  API_URL = HF_API_URL + MODEL_NAME
22
 
23
def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
    """Query the Hugging Face Inference API and return a Response-like object.

    Args:
        prompt: Fully formatted model prompt (callers build a Phi-3 chat
            template string before passing it in).
        max_tokens: Cap on generated tokens ("max_new_tokens" in the payload).
        temperature: Sampling temperature forwarded to the model.
        stream: When True, ask for a streamed generation and keep the HTTP
            connection open so callers can iterate over response chunks.

    Returns:
        The `requests.Response` on success, or a minimal duck-typed object
        exposing ``status_code == 500``, ``json()`` and ``text`` describing
        the failure, so callers can handle errors without try/except.
    """
    headers = {"Content-Type": "application/json"}
    if HF_TOKEN:
        headers["Authorization"] = f"Bearer {HF_TOKEN}"

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "return_full_text": False,
            "do_sample": True,
            "top_p": 0.9
        }
    }

    # BUG FIX: `stream` was accepted but ignored, so stream_chat_response
    # (which passes stream=True) silently received a fully-buffered,
    # non-streaming response. Propagate the flag to both the payload and
    # requests.post, as the pre-Phi-3 version did.
    if stream:
        payload["stream"] = True

    try:
        return requests.post(API_URL, headers=headers, json=payload, timeout=30, stream=stream)
    except Exception as e:
        # Duck-typed stand-in for requests.Response so call sites can check
        # status_code / json() uniformly instead of wrapping every call.
        class ErrorResponse:
            status_code = 500
            text = str(e)

            def json(self):
                return {"error": str(e)}

        return ErrorResponse()
54
 
55
  # Simple API key validation for AJ format
56
  VALID_API_KEY_PREFIX = "aj_"
 
100
  return {
101
  "service": "AJ STUDIOZ API",
102
  "version": "1.0",
103
+ "model": "AJ-Mini v1.0 (powered by Phi-3 Mini)",
104
  "status": "online",
105
  "provider": "AJ STUDIOZ",
106
  "website": "https://ajstudioz.co.in",
 
198
  prompt_parts.append("Assistant:")
199
  full_prompt = "\n\n".join(prompt_parts)
200
 
201
+ # Format for Phi-3
202
+ phi_prompt = f"""<|system|>
203
+ {prompt_parts[0]}<|end|>
204
+ <|user|>
205
+ {prompt_parts[1] if len(prompt_parts) > 1 else 'Hello'}<|end|>
206
+ <|assistant|>
207
+ """
208
 
209
+ response = query_hf_model(phi_prompt, max_tokens, temperature)
210
 
211
  if response.status_code == 200:
212
  result = response.json()
 
285
  async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
286
  """Generator for streaming responses using Hugging Face Inference API"""
287
  try:
288
+ # Format prompt for Phi-3
289
+ full_prompt = f"""<|system|>
290
+ You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
291
+ <|user|>
292
+ {prompt}<|end|>
293
+ <|assistant|>
294
+ """
295
 
296
  response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
297
 
 
396
  )
397
 
398
  # Non-streaming response
399
+ full_prompt = f"""<|system|>
400
+ You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.<|end|>
401
+ <|user|>
402
+ {prompt}<|end|>
403
+ <|assistant|>
404
+ """
405
 
406
  response = query_hf_model(full_prompt, max_tokens, temperature)
407
 
 
461
  raise HTTPException(status_code=400, detail="Prompt is required")
462
 
463
  # Call Hugging Face Inference API
464
+ full_prompt = f"""<|system|>
465
+ You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|end|>
466
+ <|user|>
467
+ {prompt}<|end|>
468
+ <|assistant|>
469
+ """
470
 
471
  response = query_hf_model(full_prompt, max_tokens, temperature)
472
 
 
514
  return JSONResponse({"error": "Message is required"}, status_code=400)
515
 
516
  # Call Hugging Face Inference API
517
+ full_message = f"""<|system|>
518
+ You are AJ, a professional AI assistant created by AJ STUDIOZ with advanced coding abilities.<|end|>
519
+ <|user|>
520
+ {message}<|end|>
521
+ <|assistant|>
522
+ """
523
 
524
  response = query_hf_model(full_message, 1000, 0.7)
525