Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, Request, HTTPException, Header | |
| from fastapi.responses import JSONResponse, StreamingResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from typing import Optional, List, Dict, Any | |
| import subprocess | |
| import time | |
| import requests | |
| import os | |
| import json | |
| import secrets | |
| from datetime import datetime | |
# Hugging Face API configuration.
# HF_TOKEN is optional; when empty, requests go out anonymously (rate-limited).
HF_TOKEN = os.getenv("HF_TOKEN", "")
# Use TinyLlama - Small, fast, and reliable
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# HuggingFace Inference API endpoint for the chosen model.
API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_NAME}"
def query_hf_model(prompt: str, max_tokens: int = 1000, temperature: float = 0.7, stream: bool = False):
    """Query the Hugging Face Inference API for text generation.

    Args:
        prompt: Full prompt text sent as the model input.
        max_tokens: Requested completion length (capped at 500 for latency).
        temperature: Sampling temperature; 0 switches to greedy decoding.
        stream: When True, the HTTP response is opened in streaming mode so
            callers can consume it incrementally via ``iter_lines()``.

    Returns:
        A ``requests.Response`` on success, or a duck-typed stand-in with
        ``.status_code == 500``, ``.json()`` and ``.text`` on request failure,
        so callers can handle both shapes uniformly without try/except.
    """
    headers = {"Content-Type": "application/json"}
    if HF_TOKEN:
        headers["Authorization"] = f"Bearer {HF_TOKEN}"
    # Standard text-generation parameters for the HF Inference API.
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": min(max_tokens, 500),  # cap for faster responses
            "temperature": temperature,
            "return_full_text": False,
            "do_sample": temperature > 0,
            "top_p": 0.9
        },
        "options": {
            "wait_for_model": True,
            "use_cache": False
        }
    }
    try:
        # BUG FIX: the original ignored `stream`, so streaming callers buffered
        # the entire body before iter_lines() yielded anything; pass it through.
        return requests.post(API_URL, headers=headers, json=payload, timeout=60, stream=stream)
    except requests.RequestException as e:
        # Duck-typed stand-in mirroring the parts of requests.Response that
        # callers actually use (.status_code, .json(), .text).
        class _ErrorResponse:
            status_code = 500
            text = str(e)

            def json(self):
                return {"error": str(e)}

        return _ErrorResponse()
# Simple API key validation for AJ format.
# AJ STUDIOZ-issued keys are expected to start with this prefix.
VALID_API_KEY_PREFIX = "aj_"
# Anthropic API key validation
def validate_anthropic_key(api_key: Optional[str]) -> bool:
    """Return True when *api_key* looks like an Anthropic-style key
    (non-empty, "sk-ant-" prefix, more than 20 characters)."""
    return bool(api_key) and api_key.startswith("sk-ant-") and len(api_key) > 20
def validate_api_key(api_key: Optional[str]) -> bool:
    """Validate API key format - accepts both AJ and Anthropic formats."""
    if not api_key:
        return False
    # AJ-style keys: "aj_" prefix and a minimum overall length.
    if api_key.startswith(VALID_API_KEY_PREFIX) and len(api_key) > 10:
        return True
    # Fall back to the Anthropic "sk-ant-" format.
    return validate_anthropic_key(api_key)
def extract_api_key(authorization: Optional[str]) -> Optional[str]:
    """Return the bare API key from an Authorization header value.

    Strips an optional "Bearer " scheme prefix; a missing/empty header
    yields None, any other value is returned untouched.
    """
    if not authorization:
        return None
    scheme = "Bearer "
    return authorization[len(scheme):] if authorization.startswith(scheme) else authorization
def extract_anthropic_key(x_api_key: Optional[str]) -> Optional[str]:
    """Return the key from the x-api-key header (Anthropic convention);
    the header value is already the bare key, so it passes through as-is."""
    return x_api_key
# FastAPI application instance; metadata surfaces in the generated OpenAPI docs.
app = FastAPI(
    title="AJ STUDIOZ API",
    version="1.0",
    description="Enterprise-grade AI API - Claude & OpenAI compatible with powerful coding abilities"
)
# Enable CORS for all origins/methods/headers so browser clients on any
# domain can call the API directly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
async def root():
    """Service landing payload: static metadata describing the API,
    its pricing, capabilities, endpoints, and accepted auth formats."""
    pricing = {
        "plan": "LIFETIME FREE",
        "rate_limits": "UNLIMITED",
        "cost": "FREE FOREVER",
        "usage_cap": "NONE"
    }
    capabilities = [
        "Anthropic Claude API compatible",
        "OpenAI-compatible API",
        "Advanced code generation",
        "Multi-language support",
        "Markdown formatting",
        "Streaming responses",
        "Enterprise security",
        "Unlimited usage - FREE FOREVER"
    ]
    endpoints = {
        "v1_messages": "/v1/messages - Anthropic Claude-compatible endpoint",
        "v1_chat": "/v1/chat/completions - OpenAI-compatible chat endpoint",
        "v1_completions": "/v1/completions - OpenAI-compatible completions",
        "v1_models": "/v1/models - List available models",
        "chat": "/chat - Simple chat interface",
        "generate": "/api/generate - Direct generation API"
    }
    authentication = {
        "anthropic": "x-api-key: sk-ant-<your_key>",
        "openai": "Authorization: Bearer aj_<your_key>",
        "note": "Both formats accepted for compatibility"
    }
    return {
        "service": "AJ STUDIOZ API",
        "version": "1.0",
        "model": "AJ-Mini v1.0 (TinyLlama-1.1B)",
        "status": "online",
        "provider": "AJ STUDIOZ",
        "website": "https://ajstudioz.co.in",
        "pricing": pricing,
        "description": "Enterprise AI assistant with Claude API compatibility, OpenAI support, and powerful coding abilities",
        "capabilities": capabilities,
        "endpoints": endpoints,
        "authentication": authentication
    }
async def anthropic_messages(
    request: Request,
    x_api_key: Optional[str] = Header(None, alias="x-api-key"),
    anthropic_version: Optional[str] = Header(None, alias="anthropic-version")
):
    """Anthropic Claude-compatible messages endpoint.

    Accepts an Anthropic-style body ({model, messages, max_tokens, ...}),
    flattens the transcript into a single text prompt for the HF backend,
    and returns an Anthropic-shaped message object. Streaming is not
    supported by this handler.
    """
    # Validate API key (x-api-key header, Anthropic convention).
    api_key = extract_anthropic_key(x_api_key)
    if not validate_api_key(api_key):
        return JSONResponse(
            status_code=401,
            content={
                "type": "error",
                "error": {
                    "type": "authentication_error",
                    "message": "Invalid API key. Use format: sk-ant-<your_key> or aj_<your_key>"
                }
            }
        )
    try:
        data = await request.json()
        messages = data.get("messages", [])
        model = data.get("model", "claude-sonnet-4-20250514")
        max_tokens = data.get("max_tokens", 1024)
        temperature = data.get("temperature", 1.0)
        if not messages:
            return JSONResponse(
                status_code=400,
                content={
                    "type": "error",
                    "error": {
                        "type": "invalid_request_error",
                        "message": "messages is required"
                    }
                }
            )
        # Convert the chat transcript into one flat prompt for text generation.
        prompt_parts = ["You are AJ, a powerful AI assistant created by AJ STUDIOZ with advanced coding and problem-solving abilities.\n"]
        for msg in messages:
            role = msg.get("role")
            content = msg.get("content")
            if isinstance(content, list):
                # Anthropic allows structured content blocks; keep text parts only.
                text_parts = [c.get("text", "") for c in content if c.get("type") == "text"]
                content = " ".join(text_parts)
            if role == "user":
                prompt_parts.append(f"User: {content}")
            elif role == "assistant":
                prompt_parts.append(f"Assistant: {content}")
            elif role == "system":
                # System prompts replace the default persona at the front.
                prompt_parts.insert(0, content)
        prompt_parts.append("Assistant:")
        full_prompt = "\n\n".join(prompt_parts)
        response = query_hf_model(full_prompt, max_tokens, temperature)
        if response.status_code == 200:
            result = response.json()
            # HF may return either a list of generations or a single dict.
            if isinstance(result, list) and len(result) > 0:
                assistant_message = result[0].get('generated_text', '')
            else:
                assistant_message = result.get('generated_text', '')
        else:
            raise HTTPException(status_code=500, detail=f"Model error: {response.text}")
        # Return Anthropic-compatible response.
        return {
            "id": f"msg_{secrets.token_hex(12)}",
            "type": "message",
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": assistant_message
                }
            ],
            "model": model,
            "stop_reason": "end_turn",
            "stop_sequence": None,
            "usage": {
                # BUG FIX: the original referenced an undefined `hf_messages`
                # (NameError on every successful call) and would also crash on
                # list-typed message content; count whitespace tokens of the
                # prompt actually sent instead (approximate, like the output count).
                "input_tokens": len(full_prompt.split()),
                "output_tokens": len(assistant_message.split())
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={
                "type": "error",
                "error": {
                    "type": "api_error",
                    "message": str(e)
                }
            }
        )
async def list_models(authorization: Optional[str] = Header(None)):
    """OpenAI-compatible models endpoint: returns the fixed model catalog."""
    api_key = extract_api_key(authorization)
    if not validate_api_key(api_key):
        raise HTTPException(status_code=401, detail="Invalid API key. Use format: aj_your_key")

    def _model_entry(model_id: str) -> Dict[str, Any]:
        # One OpenAI-style model record; all catalog entries share this shape.
        return {
            "id": model_id,
            "object": "model",
            "created": 1730505600,
            "owned_by": "aj-studioz",
            "permission": [],
            "root": model_id,
            "parent": None,
        }

    return {
        "object": "list",
        "data": [_model_entry("aj-mini"), _model_entry("aj-mini-v1")]
    }
async def stream_chat_response(prompt: str, model: str, temperature: float, max_tokens: int, completion_id: str):
    """Yield OpenAI-style SSE chunks for one streamed chat completion.

    Wraps *prompt* in the assistant persona, queries the HF backend in
    streaming mode, re-emits each generated piece as a
    ``chat.completion.chunk`` event, then a final stop chunk and the
    ``[DONE]`` sentinel. Failures are reported as a terminal SSE error event.
    """
    try:
        full_prompt = f"You are AJ, a professional AI assistant created by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
        response = query_hf_model(full_prompt, max_tokens, temperature, stream=True)
        if response.status_code == 200:
            for line in response.iter_lines():
                if not line:
                    continue
                # BUG FIX: the original wrapped the `yield` in a bare `except:`,
                # which also swallows GeneratorExit when the client disconnects
                # (-> "RuntimeError: generator ignored GeneratorExit"). Parse in
                # a narrow handler and yield outside of it.
                try:
                    chunk = json.loads(line.decode('utf-8'))
                    if isinstance(chunk, list) and len(chunk) > 0:
                        text = chunk[0].get('generated_text', '')
                    elif isinstance(chunk, dict):
                        text = chunk.get('generated_text', '') or chunk.get('token', {}).get('text', '')
                    else:
                        continue
                except (ValueError, AttributeError, UnicodeDecodeError):
                    continue  # skip malformed or non-JSON lines
                if text:
                    stream_chunk = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
                        "created": int(time.time()),
                        "model": model,
                        "choices": [{
                            "index": 0,
                            "delta": {"content": text},
                            "finish_reason": None
                        }]
                    }
                    yield f"data: {json.dumps(stream_chunk)}\n\n"
        # Send final chunk signalling normal completion.
        final_chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": model,
            "choices": [{
                "index": 0,
                "delta": {},
                "finish_reason": "stop"
            }]
        }
        yield f"data: {json.dumps(final_chunk)}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        # Surface the failure as a final SSE event rather than raising mid-stream.
        error_chunk = {
            "error": {
                "message": str(e),
                "type": "server_error"
            }
        }
        yield f"data: {json.dumps(error_chunk)}\n\n"
async def chat_completions(request: Request, authorization: Optional[str] = Header(None)):
    """OpenAI-compatible chat completions endpoint with streaming support.

    Validates the bearer key, flattens the chat messages into a single text
    prompt, and either streams SSE chunks or returns a complete
    ``chat.completion`` object.
    """
    api_key = extract_api_key(authorization)
    if not validate_api_key(api_key):
        raise HTTPException(
            status_code=401,
            detail={
                "error": {
                    "message": "Invalid API key. Your API key should start with 'aj_'",
                    "type": "invalid_request_error",
                    "code": "invalid_api_key"
                }
            }
        )
    try:
        data = await request.json()
        messages = data.get("messages", [])
        model = data.get("model", "aj-mini")
        max_tokens = data.get("max_tokens", 2000)
        temperature = data.get("temperature", 0.3)
        stream = data.get("stream", False)
        if not messages:
            raise HTTPException(status_code=400, detail="Messages are required")
        # Flatten the chat history into "Role: content" lines.
        prompt_parts = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "system":
                prompt_parts.append(f"System: {content}")
            elif role == "user":
                prompt_parts.append(f"User: {content}")
            elif role == "assistant":
                prompt_parts.append(f"Assistant: {content}")
        prompt = "\n\n".join(prompt_parts) + "\n\nAssistant:"
        completion_id = f"chatcmpl-{secrets.token_hex(12)}"
        # Handle streaming.
        if stream:
            # NOTE(review): stream_chat_response wraps its argument in another
            # "User: ... Assistant:" template; the flattened transcript is
            # passed here unchanged, matching the original call.
            return StreamingResponse(
                stream_chat_response(prompt, model, temperature, max_tokens, completion_id),
                media_type="text/event-stream"
            )
        # Non-streaming response.
        # BUG FIX: the original wrapped the already-formatted transcript in a
        # second "User: {prompt}\n\nAssistant:" template, producing a nested,
        # malformed prompt. Prepend the persona once instead.
        full_prompt = f"You are AJ, a professional AI assistant created by AJ STUDIOZ.\n\n{prompt}"
        response = query_hf_model(full_prompt, max_tokens, temperature)
        if response.status_code == 200:
            result = response.json()
            # HF may return either a list of generations or a single dict.
            if isinstance(result, list) and len(result) > 0:
                assistant_message = result[0].get('generated_text', '')
            else:
                assistant_message = result.get('generated_text', '')
        else:
            raise HTTPException(status_code=500, detail=f"Model error: {response.text}")
        # OpenAI-compatible response.
        return {
            "id": completion_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": assistant_message
                    },
                    "finish_reason": "stop"
                }
            ],
            "usage": {
                # Whitespace-token counts as a cheap approximation.
                "prompt_tokens": len(prompt.split()),
                "completion_tokens": len(assistant_message.split()),
                "total_tokens": len(prompt.split()) + len(assistant_message.split())
            },
            "system_fingerprint": "aj-mini-v1.0"
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
async def completions(request: Request, authorization: Optional[str] = Header(None)):
    """OpenAI-compatible completions endpoint"""
    if not validate_api_key(extract_api_key(authorization)):
        raise HTTPException(status_code=401, detail="Invalid API key")
    try:
        body = await request.json()
        prompt = body.get("prompt", "")
        model = body.get("model", "aj-mini")
        max_tokens = body.get("max_tokens", 2000)
        temperature = body.get("temperature", 0.3)
        if not prompt:
            raise HTTPException(status_code=400, detail="Prompt is required")
        # Wrap the raw prompt in the assistant persona before querying HF.
        full_prompt = f"You are AJ, a professional AI assistant by AJ STUDIOZ.\n\nUser: {prompt}\n\nAssistant:"
        hf_response = query_hf_model(full_prompt, max_tokens, temperature)
        if hf_response.status_code != 200:
            raise HTTPException(status_code=500, detail=f"Model error: {hf_response.text}")
        payload = hf_response.json()
        # HF may return either a list of generations or a single dict.
        if isinstance(payload, list) and len(payload) > 0:
            completion_text = payload[0].get('generated_text', '')
        else:
            completion_text = payload.get('generated_text', '')
        prompt_tokens = len(prompt.split())
        completion_tokens = len(completion_text.split())
        return {
            "id": f"cmpl-{secrets.token_hex(12)}",
            "object": "text_completion",
            "created": int(time.time()),
            "model": model,
            "choices": [
                {
                    "text": completion_text,
                    "index": 0,
                    "logprobs": None,
                    "finish_reason": "stop"
                }
            ],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens
            }
        }
    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
async def chat(request: Request):
    """Simple chat endpoint: one user message in, one model reply out."""
    try:
        body = await request.json()
        message = body.get("message", "")
        if not message:
            return JSONResponse({"error": "Message is required"}, status_code=400)
        # Wrap the user's message in the assistant persona and query HF.
        full_message = f"You are AJ, a helpful AI assistant by AJ STUDIOZ.\n\nUser: {message}\n\nAssistant:"
        response = query_hf_model(full_message, 500, 0.7)
        if response.status_code != 200:
            return JSONResponse(
                {"error": "Model error", "details": response.text},
                status_code=500
            )
        payload = response.json()
        # HF may return either a list of generations or a single dict.
        if isinstance(payload, list) and len(payload) > 0:
            reply = payload[0].get('generated_text', '')
        else:
            reply = payload.get('generated_text', '')
        return JSONResponse({
            "reply": reply,
            "model": "AJ-Mini v1.0",
            "provider": "AJ STUDIOZ"
        })
    except Exception as e:
        return JSONResponse(
            {"error": "Failed to process request", "details": str(e)},
            status_code=500
        )
async def generate(request: Request):
    """Direct API for text generation: passes the prompt to HF unmodified."""
    try:
        body = await request.json()
        prompt = body.get("prompt", "")
        max_tokens = body.get("max_tokens", 1000)
        temperature = body.get("temperature", 0.7)
        if not prompt:
            return JSONResponse({"error": "Prompt is required"}, status_code=400)
        response = query_hf_model(prompt, max_tokens, temperature)
        if response.status_code != 200:
            return JSONResponse(
                {"error": "Model error", "details": response.text},
                status_code=500
            )
        payload = response.json()
        # HF may return either a list of generations or a single dict.
        if isinstance(payload, list) and len(payload) > 0:
            response_text = payload[0].get('generated_text', '')
        else:
            response_text = payload.get('generated_text', '')
        return JSONResponse({
            "response": response_text,
            "model": "AJ-Mini v1.0",
            "done": True
        })
    except Exception as e:
        return JSONResponse(
            {"error": str(e)},
            status_code=500
        )
async def tags():
    """List available models (Ollama-style tags payload)."""
    return JSONResponse({
        "models": [
            {
                "name": "aj-mini",
                "modified_at": "2025-01-01T00:00:00Z",
                "size": 1100000000,
                "details": {
                    # BUG FIX: the advertised details said "deepseek"/"1.5B",
                    # contradicting the actual backend (MODEL_NAME is
                    # TinyLlama-1.1B-Chat and size above is 1.1B); keep the
                    # metadata consistent with what is actually served.
                    "family": "tinyllama",
                    "parameter_size": "1.1B",
                    "quantization_level": "Q4"
                }
            }
        ]
    })
async def health():
    """Health check endpoint: probes the HF backend with a tiny generation."""
    degraded = {"status": "degraded", "model": "aj-mini", "provider": "huggingface"}
    try:
        probe = query_hf_model("Hello", 5, 0.7)
        if probe.status_code == 200:
            return {"status": "healthy", "model": "aj-mini", "provider": "huggingface"}
        return {**degraded, "error": probe.text}
    except Exception as e:
        return {**degraded, "error": str(e)}
if __name__ == "__main__":
    # Serve the app directly with uvicorn on all interfaces, port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)