Spaces:
Running
Running
| """ | |
| Sixfinger Backend API - FRONTEND UYUMLU VERSİYON | |
| Ultra-fast AI Chat Backend with Multi-Model Support | |
| """ | |
| import os | |
| import time | |
| import json | |
| import logging | |
| from typing import Optional, Dict, Any | |
| from datetime import datetime | |
| from fastapi import FastAPI, HTTPException, Header, Request | |
| from fastapi.responses import StreamingResponse, JSONResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, Field | |
| from groq import Groq | |
# ========== CONFIGURATION ==========
API_VERSION = "1.0.0"
# Groq API key is read from the environment; an empty value disables the client.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")

# Model mapping - plan-based access control.
# Each public model key maps to:
#   groq_id     - the actual Groq model requests are routed to (several keys
#                 intentionally share one backend model as a fallback)
#   size        - size label shown to the frontend
#   language    - advertised language focus
#   speed       - relative speed indicator rendered by the frontend
#   plans       - subscription plans allowed to use this key
#   daily_limit - advertised per-day request cap (not enforced in this file)
MODELS = {
    # FREE plan models
    "llama-8b-instant": {
        "groq_id": "llama-3.1-8b-instant",
        "size": "8B",
        "language": "Multilingual",
        "speed": "⚡⚡⚡",
        "plans": ["free", "starter", "pro", "plus"],
        "daily_limit": 14400
    },
    "allam-2-7b": {
        "groq_id": "llama-3.1-8b-instant",  # Fallback: routed to the 8B model
        "size": "7B",
        "language": "Turkish/Arabic",
        "speed": "⚡⚡",
        "plans": ["free", "starter", "pro", "plus"],
        "daily_limit": 300
    },
    # STARTER plan models
    "qwen3-32b": {
        "groq_id": "llama-3.3-70b-versatile",
        "size": "32B",
        "language": "Turkish/Chinese",
        "speed": "⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "llama-70b": {
        "groq_id": "llama-3.3-70b-versatile",
        "size": "70B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "llama-maverick-17b": {
        "groq_id": "llama-3.1-8b-instant",
        "size": "17B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "llama-scout-17b": {
        "groq_id": "llama-3.1-8b-instant",
        "size": "17B",
        "language": "Multilingual",
        "speed": "⚡⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "gpt-oss-20b": {
        "groq_id": "llama-3.1-8b-instant",
        "size": "20B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    # PRO plan models
    "gpt-oss-120b": {
        "groq_id": "llama-3.3-70b-versatile",
        "size": "120B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "plans": ["pro", "plus"],
        "daily_limit": 1000
    },
    "kimi-k2": {
        "groq_id": "llama-3.3-70b-versatile",
        "size": "Unknown",
        "language": "Chinese",
        "speed": "⚡⚡",
        "plans": ["pro", "plus"],
        "daily_limit": 1000
    }
}

# Automatic model selection per plan (used when the client sends no X-Model
# header, or requests a model its plan cannot access).
DEFAULT_MODELS = {
    "free": "llama-8b-instant",
    "starter": "qwen3-32b",
    "pro": "llama-70b",
    "plus": "gpt-oss-120b"
}
# ========== LOGGING ==========
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# ========== FASTAPI APP ==========
app = FastAPI(
    title="Sixfinger Backend API",
    version=API_VERSION,
    description="Ultra-fast AI Chat Backend",
    docs_url="/docs",
    redoc_url="/redoc"
)

# CORS — wide open for development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # restrict to known origins in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Groq client — None when no API key is configured; call sites must check.
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
| # ========== MODELS ========== | |
class ChatRequest(BaseModel):
    """Incoming chat request body shared by the JSON and streaming endpoints."""
    prompt: str = Field(..., description="User's message")
    max_tokens: int = Field(default=300, ge=1, le=4000)
    temperature: float = Field(default=0.7, ge=0, le=2)
    top_p: float = Field(default=0.9, ge=0, le=1)
    system_prompt: Optional[str] = None
    # Prior conversation turns as {"role": ..., "content": ...} dicts;
    # entries missing either key are dropped by build_messages().
    history: Optional[list] = None
class ChatResponse(BaseModel):
    """Response shape the frontend expects from the non-streaming chat endpoint."""
    response: str           # completion text
    model: str              # actual Groq model id used
    model_key: str          # public model key from MODELS
    model_size: str
    model_language: str
    attempts: int           # always 1 in the current implementation
    usage: Dict[str, int]   # prompt/completion/total token counts
    parameters: Dict[str, Any]  # echo of max_tokens/temperature/top_p
| # ========== HELPER FUNCTIONS ========== | |
def get_allowed_models(plan: str) -> list:
    """Return the public model keys that the given subscription plan may use."""
    allowed = []
    for key, cfg in MODELS.items():
        if plan in cfg["plans"]:
            allowed.append(key)
    return allowed
def select_model(plan: str, preferred_model: Optional[str] = None) -> str:
    """Pick the model key to serve this request with.

    Preference order:
      1. the caller's requested model, if the plan grants access to it;
      2. the plan's configured default model;
      3. the first model the plan can access;
      4. the global free-tier model when the plan matches nothing.

    Fix: previously an unknown plan produced an empty allowed list and
    ``allowed_models[0]`` raised IndexError (surfacing as HTTP 500);
    now it falls back to the free-tier default instead.
    """
    allowed_models = get_allowed_models(plan)
    # Honor the caller's choice only when the plan actually includes it.
    if preferred_model and preferred_model in allowed_models:
        return preferred_model
    # Automatic selection: plan default first, then any allowed model.
    default = DEFAULT_MODELS.get(plan, "llama-8b-instant")
    if default in allowed_models:
        return default
    return allowed_models[0] if allowed_models else "llama-8b-instant"
def build_messages(prompt: str, system_prompt: Optional[str], history: Optional[list]) -> list:
    """Assemble the OpenAI-style message list for a chat completion.

    Order: optional system message, well-formed history entries, then the
    current user prompt.
    """
    messages = [{"role": "system", "content": system_prompt}] if system_prompt else []
    # Keep only history entries that carry both required keys.
    messages.extend(m for m in (history or []) if "role" in m and "content" in m)
    messages.append({"role": "user", "content": prompt})
    return messages
def call_groq_api(
    model_id: str,
    messages: list,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream: bool = False
):
    """Issue a synchronous chat-completion request to Groq.

    Returns the Groq SDK response (a completion object, or a chunk iterator
    when ``stream`` is true). Raises HTTPException(500) when no API key is
    configured or when the SDK reports any failure.
    """
    if not groq_client:
        raise HTTPException(status_code=500, detail="Groq API key not configured")
    try:
        return groq_client.chat.completions.create(
            model=model_id,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=stream,
        )
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        raise HTTPException(status_code=500, detail=f"Groq API error: {str(e)}")
| # ========== ENDPOINTS ========== | |
# NOTE(review): the original file registered no routes at all — every endpoint
# was defined but unreachable. Path "/health" assumed — confirm against frontend.
@app.get("/health")
def health_check():
    """Health check endpoint: reports version and Groq configuration status."""
    return {
        "status": "healthy",
        "version": API_VERSION,
        "timestamp": datetime.now().isoformat(),
        "groq_configured": bool(GROQ_API_KEY)
    }
# NOTE(review): original had no route decorator, so this endpoint was never
# registered. Path "/chat" assumed from the frontend contract — confirm.
# response_model wires in the previously-unused ChatResponse schema.
@app.post("/chat", response_model=ChatResponse)
def chat(
    request: ChatRequest,
    x_user_plan: str = Header(default="free", alias="X-User-Plan"),
    x_model: Optional[str] = Header(default=None, alias="X-Model")
):
    """
    Non-streaming chat endpoint (JSON response).

    Plan and model preference come from the X-User-Plan / X-Model headers;
    the response uses the exact field layout the frontend expects
    (mirrored by ChatResponse).
    """
    start_time = time.time()
    # Resolve the public model key for this plan, then its Groq backend id.
    model_key = select_model(x_user_plan, x_model)
    model_config = MODELS[model_key]
    groq_model_id = model_config["groq_id"]
    logger.info(f"Chat request: plan={x_user_plan}, model={model_key}")
    # Build the OpenAI-style message list (system + history + prompt).
    messages = build_messages(
        request.prompt,
        request.system_prompt,
        request.history
    )
    try:
        response = call_groq_api(
            model_id=groq_model_id,
            messages=messages,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            top_p=request.top_p,
            stream=False
        )
        # Extract completion text and token accounting.
        content = response.choices[0].message.content
        usage = {
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
            "total_tokens": response.usage.total_tokens
        }
        elapsed = time.time() - start_time
        logger.info(f"Chat completed: tokens={usage['total_tokens']}, time={elapsed:.2f}s")
        # EXACT response shape expected by the frontend.
        return {
            "response": content,
            "model": groq_model_id,
            "model_key": model_key,
            "model_size": model_config["size"],
            "model_language": model_config["language"],
            "attempts": 1,
            "usage": usage,
            "parameters": {
                "max_tokens": request.max_tokens,
                "temperature": request.temperature,
                "top_p": request.top_p
            }
        }
    except HTTPException:
        # Re-raise HTTP errors untouched (e.g. from call_groq_api).
        raise
    except Exception as e:
        logger.error(f"Chat error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# NOTE(review): original had no route decorator, so this endpoint was never
# registered. Path "/chat/stream" assumed from the frontend contract — confirm.
@app.post("/chat/stream")
def chat_stream(
    request: ChatRequest,
    x_user_plan: str = Header(default="free", alias="X-User-Plan"),
    x_model: Optional[str] = Header(default=None, alias="X-Model")
):
    """
    Streaming chat endpoint (Server-Sent Events).

    Emits newline-delimited `data:` frames in the exact shapes the frontend
    parses: an initial {"info": ...}, then {"text": ...} chunks, and finally
    either {"done": true, ...usage...} or, on failure, {"error": ...}.
    """
    # Resolve model and messages up front so obvious errors surface before
    # the streaming response starts.
    model_key = select_model(x_user_plan, x_model)
    model_config = MODELS[model_key]
    groq_model_id = model_config["groq_id"]
    logger.info(f"Stream request: plan={x_user_plan}, model={model_key}")
    messages = build_messages(
        request.prompt,
        request.system_prompt,
        request.history
    )

    def generate():
        """SSE generator — deliberately sync; FastAPI runs it in a threadpool."""
        try:
            info_msg = json.dumps({'info': f'Trying model: {model_key}'})
            yield f"data: {info_msg}\n\n"
            # Groq streaming call (sync iterator of chunks).
            response = call_groq_api(
                model_id=groq_model_id,
                messages=messages,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
                stream=True
            )
            total_tokens = 0
            prompt_tokens = 0
            completion_tokens = 0
            for chunk in response:
                # Text delta, if this chunk carries one.
                if chunk.choices[0].delta.content:
                    text = chunk.choices[0].delta.content
                    text_msg = json.dumps({'text': text})
                    yield f"data: {text_msg}\n\n"
                # Usage statistics arrive on the final chunk (x_groq extension);
                # hasattr guards keep us safe if the SDK omits them.
                if hasattr(chunk, 'x_groq') and hasattr(chunk.x_groq, 'usage'):
                    usage_data = chunk.x_groq.usage
                    if hasattr(usage_data, 'prompt_tokens'):
                        prompt_tokens = usage_data.prompt_tokens
                    if hasattr(usage_data, 'completion_tokens'):
                        completion_tokens = usage_data.completion_tokens
                    if hasattr(usage_data, 'total_tokens'):
                        total_tokens = usage_data.total_tokens
            # Derive the total if the API never reported it.
            if total_tokens == 0 and completion_tokens > 0:
                total_tokens = prompt_tokens + completion_tokens
            # Final frame — EXACT shape expected by the frontend.
            done_msg = json.dumps({
                'done': True,
                'model_key': model_key,
                'attempts': 1,
                'usage': {
                    'prompt_tokens': prompt_tokens,
                    'completion_tokens': completion_tokens,
                    'total_tokens': total_tokens
                }
            })
            yield f"data: {done_msg}\n\n"
            logger.info(f"Stream completed: model={model_key}, tokens={total_tokens}")
        except Exception as e:
            # The HTTP status is already 200 once streaming starts, so errors
            # must be delivered in-band as an SSE frame.
            logger.error(f"Stream error: {e}")
            error_msg = json.dumps({'error': str(e)})
            yield f"data: {error_msg}\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # disable nginx buffering for SSE
            "Connection": "keep-alive"
        }
    )
# NOTE(review): original had no route decorator, so this endpoint was never
# registered. Path "/models" assumed — confirm against frontend.
@app.get("/models")
def list_models(x_user_plan: str = Header(default="free", alias="X-User-Plan")):
    """
    List the models accessible to the caller's plan (from X-User-Plan),
    plus the plan's default model.
    """
    allowed_models = get_allowed_models(x_user_plan)
    models_info = [
        {
            "key": model_key,
            "size": MODELS[model_key]["size"],
            "language": MODELS[model_key]["language"],
            "speed": MODELS[model_key]["speed"],
            "daily_limit": MODELS[model_key]["daily_limit"]
        }
        for model_key in allowed_models
    ]
    return {
        "plan": x_user_plan,
        "models": models_info,
        "default_model": DEFAULT_MODELS.get(x_user_plan, "llama-8b-instant")
    }
# NOTE(review): original defined this handler but never registered it, so
# FastAPI's default {"detail": ...} error shape was returned instead of the
# flat {error, status_code} shape this handler (and its docstring) intends.
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Custom HTTP exception handler: flat {error, status_code} JSON body."""
    return JSONResponse(
        status_code=exc.status_code,
        content={
            "error": exc.detail,
            "status_code": exc.status_code
        }
    )
# NOTE(review): original defined this handler but never registered it.
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """Catch-all handler: log the failure and return a generic 500 body."""
    logger.error(f"Unhandled exception: {exc}")
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal server error",
            "detail": str(exc)
        }
    )
| # ========== STARTUP/SHUTDOWN ========== | |
# NOTE(review): original defined this hook but never registered it, so the
# startup banner was never logged.
@app.on_event("startup")
async def startup_event():
    """Log version, Groq configuration and model count at startup."""
    logger.info("🚀 Sixfinger Backend API started")
    logger.info(f"📦 Version: {API_VERSION}")
    logger.info(f"🔑 Groq API: {'✅ Configured' if GROQ_API_KEY else '❌ Not configured'}")
    logger.info(f"🤖 Models available: {len(MODELS)}")
# NOTE(review): original defined this hook but never registered it.
@app.on_event("shutdown")
async def shutdown_event():
    """Log a farewell line on graceful shutdown."""
    logger.info("👋 Sixfinger Backend API shutting down")
if __name__ == "__main__":
    # Local development entry point: auto-reload server on port 8000.
    # NOTE(review): "main:app" assumes this file is saved as main.py — confirm.
    import uvicorn
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
        log_level="info"
    )