sf-api / app.py
sixfingerdev's picture
Rename main.py to app.py
68b8582 verified
raw
history blame
14.2 kB
"""
Sixfinger Backend API - FRONTEND UYUMLU VERSİYON
Ultra-fast AI Chat Backend with Multi-Model Support
"""
import os
import time
import json
import logging
from typing import Optional, Dict, Any
from datetime import datetime
from fastapi import FastAPI, HTTPException, Header, Request
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from groq import Groq
# ========== CONFIGURATION ==========
API_VERSION = "1.0.0"
# Read once at import time; an empty value leaves the Groq client unconfigured
# (see groq_client below and the guard in call_groq_api).
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")

# Model mapping - plan-based access control.
# Maps a public model key (what the frontend sends in X-Model) to:
#   groq_id      - the actual Groq model id used for the API call
#   size/language/speed - display metadata for /api/models
#   plans        - subscription plans allowed to use this key
#   daily_limit  - advertised daily request limit (not enforced in this file)
# NOTE(review): several keys alias the same groq_id as a fallback, so
# "size"/"language" describe the advertised model, not the real backend.
MODELS = {
    # FREE Plan Models
    "llama-8b-instant": {
        "groq_id": "llama-3.1-8b-instant",
        "size": "8B",
        "language": "Multilingual",
        "speed": "⚡⚡⚡",
        "plans": ["free", "starter", "pro", "plus"],
        "daily_limit": 14400
    },
    "allam-2-7b": {
        "groq_id": "llama-3.1-8b-instant",  # Fallback
        "size": "7B",
        "language": "Turkish/Arabic",
        "speed": "⚡⚡",
        "plans": ["free", "starter", "pro", "plus"],
        "daily_limit": 300
    },
    # STARTER Plan Models
    "qwen3-32b": {
        "groq_id": "llama-3.3-70b-versatile",
        "size": "32B",
        "language": "Turkish/Chinese",
        "speed": "⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "llama-70b": {
        "groq_id": "llama-3.3-70b-versatile",
        "size": "70B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "llama-maverick-17b": {
        "groq_id": "llama-3.1-8b-instant",
        "size": "17B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "llama-scout-17b": {
        "groq_id": "llama-3.1-8b-instant",
        "size": "17B",
        "language": "Multilingual",
        "speed": "⚡⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    "gpt-oss-20b": {
        "groq_id": "llama-3.1-8b-instant",
        "size": "20B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "plans": ["starter", "pro", "plus"],
        "daily_limit": 1000
    },
    # PRO Plan Models
    "gpt-oss-120b": {
        "groq_id": "llama-3.3-70b-versatile",
        "size": "120B",
        "language": "Multilingual",
        "speed": "⚡⚡",
        "plans": ["pro", "plus"],
        "daily_limit": 1000
    },
    "kimi-k2": {
        "groq_id": "llama-3.3-70b-versatile",
        "size": "Unknown",
        "language": "Chinese",
        "speed": "⚡⚡",
        "plans": ["pro", "plus"],
        "daily_limit": 1000
    }
}

# Automatic per-plan default model selection (used by select_model when the
# caller does not request a specific, accessible model).
DEFAULT_MODELS = {
    "free": "llama-8b-instant",
    "starter": "qwen3-32b",
    "pro": "llama-70b",
    "plus": "gpt-oss-120b"
}
# ========== LOGGING ==========
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# ========== FASTAPI APP ==========
app = FastAPI(
    title="Sixfinger Backend API",
    version=API_VERSION,
    description="Ultra-fast AI Chat Backend",
    docs_url="/docs",
    redoc_url="/redoc"
)

# CORS — wide open for development.
# NOTE(review): restrict allow_origins in production; "*" combined with
# allow_credentials=True relies on the middleware echoing the origin —
# confirm this matches the deployment's security requirements.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Groq client — None when no API key is set; call_groq_api checks for this.
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
# ========== MODELS ==========
class ChatRequest(BaseModel):
    """Request body for /api/chat and /api/chat/stream."""
    prompt: str = Field(..., description="User's message")
    max_tokens: int = Field(default=300, ge=1, le=4000)   # completion token cap
    temperature: float = Field(default=0.7, ge=0, le=2)   # sampling temperature
    top_p: float = Field(default=0.9, ge=0, le=1)         # nucleus sampling
    system_prompt: Optional[str] = None  # optional system message, prepended first
    history: Optional[list] = None       # prior messages; only dicts with "role" and "content" are kept
class ChatResponse(BaseModel):
    """Shape of the JSON payload returned by /api/chat.

    NOTE(review): not wired up as a response_model on the endpoint — the
    handler returns a plain dict of the same shape; kept as documentation.
    """
    response: str            # generated assistant text
    model: str               # actual Groq model id used
    model_key: str           # public model key that was selected
    model_size: str          # display size from MODELS
    model_language: str      # display language from MODELS
    attempts: int            # number of attempts (always 1 in this version)
    usage: Dict[str, int]    # prompt/completion/total token counts
    parameters: Dict[str, Any]  # echo of max_tokens/temperature/top_p
# ========== HELPER FUNCTIONS ==========
def get_allowed_models(plan: str) -> list:
    """Return the model keys that the given subscription plan may access."""
    allowed = []
    for key, config in MODELS.items():
        if plan in config["plans"]:
            allowed.append(key)
    return allowed
def select_model(plan: str, preferred_model: Optional[str] = None) -> str:
    """Pick the model key to serve for a plan.

    Honors ``preferred_model`` when the plan grants access to it; otherwise
    falls back to the plan's default, then to the first allowed model, and
    finally to the global free-tier default.

    Args:
        plan: Subscription plan name ("free", "starter", "pro", "plus").
        preferred_model: Optional model key requested via the X-Model header.

    Returns:
        A key into MODELS that is valid for the plan (or the free default
        when the plan is unknown).
    """
    allowed_models = get_allowed_models(plan)

    # Caller-requested model, if the plan grants access to it.
    if preferred_model and preferred_model in allowed_models:
        return preferred_model

    # Automatic selection: the plan's configured default when accessible.
    default = DEFAULT_MODELS.get(plan, "llama-8b-instant")
    if default in allowed_models:
        return default

    # Bug fix: for an unknown plan, allowed_models is empty and the original
    # `allowed_models[0]` raised IndexError. Degrade to the free-tier default.
    return allowed_models[0] if allowed_models else "llama-8b-instant"
def build_messages(prompt: str, system_prompt: Optional[str], history: Optional[list]) -> list:
    """Assemble the chat message list in the role/content format Groq expects.

    Order: optional system prompt, then valid history entries (dicts carrying
    both "role" and "content"), then the current user prompt.
    """
    messages = []

    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    # Keep only well-formed history entries; treat a missing history as empty.
    for entry in history or []:
        if "role" in entry and "content" in entry:
            messages.append(entry)

    messages.append({"role": "user", "content": prompt})
    return messages
def call_groq_api(
    model_id: str,
    messages: list,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream: bool = False
):
    """Issue a synchronous chat-completion request against the Groq API.

    Args:
        model_id: Actual Groq model id (MODELS[...]["groq_id"]).
        messages: role/content message list from build_messages.
        stream: When True, returns the streaming iterator instead of a
            completed response object.

    Raises:
        HTTPException: 500 when no API key is configured or the call fails.
    """
    if groq_client is None:
        raise HTTPException(status_code=500, detail="Groq API key not configured")

    try:
        return groq_client.chat.completions.create(
            model=model_id,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=stream
        )
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        raise HTTPException(status_code=500, detail=f"Groq API error: {str(e)}")
# ========== ENDPOINTS ==========
@app.get("/health")
def health_check():
    """Liveness probe: report status, version, time and Groq key presence."""
    payload = {
        "status": "healthy",
        "version": API_VERSION,
        "timestamp": datetime.now().isoformat(),
        "groq_configured": bool(GROQ_API_KEY),
    }
    return payload
@app.post("/api/chat")
def chat(
    request: ChatRequest,
    x_user_plan: str = Header(default="free", alias="X-User-Plan"),
    x_model: Optional[str] = Header(default=None, alias="X-Model")
):
    """Non-streaming chat endpoint.

    Returns a JSON payload in the exact shape the frontend parses (see the
    ChatResponse model): generated text, model metadata, token usage and an
    echo of the sampling parameters.
    """
    start_time = time.time()

    # Resolve which model this plan / X-Model header combination may use.
    model_key = select_model(x_user_plan, x_model)
    model_config = MODELS[model_key]
    groq_model_id = model_config["groq_id"]
    logger.info(f"Chat request: plan={x_user_plan}, model={model_key}")

    # System prompt + history + current user message.
    chat_messages = build_messages(request.prompt, request.system_prompt, request.history)

    try:
        completion = call_groq_api(
            model_id=groq_model_id,
            messages=chat_messages,
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            top_p=request.top_p,
            stream=False
        )

        reply_text = completion.choices[0].message.content
        usage = {
            "prompt_tokens": completion.usage.prompt_tokens,
            "completion_tokens": completion.usage.completion_tokens,
            "total_tokens": completion.usage.total_tokens
        }

        elapsed = time.time() - start_time
        logger.info(f"Chat completed: tokens={usage['total_tokens']}, time={elapsed:.2f}s")

        # Exact response shape expected by the frontend — do not rename keys.
        return {
            "response": reply_text,
            "model": groq_model_id,
            "model_key": model_key,
            "model_size": model_config["size"],
            "model_language": model_config["language"],
            "attempts": 1,
            "usage": usage,
            "parameters": {
                "max_tokens": request.max_tokens,
                "temperature": request.temperature,
                "top_p": request.top_p
            }
        }
    except HTTPException:
        # Already shaped by call_groq_api or validation — pass through as-is.
        raise
    except Exception as e:
        logger.error(f"Chat error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/chat/stream")
def chat_stream(
    request: ChatRequest,
    x_user_plan: str = Header(default="free", alias="X-User-Plan"),
    x_model: Optional[str] = Header(default=None, alias="X-Model")
):
    """
    Streaming chat endpoint (SSE).

    Emits Server-Sent Events in the exact format the frontend parses: an
    ``info`` event, then ``text`` delta events, then a final ``done`` event
    carrying token usage (or an ``error`` event on failure).

    Uses a SYNC generator, which is the correct form for FastAPI's
    StreamingResponse with a synchronous endpoint.
    """
    # Model selection (plan + optional X-Model header).
    model_key = select_model(x_user_plan, x_model)
    model_config = MODELS[model_key]
    groq_model_id = model_config["groq_id"]
    logger.info(f"Stream request: plan={x_user_plan}, model={model_key}")

    # System prompt + history + current user message.
    messages = build_messages(
        request.prompt,
        request.system_prompt,
        request.history
    )

    def generate():
        """
        SSE generator - SYNC function (FastAPI requirement).
        The frontend parses these events via iter_content().
        """
        try:
            # Info event announcing the selected model.
            info_msg = json.dumps({'info': f'Trying model: {model_key}'})
            yield f"data: {info_msg}\n\n"

            # Groq streaming call (SYNC iterator of chunks).
            response = call_groq_api(
                model_id=groq_model_id,
                messages=messages,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
                stream=True
            )

            total_tokens = 0
            prompt_tokens = 0
            completion_tokens = 0

            # Relay text deltas as they arrive.
            for chunk in response:
                # Text delta
                if chunk.choices[0].delta.content:
                    text = chunk.choices[0].delta.content
                    text_msg = json.dumps({'text': text})
                    yield f"data: {text_msg}\n\n"
                # Usage info (arrives on the final chunk, if at all);
                # hasattr-guarded because the attributes are SDK-dependent.
                if hasattr(chunk, 'x_groq') and hasattr(chunk.x_groq, 'usage'):
                    usage_data = chunk.x_groq.usage
                    if hasattr(usage_data, 'prompt_tokens'):
                        prompt_tokens = usage_data.prompt_tokens
                    if hasattr(usage_data, 'completion_tokens'):
                        completion_tokens = usage_data.completion_tokens
                    if hasattr(usage_data, 'total_tokens'):
                        total_tokens = usage_data.total_tokens

            # Derive the total if the stream never reported it.
            if total_tokens == 0 and completion_tokens > 0:
                total_tokens = prompt_tokens + completion_tokens

            # Done event - exact shape the frontend expects.
            done_msg = json.dumps({
                'done': True,
                'model_key': model_key,
                'attempts': 1,
                'usage': {
                    'prompt_tokens': prompt_tokens,
                    'completion_tokens': completion_tokens,
                    'total_tokens': total_tokens
                }
            })
            yield f"data: {done_msg}\n\n"
            logger.info(f"Stream completed: model={model_key}, tokens={total_tokens}")
        except Exception as e:
            # Surface the failure to the client as an SSE error event rather
            # than aborting the response mid-stream.
            logger.error(f"Stream error: {e}")
            error_msg = json.dumps({'error': str(e)})
            yield f"data: {error_msg}\n\n"

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            # Disable nginx proxy buffering so events flush immediately.
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive"
        }
    )
@app.get("/api/models")
def list_models(x_user_plan: str = Header(default="free", alias="X-User-Plan")):
    """List the models accessible to the caller's plan plus the plan default."""
    catalog = []
    for key in get_allowed_models(x_user_plan):
        cfg = MODELS[key]
        catalog.append({
            "key": key,
            "size": cfg["size"],
            "language": cfg["language"],
            "speed": cfg["speed"],
            "daily_limit": cfg["daily_limit"]
        })

    return {
        "plan": x_user_plan,
        "models": catalog,
        "default_model": DEFAULT_MODELS.get(x_user_plan, "llama-8b-instant")
    }
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Serialize HTTPExceptions into the frontend's {error, status_code} shape."""
    body = {"error": exc.detail, "status_code": exc.status_code}
    return JSONResponse(status_code=exc.status_code, content=body)
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """Last-resort handler: log the exception and return a generic 500."""
    logger.error(f"Unhandled exception: {exc}")
    body = {"error": "Internal server error", "detail": str(exc)}
    return JSONResponse(status_code=500, content=body)
# ========== STARTUP/SHUTDOWN ==========
# NOTE(review): on_event is deprecated in newer FastAPI releases in favor of
# lifespan handlers — confirm the installed FastAPI version before migrating.
@app.on_event("startup")
async def startup_event():
    # Startup banner: version, Groq key status, and model catalog size.
    logger.info("🚀 Sixfinger Backend API started")
    logger.info(f"📦 Version: {API_VERSION}")
    logger.info(f"🔑 Groq API: {'✅ Configured' if GROQ_API_KEY else '❌ Not configured'}")
    logger.info(f"🤖 Models available: {len(MODELS)}")


@app.on_event("shutdown")
async def shutdown_event():
    # Final log line on graceful shutdown.
    logger.info("👋 Sixfinger Backend API shutting down")
if __name__ == "__main__":
    import uvicorn

    # Bug fix: this file was renamed from main.py to app.py, so the uvicorn
    # import string must be "app:app" (module_name:app_object); the original
    # "main:app" fails with ModuleNotFoundError when run directly. The string
    # form (rather than the app object) is required for reload=True.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
        log_level="info"
    )