sixfingerdev committed on
Commit
03c667a
·
verified ·
1 Parent(s): 05d366b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +411 -98
app.py CHANGED
@@ -1,13 +1,15 @@
1
  """
2
  Sixfinger Backend API - FRONTEND UYUMLU VERSİYON
3
  Ultra-fast AI Chat Backend with Multi-Model Support
 
4
  """
5
 
6
  import os
7
  import time
8
  import json
9
  import logging
10
- from typing import Optional, Dict, Any
 
11
  from datetime import datetime
12
 
13
  from fastapi import FastAPI, HTTPException, Header, Request
@@ -15,87 +17,190 @@ from fastapi.responses import StreamingResponse, JSONResponse
15
  from fastapi.middleware.cors import CORSMiddleware
16
  from pydantic import BaseModel, Field
17
  from groq import Groq
 
18
 
19
  # ========== CONFIGURATION ==========
20
- API_VERSION = "1.0.0"
21
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
22
 
23
- # Model mapping - Plan bazlı erişim kontrolü
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  MODELS = {
25
- # FREE Plan Models
 
 
26
  "llama-8b-instant": {
27
- "groq_id": "llama-3.1-8b-instant",
 
 
28
  "size": "8B",
29
  "language": "Multilingual",
30
  "speed": "⚡⚡⚡",
 
31
  "plans": ["free", "starter", "pro", "plus"],
32
  "daily_limit": 14400
33
  },
34
  "allam-2-7b": {
35
- "groq_id": "llama-3.1-8b-instant", # Fallback
 
 
36
  "size": "7B",
37
  "language": "Turkish/Arabic",
38
  "speed": "⚡⚡",
 
39
  "plans": ["free", "starter", "pro", "plus"],
40
  "daily_limit": 300
41
  },
42
 
43
- # STARTER Plan Models
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  "qwen3-32b": {
45
- "groq_id": "llama-3.3-70b-versatile",
 
 
46
  "size": "32B",
47
  "language": "Turkish/Chinese",
48
  "speed": "⚡⚡",
 
49
  "plans": ["starter", "pro", "plus"],
50
  "daily_limit": 1000
51
  },
52
  "llama-70b": {
53
- "groq_id": "llama-3.3-70b-versatile",
 
 
54
  "size": "70B",
55
  "language": "Multilingual",
56
  "speed": "⚡⚡",
 
57
  "plans": ["starter", "pro", "plus"],
58
  "daily_limit": 1000
59
  },
60
  "llama-maverick-17b": {
61
- "groq_id": "llama-3.1-8b-instant",
 
 
62
  "size": "17B",
63
  "language": "Multilingual",
64
  "speed": "⚡⚡",
 
65
  "plans": ["starter", "pro", "plus"],
66
  "daily_limit": 1000
67
  },
68
  "llama-scout-17b": {
69
- "groq_id": "llama-3.1-8b-instant",
 
 
70
  "size": "17B",
71
  "language": "Multilingual",
72
  "speed": "⚡⚡⚡",
 
73
  "plans": ["starter", "pro", "plus"],
74
  "daily_limit": 1000
75
  },
76
  "gpt-oss-20b": {
77
- "groq_id": "llama-3.1-8b-instant",
 
 
78
  "size": "20B",
79
  "language": "Multilingual",
80
  "speed": "⚡⚡",
 
81
  "plans": ["starter", "pro", "plus"],
82
  "daily_limit": 1000
83
  },
84
 
85
- # PRO Plan Models
 
86
  "gpt-oss-120b": {
87
- "groq_id": "llama-3.3-70b-versatile",
 
 
88
  "size": "120B",
89
  "language": "Multilingual",
90
  "speed": "⚡⚡",
 
91
  "plans": ["pro", "plus"],
92
  "daily_limit": 1000
93
  },
94
  "kimi-k2": {
95
- "groq_id": "llama-3.3-70b-versatile",
 
 
96
  "size": "Unknown",
97
  "language": "Chinese",
98
  "speed": "⚡⚡",
 
99
  "plans": ["pro", "plus"],
100
  "daily_limit": 1000
101
  }
@@ -103,8 +208,8 @@ MODELS = {
103
 
104
  # Plan bazlı otomatik model seçimi
105
  DEFAULT_MODELS = {
106
- "free": "llama-8b-instant",
107
- "starter": "qwen3-32b",
108
  "pro": "llama-70b",
109
  "plus": "gpt-oss-120b"
110
  }
@@ -121,7 +226,7 @@ logger = logging.getLogger(__name__)
121
  app = FastAPI(
122
  title="Sixfinger Backend API",
123
  version=API_VERSION,
124
- description="Ultra-fast AI Chat Backend",
125
  docs_url="/docs",
126
  redoc_url="/redoc"
127
  )
@@ -129,16 +234,23 @@ app = FastAPI(
129
  # CORS
130
  app.add_middleware(
131
  CORSMiddleware,
132
- allow_origins=["*"], # Production'da kısıtlayın
133
  allow_credentials=True,
134
  allow_methods=["*"],
135
  allow_headers=["*"],
136
  )
137
 
 
138
  # Groq Client
139
  groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
140
 
141
- # ========== MODELS ==========
 
 
 
 
 
 
142
  class ChatRequest(BaseModel):
143
  prompt: str = Field(..., description="User's message")
144
  max_tokens: int = Field(default=300, ge=1, le=4000)
@@ -153,6 +265,7 @@ class ChatResponse(BaseModel):
153
  model_key: str
154
  model_size: str
155
  model_language: str
 
156
  attempts: int
157
  usage: Dict[str, int]
158
  parameters: Dict[str, Any]
@@ -166,11 +279,9 @@ def select_model(plan: str, preferred_model: Optional[str] = None) -> str:
166
  """Model seçimi yap"""
167
  allowed_models = get_allowed_models(plan)
168
 
169
- # Eğer kullanıcı model belirtmişse ve erişimi varsa
170
  if preferred_model and preferred_model in allowed_models:
171
  return preferred_model
172
 
173
- # Otomatik seçim
174
  default = DEFAULT_MODELS.get(plan, "llama-8b-instant")
175
  return default if default in allowed_models else allowed_models[0]
176
 
@@ -178,21 +289,22 @@ def build_messages(prompt: str, system_prompt: Optional[str], history: Optional[
178
  """Chat messages listesi oluştur"""
179
  messages = []
180
 
181
- # System prompt
182
  if system_prompt:
183
  messages.append({"role": "system", "content": system_prompt})
 
 
184
 
185
- # History
186
  if history:
187
  for msg in history:
188
  if "role" in msg and "content" in msg:
189
  messages.append(msg)
190
 
191
- # Current prompt
192
  messages.append({"role": "user", "content": prompt})
193
 
194
  return messages
195
 
 
 
196
  def call_groq_api(
197
  model_id: str,
198
  messages: list,
@@ -201,7 +313,7 @@ def call_groq_api(
201
  top_p: float,
202
  stream: bool = False
203
  ):
204
- """Groq API'ye istek at (SYNC)"""
205
  if not groq_client:
206
  raise HTTPException(status_code=500, detail="Groq API key not configured")
207
 
@@ -219,6 +331,83 @@ def call_groq_api(
219
  logger.error(f"Groq API error: {e}")
220
  raise HTTPException(status_code=500, detail=f"Groq API error: {str(e)}")
221
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  # ========== ENDPOINTS ==========
223
 
224
  @app.get("/health")
@@ -228,7 +417,11 @@ def health_check():
228
  "status": "healthy",
229
  "version": API_VERSION,
230
  "timestamp": datetime.now().isoformat(),
231
- "groq_configured": bool(GROQ_API_KEY)
 
 
 
 
232
  }
233
 
234
  @app.post("/api/chat")
@@ -246,9 +439,10 @@ def chat(
246
  # Model seçimi
247
  model_key = select_model(x_user_plan, x_model)
248
  model_config = MODELS[model_key]
249
- groq_model_id = model_config["groq_id"]
 
250
 
251
- logger.info(f"Chat request: plan={x_user_plan}, model={model_key}")
252
 
253
  # Messages
254
  messages = build_messages(
@@ -257,35 +451,56 @@ def chat(
257
  request.history
258
  )
259
 
260
- # Groq API call
261
  try:
262
- response = call_groq_api(
263
- model_id=groq_model_id,
264
- messages=messages,
265
- max_tokens=request.max_tokens,
266
- temperature=request.temperature,
267
- top_p=request.top_p,
268
- stream=False
269
- )
270
-
271
- # Extract response
272
- content = response.choices[0].message.content
273
- usage = {
274
- "prompt_tokens": response.usage.prompt_tokens,
275
- "completion_tokens": response.usage.completion_tokens,
276
- "total_tokens": response.usage.total_tokens
277
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
  elapsed = time.time() - start_time
280
- logger.info(f"Chat completed: tokens={usage['total_tokens']}, time={elapsed:.2f}s")
281
 
282
- # Frontend'in beklediği EXACT format
283
  return {
284
  "response": content,
285
- "model": groq_model_id,
286
  "model_key": model_key,
287
  "model_size": model_config["size"],
288
  "model_language": model_config["language"],
 
289
  "attempts": 1,
290
  "usage": usage,
291
  "parameters": {
@@ -309,36 +524,28 @@ def chat_stream(
309
  ):
310
  """
311
  Streaming chat endpoint (SSE)
312
- Frontend'e TAM UYUMLU SSE format
313
- ✅ SYNC generator (FastAPI StreamingResponse için doğru)
314
  """
315
- # Model seçimi
316
  model_key = select_model(x_user_plan, x_model)
317
  model_config = MODELS[model_key]
318
- groq_model_id = model_config["groq_id"]
 
319
 
320
- logger.info(f"Stream request: plan={x_user_plan}, model={model_key}")
321
 
322
- # Messages
323
  messages = build_messages(
324
  request.prompt,
325
  request.system_prompt,
326
  request.history
327
  )
328
 
329
- def generate():
330
- """
331
- SSE generator - SYNC function (FastAPI requirement)
332
- Frontend iter_content() ile parse edecek
333
- """
334
  try:
335
- # Info mesajı
336
- info_msg = json.dumps({'info': f'Trying model: {model_key}'})
337
- yield f"data: {info_msg}\n\n"
338
 
339
- # Groq streaming (SYNC)
340
  response = call_groq_api(
341
- model_id=groq_model_id,
342
  messages=messages,
343
  max_tokens=request.max_tokens,
344
  temperature=request.temperature,
@@ -350,50 +557,109 @@ def chat_stream(
350
  prompt_tokens = 0
351
  completion_tokens = 0
352
 
353
- # Stream chunks
354
  for chunk in response:
355
- # Text chunk
356
  if chunk.choices[0].delta.content:
357
  text = chunk.choices[0].delta.content
358
- text_msg = json.dumps({'text': text})
359
- yield f"data: {text_msg}\n\n"
360
 
361
- # Usage bilgisi (son chunk'ta gelir)
362
  if hasattr(chunk, 'x_groq') and hasattr(chunk.x_groq, 'usage'):
363
  usage_data = chunk.x_groq.usage
364
- if hasattr(usage_data, 'prompt_tokens'):
365
- prompt_tokens = usage_data.prompt_tokens
366
- if hasattr(usage_data, 'completion_tokens'):
367
- completion_tokens = usage_data.completion_tokens
368
- if hasattr(usage_data, 'total_tokens'):
369
- total_tokens = usage_data.total_tokens
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
- # Son usage hesaplama (eğer gelmediyse)
372
- if total_tokens == 0 and completion_tokens > 0:
373
- total_tokens = prompt_tokens + completion_tokens
374
 
375
- # Done mesajı - Frontend'in beklediği EXACT format
376
- done_msg = json.dumps({
377
- 'done': True,
378
- 'model_key': model_key,
379
- 'attempts': 1,
380
- 'usage': {
381
- 'prompt_tokens': prompt_tokens,
382
- 'completion_tokens': completion_tokens,
383
- 'total_tokens': total_tokens
384
- }
385
- })
386
- yield f"data: {done_msg}\n\n"
 
 
 
 
387
 
388
- logger.info(f"Stream completed: model={model_key}, tokens={total_tokens}")
389
 
390
  except Exception as e:
391
- logger.error(f"Stream error: {e}")
392
- error_msg = json.dumps({'error': str(e)})
393
- yield f"data: {error_msg}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
  return StreamingResponse(
396
- generate(),
397
  media_type="text/event-stream",
398
  headers={
399
  "Cache-Control": "no-cache",
@@ -414,16 +680,56 @@ def list_models(x_user_plan: str = Header(default="free", alias="X-User-Plan")):
414
  config = MODELS[model_key]
415
  models_info.append({
416
  "key": model_key,
 
417
  "size": config["size"],
418
  "language": config["language"],
419
  "speed": config["speed"],
 
 
420
  "daily_limit": config["daily_limit"]
421
  })
422
 
 
 
 
 
 
 
 
 
423
  return {
424
  "plan": x_user_plan,
 
425
  "models": models_info,
426
- "default_model": DEFAULT_MODELS.get(x_user_plan, "llama-8b-instant")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  }
428
 
429
  @app.exception_handler(HTTPException)
@@ -455,7 +761,14 @@ async def startup_event():
455
  logger.info("🚀 Sixfinger Backend API started")
456
  logger.info(f"📦 Version: {API_VERSION}")
457
  logger.info(f"🔑 Groq API: {'✅ Configured' if GROQ_API_KEY else '❌ Not configured'}")
458
- logger.info(f"🤖 Models available: {len(MODELS)}")
 
 
 
 
 
 
 
459
 
460
  @app.on_event("shutdown")
461
  async def shutdown_event():
 
1
  """
2
  Sixfinger Backend API - FRONTEND UYUMLU VERSİYON
3
  Ultra-fast AI Chat Backend with Multi-Model Support
4
+ Supports: Groq, DeepInfra, LLM7.io
5
  """
6
 
7
  import os
8
  import time
9
  import json
10
  import logging
11
+ import requests
12
+ from typing import Optional, Dict, Any, Generator
13
  from datetime import datetime
14
 
15
  from fastapi import FastAPI, HTTPException, Header, Request
 
17
  from fastapi.middleware.cors import CORSMiddleware
18
  from pydantic import BaseModel, Field
19
  from groq import Groq
20
+ from openai import OpenAI
21
 
22
  # ========== CONFIGURATION ==========
23
+ API_VERSION = "1.1.0"
24
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
25
 
26
# ========== API PROVIDERS ==========
# Registry of upstream inference backends. The provider id (dict key) is what
# each MODELS entry's "provider" field points at, and is what call_api() uses
# to route requests. "requires_key" is informational; only the Groq client is
# actually gated on an API key at construction time.
PROVIDERS = {
    "groq": {
        "name": "Groq",
        "type": "groq",
        "requires_key": True
    },
    "deepinfra": {
        "name": "DeepInfra",
        "type": "deepinfra",
        # Full chat-completions endpoint URL (called directly via `requests`).
        "base_url": "https://api.deepinfra.com/v1/openai/chat/completions",
        "requires_key": False
    },
    "llm7": {
        "name": "LLM7.io",
        # OpenAI-compatible API; consumed through the `openai` client below.
        "type": "openai_compatible",
        "base_url": "https://api.llm7.io/v1",
        # The service ignores the key, but the OpenAI client requires a value.
        "api_key": "unused",
        "requires_key": False
    }
}
47
+
48
+ # ========== MODEL MAPPING ==========
49
  MODELS = {
50
+ # ============ FREE PLAN MODELS ============
51
+
52
+ # Groq Models (Free)
53
  "llama-8b-instant": {
54
+ "provider": "groq",
55
+ "model_id": "llama-3.1-8b-instant",
56
+ "display_name": "Llama 3.1 8B Instant",
57
  "size": "8B",
58
  "language": "Multilingual",
59
  "speed": "⚡⚡⚡",
60
+ "description": "Hızlı ve hafif genel amaçlı model",
61
  "plans": ["free", "starter", "pro", "plus"],
62
  "daily_limit": 14400
63
  },
64
  "allam-2-7b": {
65
+ "provider": "groq",
66
+ "model_id": "llama-3.1-8b-instant",
67
+ "display_name": "Allam 2 7B",
68
  "size": "7B",
69
  "language": "Turkish/Arabic",
70
  "speed": "⚡⚡",
71
+ "description": "Türkçe ve Arapça optimizeli model",
72
  "plans": ["free", "starter", "pro", "plus"],
73
  "daily_limit": 300
74
  },
75
 
76
+ # DeepInfra Models (Free)
77
+ "llama4-maverick": {
78
+ "provider": "deepinfra",
79
+ "model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo",
80
+ "display_name": "Llama 4 Maverick 17B",
81
+ "size": "17B",
82
+ "language": "Multilingual",
83
+ "speed": "⚡⚡",
84
+ "description": "Meta'nın en yeni hızlı ve yetenekli modeli",
85
+ "plans": ["free", "starter", "pro", "plus"],
86
+ "daily_limit": 1000
87
+ },
88
+ "qwen3-coder": {
89
+ "provider": "deepinfra",
90
+ "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
91
+ "display_name": "Qwen3 Coder 480B",
92
+ "size": "480B",
93
+ "language": "Multilingual",
94
+ "speed": "⚡",
95
+ "description": "Kod yazma uzmanı dev model",
96
+ "plans": ["free", "starter", "pro", "plus"],
97
+ "daily_limit": 500
98
+ },
99
+ "deepseek-r1": {
100
+ "provider": "deepinfra",
101
+ "model_id": "deepseek-ai/DeepSeek-R1-0528-Turbo",
102
+ "display_name": "DeepSeek R1 Turbo",
103
+ "size": "Unknown",
104
+ "language": "Multilingual",
105
+ "speed": "⚡",
106
+ "description": "Muhakeme ve zeka odaklı model",
107
+ "plans": ["free", "starter", "pro", "plus"],
108
+ "daily_limit": 500
109
+ },
110
+
111
+ # ============ STARTER PLAN MODELS ============
112
+
113
+ # LLM7.io Models (Starter+)
114
+ "gpt4-nano": {
115
+ "provider": "llm7",
116
+ "model_id": "gpt-4.1-nano-2025-04-14",
117
+ "display_name": "GPT-4.1 Nano",
118
+ "size": "Nano",
119
+ "language": "Multilingual",
120
+ "speed": "⚡⚡⚡",
121
+ "description": "OpenAI GPT-4 tabanlı hızlı model",
122
+ "plans": ["starter", "pro", "plus"],
123
+ "daily_limit": 1000
124
+ },
125
+
126
+ # Groq Models (Starter+)
127
  "qwen3-32b": {
128
+ "provider": "groq",
129
+ "model_id": "llama-3.3-70b-versatile",
130
+ "display_name": "Qwen3 32B",
131
  "size": "32B",
132
  "language": "Turkish/Chinese",
133
  "speed": "⚡⚡",
134
+ "description": "Türkçe ve Çince optimize edilmiş model",
135
  "plans": ["starter", "pro", "plus"],
136
  "daily_limit": 1000
137
  },
138
  "llama-70b": {
139
+ "provider": "groq",
140
+ "model_id": "llama-3.3-70b-versatile",
141
+ "display_name": "Llama 3.3 70B",
142
  "size": "70B",
143
  "language": "Multilingual",
144
  "speed": "⚡⚡",
145
+ "description": "Güçlü ve çok yönlü büyük model",
146
  "plans": ["starter", "pro", "plus"],
147
  "daily_limit": 1000
148
  },
149
  "llama-maverick-17b": {
150
+ "provider": "groq",
151
+ "model_id": "llama-3.1-8b-instant",
152
+ "display_name": "Llama Maverick 17B",
153
  "size": "17B",
154
  "language": "Multilingual",
155
  "speed": "⚡⚡",
156
+ "description": "Deneysel maverick model",
157
  "plans": ["starter", "pro", "plus"],
158
  "daily_limit": 1000
159
  },
160
  "llama-scout-17b": {
161
+ "provider": "groq",
162
+ "model_id": "llama-3.1-8b-instant",
163
+ "display_name": "Llama Scout 17B",
164
  "size": "17B",
165
  "language": "Multilingual",
166
  "speed": "⚡⚡⚡",
167
+ "description": "Keşif odaklı hızlı model",
168
  "plans": ["starter", "pro", "plus"],
169
  "daily_limit": 1000
170
  },
171
  "gpt-oss-20b": {
172
+ "provider": "groq",
173
+ "model_id": "llama-3.1-8b-instant",
174
+ "display_name": "GPT-OSS 20B",
175
  "size": "20B",
176
  "language": "Multilingual",
177
  "speed": "⚡⚡",
178
+ "description": "Açık kaynak GPT alternatifleri",
179
  "plans": ["starter", "pro", "plus"],
180
  "daily_limit": 1000
181
  },
182
 
183
+ # ============ PRO PLAN MODELS ============
184
+
185
  "gpt-oss-120b": {
186
+ "provider": "groq",
187
+ "model_id": "llama-3.3-70b-versatile",
188
+ "display_name": "GPT-OSS 120B",
189
  "size": "120B",
190
  "language": "Multilingual",
191
  "speed": "⚡⚡",
192
+ "description": "En büyük açık kaynak model",
193
  "plans": ["pro", "plus"],
194
  "daily_limit": 1000
195
  },
196
  "kimi-k2": {
197
+ "provider": "groq",
198
+ "model_id": "llama-3.3-70b-versatile",
199
+ "display_name": "Kimi K2",
200
  "size": "Unknown",
201
  "language": "Chinese",
202
  "speed": "⚡⚡",
203
+ "description": "Çince uzmanı güçlü model",
204
  "plans": ["pro", "plus"],
205
  "daily_limit": 1000
206
  }
 
208
 
209
# Automatic per-plan default model selection.
# Every value must be a key of MODELS; select_model() falls back to the first
# allowed model if the default is not available for the plan.
DEFAULT_MODELS = {
    "free": "llama4-maverick",
    "starter": "gpt4-nano",
    "pro": "llama-70b",
    "plus": "gpt-oss-120b"
}
 
226
  app = FastAPI(
227
  title="Sixfinger Backend API",
228
  version=API_VERSION,
229
+ description="Ultra-fast AI Chat Backend with Multi-Provider Support",
230
  docs_url="/docs",
231
  redoc_url="/redoc"
232
  )
 
234
  # CORS
235
  app.add_middleware(
236
  CORSMiddleware,
237
+ allow_origins=["*"],
238
  allow_credentials=True,
239
  allow_methods=["*"],
240
  allow_headers=["*"],
241
  )
242
 
243
# ========== API CLIENTS ==========
# Groq Client — only constructed when an API key is configured; call paths
# must handle the None case (call_groq_api raises a 500 if unset).
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

# LLM7 Client — OpenAI-compatible endpoint; the service does not validate the
# API key, so the placeholder value from PROVIDERS is passed through.
llm7_client = OpenAI(
    base_url=PROVIDERS["llm7"]["base_url"],
    api_key=PROVIDERS["llm7"]["api_key"]
)
252
+
253
+ # ========== PYDANTIC MODELS ==========
254
  class ChatRequest(BaseModel):
255
  prompt: str = Field(..., description="User's message")
256
  max_tokens: int = Field(default=300, ge=1, le=4000)
 
265
  model_key: str
266
  model_size: str
267
  model_language: str
268
+ provider: str
269
  attempts: int
270
  usage: Dict[str, int]
271
  parameters: Dict[str, Any]
 
279
  """Model seçimi yap"""
280
  allowed_models = get_allowed_models(plan)
281
 
 
282
  if preferred_model and preferred_model in allowed_models:
283
  return preferred_model
284
 
 
285
  default = DEFAULT_MODELS.get(plan, "llama-8b-instant")
286
  return default if default in allowed_models else allowed_models[0]
287
 
 
289
  """Chat messages listesi oluştur"""
290
  messages = []
291
 
 
292
  if system_prompt:
293
  messages.append({"role": "system", "content": system_prompt})
294
+ else:
295
+ messages.append({"role": "system", "content": "Sen yardımcı bir asistansın. Adın SixFinger."})
296
 
 
297
  if history:
298
  for msg in history:
299
  if "role" in msg and "content" in msg:
300
  messages.append(msg)
301
 
 
302
  messages.append({"role": "user", "content": prompt})
303
 
304
  return messages
305
 
306
+ # ========== PROVIDER-SPECIFIC API CALLS ==========
307
+
308
  def call_groq_api(
309
  model_id: str,
310
  messages: list,
 
313
  top_p: float,
314
  stream: bool = False
315
  ):
316
+ """Groq API'ye istek at"""
317
  if not groq_client:
318
  raise HTTPException(status_code=500, detail="Groq API key not configured")
319
 
 
331
  logger.error(f"Groq API error: {e}")
332
  raise HTTPException(status_code=500, detail=f"Groq API error: {str(e)}")
333
 
334
def call_deepinfra_api(
    model_id: str,
    messages: list,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream: bool = False,
    timeout: float = 60.0
) -> Dict[str, Any]:
    """Call the DeepInfra chat-completions API.

    Args:
        model_id: Upstream DeepInfra model identifier.
        messages: OpenAI-style chat message dicts.
        max_tokens, temperature, top_p: Sampling parameters, forwarded verbatim.
        stream: When True, return the raw streaming ``requests.Response``
            (the caller iterates its SSE lines); otherwise return the parsed
            JSON body as a dict.
        timeout: Per-request timeout in seconds — added so a stalled upstream
            cannot hang the worker indefinitely (requests has no default
            timeout).

    Raises:
        HTTPException: 500 on any network, HTTP-status, or parsing failure.
    """
    url = PROVIDERS["deepinfra"]["base_url"]
    headers = {
        "Content-Type": "application/json",
        "X-Deepinfra-Source": "web-page"
    }

    data = {
        "model": model_id,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "stream": stream
    }

    try:
        if stream:
            response = requests.post(
                url, headers=headers, json=data, stream=True, timeout=timeout
            )
            # Bug fix: surface HTTP errors here instead of letting the caller
            # iterate an error body as if it were SSE data.
            response.raise_for_status()
            return response
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        logger.error(f"DeepInfra API error: {e}")
        raise HTTPException(status_code=500, detail=f"DeepInfra API error: {str(e)}")
368
+
369
def call_llm7_api(
    model_id: str,
    messages: list,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream: bool = False
):
    """Send a chat-completion request to LLM7.io.

    Thin wrapper over the OpenAI-compatible client: sampling parameters are
    forwarded unchanged, and the return value is either the completion object
    or, when ``stream`` is True, the chunk iterator.

    Raises:
        HTTPException: 500 wrapping any client/transport failure.
    """
    try:
        return llm7_client.chat.completions.create(
            model=model_id,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=stream,
        )
    except Exception as e:
        logger.error(f"LLM7 API error: {e}")
        raise HTTPException(status_code=500, detail=f"LLM7 API error: {str(e)}")
391
+
392
def call_api(
    provider: str,
    model_id: str,
    messages: list,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stream: bool = False
):
    """Universal API caller — dispatch the request to the provider backend.

    Routes to the Groq, DeepInfra, or LLM7 call path based on ``provider``.

    Raises:
        HTTPException: 400 for an unrecognized provider id.
    """
    handlers = {
        "groq": call_groq_api,
        "deepinfra": call_deepinfra_api,
        "llm7": call_llm7_api,
    }
    handler = handlers.get(provider)
    if handler is None:
        raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")
    return handler(model_id, messages, max_tokens, temperature, top_p, stream)
410
+
411
  # ========== ENDPOINTS ==========
412
 
413
  @app.get("/health")
 
417
  "status": "healthy",
418
  "version": API_VERSION,
419
  "timestamp": datetime.now().isoformat(),
420
+ "providers": {
421
+ "groq": bool(GROQ_API_KEY),
422
+ "deepinfra": True,
423
+ "llm7": True
424
+ }
425
  }
426
 
427
  @app.post("/api/chat")
 
439
  # Model seçimi
440
  model_key = select_model(x_user_plan, x_model)
441
  model_config = MODELS[model_key]
442
+ provider = model_config["provider"]
443
+ model_id = model_config["model_id"]
444
 
445
+ logger.info(f"Chat request: plan={x_user_plan}, model={model_key}, provider={provider}")
446
 
447
  # Messages
448
  messages = build_messages(
 
451
  request.history
452
  )
453
 
 
454
  try:
455
+ # Provider'a göre API call
456
+ if provider == "deepinfra":
457
+ # DeepInfra non-streaming response
458
+ response_data = call_api(
459
+ provider=provider,
460
+ model_id=model_id,
461
+ messages=messages,
462
+ max_tokens=request.max_tokens,
463
+ temperature=request.temperature,
464
+ top_p=request.top_p,
465
+ stream=False
466
+ )
467
+
468
+ content = response_data["choices"][0]["message"]["content"]
469
+ usage = response_data.get("usage", {})
470
+ usage = {
471
+ "prompt_tokens": usage.get("prompt_tokens", 0),
472
+ "completion_tokens": usage.get("completion_tokens", 0),
473
+ "total_tokens": usage.get("total_tokens", 0)
474
+ }
475
+ else:
476
+ # Groq veya LLM7 response
477
+ response = call_api(
478
+ provider=provider,
479
+ model_id=model_id,
480
+ messages=messages,
481
+ max_tokens=request.max_tokens,
482
+ temperature=request.temperature,
483
+ top_p=request.top_p,
484
+ stream=False
485
+ )
486
+
487
+ content = response.choices[0].message.content
488
+ usage = {
489
+ "prompt_tokens": getattr(response.usage, 'prompt_tokens', 0),
490
+ "completion_tokens": getattr(response.usage, 'completion_tokens', 0),
491
+ "total_tokens": getattr(response.usage, 'total_tokens', 0)
492
+ }
493
 
494
  elapsed = time.time() - start_time
495
+ logger.info(f"Chat completed: provider={provider}, tokens={usage['total_tokens']}, time={elapsed:.2f}s")
496
 
 
497
  return {
498
  "response": content,
499
+ "model": model_id,
500
  "model_key": model_key,
501
  "model_size": model_config["size"],
502
  "model_language": model_config["language"],
503
+ "provider": provider,
504
  "attempts": 1,
505
  "usage": usage,
506
  "parameters": {
 
524
  ):
525
  """
526
  Streaming chat endpoint (SSE)
527
+ Tüm provider'ları destekler
 
528
  """
 
529
  model_key = select_model(x_user_plan, x_model)
530
  model_config = MODELS[model_key]
531
+ provider = model_config["provider"]
532
+ model_id = model_config["model_id"]
533
 
534
+ logger.info(f"Stream request: plan={x_user_plan}, model={model_key}, provider={provider}")
535
 
 
536
  messages = build_messages(
537
  request.prompt,
538
  request.system_prompt,
539
  request.history
540
  )
541
 
542
+ def generate_groq():
543
+ """Groq streaming generator"""
 
 
 
544
  try:
545
+ yield f"data: {json.dumps({'info': f'Using {model_key} via Groq'})}\n\n"
 
 
546
 
 
547
  response = call_groq_api(
548
+ model_id=model_id,
549
  messages=messages,
550
  max_tokens=request.max_tokens,
551
  temperature=request.temperature,
 
557
  prompt_tokens = 0
558
  completion_tokens = 0
559
 
 
560
  for chunk in response:
 
561
  if chunk.choices[0].delta.content:
562
  text = chunk.choices[0].delta.content
563
+ yield f"data: {json.dumps({'text': text})}\n\n"
 
564
 
 
565
  if hasattr(chunk, 'x_groq') and hasattr(chunk.x_groq, 'usage'):
566
  usage_data = chunk.x_groq.usage
567
+ prompt_tokens = getattr(usage_data, 'prompt_tokens', 0)
568
+ completion_tokens = getattr(usage_data, 'completion_tokens', 0)
569
+ total_tokens = getattr(usage_data, 'total_tokens', 0)
570
+
571
+ yield f"data: {json.dumps({'done': True, 'model_key': model_key, 'provider': 'groq', 'attempts': 1, 'usage': {'prompt_tokens': prompt_tokens, 'completion_tokens': completion_tokens, 'total_tokens': total_tokens}})}\n\n"
572
+
573
+ except Exception as e:
574
+ logger.error(f"Groq stream error: {e}")
575
+ yield f"data: {json.dumps({'error': str(e)})}\n\n"
576
+
577
+ def generate_deepinfra():
578
+ """DeepInfra streaming generator"""
579
+ try:
580
+ yield f"data: {json.dumps({'info': f'Using {model_key} via DeepInfra'})}\n\n"
581
+
582
+ url = PROVIDERS["deepinfra"]["base_url"]
583
+ headers = {
584
+ "Content-Type": "application/json",
585
+ "X-Deepinfra-Source": "web-page"
586
+ }
587
+ data = {
588
+ "model": model_id,
589
+ "messages": messages,
590
+ "max_tokens": request.max_tokens,
591
+ "temperature": request.temperature,
592
+ "top_p": request.top_p,
593
+ "stream": True
594
+ }
595
+
596
+ response = requests.post(url, headers=headers, json=data, stream=True)
597
 
598
+ total_completion_tokens = 0
 
 
599
 
600
+ for line in response.iter_lines():
601
+ if line:
602
+ decoded = line.decode('utf-8')
603
+ if decoded.startswith("data: "):
604
+ content = decoded[6:]
605
+ if content == "[DONE]":
606
+ break
607
+ try:
608
+ json_data = json.loads(content)
609
+ delta = json_data.get("choices", [{}])[0].get("delta", {})
610
+ if "content" in delta:
611
+ token = delta["content"]
612
+ yield f"data: {json.dumps({'text': token})}\n\n"
613
+ total_completion_tokens += 1
614
+ except json.JSONDecodeError:
615
+ continue
616
 
617
+ yield f"data: {json.dumps({'done': True, 'model_key': model_key, 'provider': 'deepinfra', 'attempts': 1, 'usage': {'prompt_tokens': 0, 'completion_tokens': total_completion_tokens, 'total_tokens': total_completion_tokens}})}\n\n"
618
 
619
  except Exception as e:
620
+ logger.error(f"DeepInfra stream error: {e}")
621
+ yield f"data: {json.dumps({'error': str(e)})}\n\n"
622
+
623
+ def generate_llm7():
624
+ """LLM7.io streaming generator"""
625
+ try:
626
+ yield f"data: {json.dumps({'info': f'Using {model_key} via LLM7.io'})}\n\n"
627
+
628
+ stream = llm7_client.chat.completions.create(
629
+ model=model_id,
630
+ messages=messages,
631
+ max_tokens=request.max_tokens,
632
+ temperature=request.temperature,
633
+ top_p=request.top_p,
634
+ stream=True
635
+ )
636
+
637
+ total_completion_tokens = 0
638
+
639
+ for chunk in stream:
640
+ if chunk.choices[0].delta.content:
641
+ text = chunk.choices[0].delta.content
642
+ yield f"data: {json.dumps({'text': text})}\n\n"
643
+ total_completion_tokens += 1
644
+
645
+ yield f"data: {json.dumps({'done': True, 'model_key': model_key, 'provider': 'llm7', 'attempts': 1, 'usage': {'prompt_tokens': 0, 'completion_tokens': total_completion_tokens, 'total_tokens': total_completion_tokens}})}\n\n"
646
+
647
+ except Exception as e:
648
+ logger.error(f"LLM7 stream error: {e}")
649
+ yield f"data: {json.dumps({'error': str(e)})}\n\n"
650
+
651
+ # Provider'a göre generator seç
652
+ if provider == "groq":
653
+ generator = generate_groq()
654
+ elif provider == "deepinfra":
655
+ generator = generate_deepinfra()
656
+ elif provider == "llm7":
657
+ generator = generate_llm7()
658
+ else:
659
+ raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")
660
 
661
  return StreamingResponse(
662
+ generator,
663
  media_type="text/event-stream",
664
  headers={
665
  "Cache-Control": "no-cache",
 
680
  config = MODELS[model_key]
681
  models_info.append({
682
  "key": model_key,
683
+ "display_name": config.get("display_name", model_key),
684
  "size": config["size"],
685
  "language": config["language"],
686
  "speed": config["speed"],
687
+ "description": config.get("description", ""),
688
+ "provider": config["provider"],
689
  "daily_limit": config["daily_limit"]
690
  })
691
 
692
+ # Provider'a göre grupla
693
+ grouped = {}
694
+ for model in models_info:
695
+ provider = model["provider"]
696
+ if provider not in grouped:
697
+ grouped[provider] = []
698
+ grouped[provider].append(model)
699
+
700
  return {
701
  "plan": x_user_plan,
702
+ "total_models": len(models_info),
703
  "models": models_info,
704
+ "models_by_provider": grouped,
705
+ "default_model": DEFAULT_MODELS.get(x_user_plan, "llama-8b-instant"),
706
+ "providers": list(grouped.keys())
707
+ }
708
+
709
@app.get("/api/providers")
def list_providers():
    """List the available API providers and their current status.

    Groq is reported active only when an API key is configured; the keyless
    providers (DeepInfra, LLM7.io) are always reported active.
    """
    groq_status = "active" if GROQ_API_KEY else "inactive"
    providers = [
        {
            "id": "groq",
            "name": "Groq",
            "status": groq_status,
            "description": "Ultra-fast inference with Groq LPU"
        },
        {
            "id": "deepinfra",
            "name": "DeepInfra",
            "status": "active",
            "description": "Free tier AI models - Llama 4, Qwen3 Coder, DeepSeek"
        },
        {
            "id": "llm7",
            "name": "LLM7.io",
            "status": "active",
            "description": "GPT-4 based models - Free tier available"
        },
    ]
    return {"providers": providers}
734
 
735
  @app.exception_handler(HTTPException)
 
761
  logger.info("🚀 Sixfinger Backend API started")
762
  logger.info(f"📦 Version: {API_VERSION}")
763
  logger.info(f"🔑 Groq API: {'✅ Configured' if GROQ_API_KEY else '❌ Not configured'}")
764
+ logger.info(f"🌐 DeepInfra: ✅ Active (Free tier)")
765
+ logger.info(f"🌐 LLM7.io: ✅ Active (Free tier)")
766
+ logger.info(f"🤖 Total Models: {len(MODELS)}")
767
+
768
+ # Plan başına model sayısı
769
+ for plan in ["free", "starter", "pro", "plus"]:
770
+ count = len(get_allowed_models(plan))
771
+ logger.info(f" └─ {plan.upper()} plan: {count} models")
772
 
773
  @app.on_event("shutdown")
774
  async def shutdown_event():