sixfingerdev commited on
Commit
08e620f
Β·
verified Β·
1 Parent(s): e743be1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +350 -935
app.py CHANGED
@@ -1,104 +1,187 @@
1
- # app.py - Sixfinger Multi-Model Backend with Thinking Models & Auto-Fallback
2
  import json
3
  import os
4
- import random
5
  from datetime import datetime
6
- from flask import Flask, request, jsonify, Response, render_template_string
7
- from huggingface_hub import InferenceClient
8
  import traceback
9
 
10
  app = Flask(__name__)
11
  app.config['JSON_AS_ASCII'] = False
12
 
13
  # ========== CONFIGURATION ==========
14
- HF_TOKEN = os.getenv("HF_TOKEN")
15
  PORT = int(os.getenv("PORT", 7860))
16
- MAX_RETRIES = 5
 
 
 
 
17
 
18
  # ========== MODEL CATEGORIES ==========
19
 
20
- # THINKING MODELS
21
- THINKING_MODELS = {
22
- 'deepseek-r1': {
23
- 'id': 'deepseek-ai/DeepSeek-R1',
24
- 'description': 'DeepSeek Reasoning Model (Thinking Process)',
25
- 'size': '671B',
26
- 'supports_thinking': True
 
 
 
 
 
 
27
  },
28
- 'qwen3-vl-235b-thinking': {
29
- 'id': 'Qwen/Qwen3-VL-235B-A22B-Thinking',
30
- 'description': 'Qwen3 VL Thinking (Multimodal Reasoning)',
31
- 'size': '235B (22B active)',
32
- 'supports_thinking': True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
- 'qwen3-vl-32b-thinking': {
35
- 'id': 'Qwen/Qwen3-VL-32B-Thinking',
36
- 'description': 'Qwen3 VL Thinking (Compact)',
 
 
 
 
37
  'size': '32B',
38
- 'supports_thinking': True
 
 
39
  },
40
  'gpt-oss-120b': {
41
  'id': 'openai/gpt-oss-120b',
42
- 'description': 'GPT OSS 120B',
 
 
 
 
43
  'size': '120B',
44
- 'supports_thinking': False
45
- }
46
- }
47
-
48
- # NORMAL MODELS
49
- NORMAL_MODELS = {
50
- 'qwen-72b': {
51
- 'id': 'Qwen/Qwen2.5-72B-Instruct',
52
- 'description': 'Qwen 2.5 (MΓΌkemmel TΓΌrkΓ§e)',
53
- 'size': '72B'
54
  },
55
- 'qwen-32b': {
56
- 'id': 'Qwen/Qwen2.5-32B-Instruct',
57
- 'description': 'Qwen 2.5 Compact',
58
- 'size': '32B'
 
 
 
 
 
 
 
59
  },
60
- 'qwen3-vl-235b-instruct': {
61
- 'id': 'Qwen/Qwen3-VL-235B-A22B-Instruct',
62
- 'description': 'Qwen3 VL Instruct (Multimodal)',
63
- 'size': '235B (22B active)'
64
- },
65
- 'llama-70b': {
66
- 'id': 'meta-llama/Meta-Llama-3.1-70B-Instruct',
67
- 'description': 'Meta Llama 3.1',
68
- 'size': '70B'
 
 
69
  },
70
- 'mixtral-8x7b': {
71
- 'id': 'mistralai/Mixtral-8x7B-Instruct-v0.1',
72
- 'description': 'Mixtral MoE',
73
- 'size': '8x7B'
 
 
 
 
 
 
 
74
  },
75
- 'gemma-27b': {
76
- 'id': 'google/gemma-2-27b-it',
77
- 'description': 'Google Gemma 2',
78
- 'size': '27B'
 
 
 
 
 
 
 
79
  }
80
  }
81
 
82
- # ========== SYSTEM PROMPTS ==========
83
- SYSTEM_PROMPTS = {
84
- 'sixfinger': """Sen Sixfinger'sΔ±n.
85
- - İstanbul delikanlısı gibi konuşursun
86
- - Samimi, esprili ve sokak argosunu kullanΔ±rsΔ±n
87
- - KΔ±sa ve net cevaplar verirsin
88
- - "Kanka", "lan", "abi", "reis", "kral" gibi kelimeler kullanΔ±rsΔ±n
89
- - Dozında küfür edebilirsin ama aşırıya kaçmazsın
90
- - Türkçe dışına çıkmazsın""",
91
-
92
- 'assistant': """Sen yardΔ±msever bir AI asistanΔ±sΔ±n.
93
- - TΓΌrkΓ§e'yi mΓΌkemmel kullanΔ±rsΔ±n
94
- - DetaylΔ± ve aΓ§Δ±klayΔ±cΔ± cevaplar verirsin
95
- - Profesyonel ama samimi bir dil kullanΔ±rsΔ±n""",
96
 
97
- 'thinking': """Sen derin düşünen bir AI'sın.
98
- - Adım adım düşünürsün (chain-of-thought)
99
- - Problemleri analiz edersin
100
- - Reasoning sΓΌrecini gΓΆsterirsin
101
- - TΓΌrkΓ§e'de mΓΌkemmel aΓ§Δ±klamalar yaparsΔ±n"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  }
103
 
104
  # ========== STATISTICS ==========
@@ -112,201 +195,221 @@ stats = {
112
  'start_time': datetime.utcnow()
113
  }
114
 
115
- model_clients = {}
116
-
117
  # ========== HELPER FUNCTIONS ==========
118
 
119
- def get_client(model_id):
120
- """Get or create client for model"""
121
- if model_id in model_clients:
122
- return model_clients[model_id]
123
 
124
- try:
125
- client = InferenceClient(model=model_id, token=HF_TOKEN)
126
- model_clients[model_id] = client
127
- return client
128
- except Exception as e:
129
- print(f"❌ Failed to create client for {model_id}: {e}")
130
- return None
131
-
132
- def select_random_model(use_thinking=False):
133
- """Rastgele model seΓ§"""
134
- models = THINKING_MODELS if use_thinking else NORMAL_MODELS
135
- model_keys = list(models.keys())
136
- random.shuffle(model_keys)
137
- return model_keys
138
 
139
  def try_model(model_key, messages, max_tokens, temperature, top_p, stream=False):
140
  """Bir model'i dene"""
141
- if model_key in THINKING_MODELS:
142
- model_info = THINKING_MODELS[model_key]
143
- elif model_key in NORMAL_MODELS:
144
- model_info = NORMAL_MODELS[model_key]
145
- else:
146
  return None, f"Unknown model: {model_key}"
147
 
 
148
  model_id = model_info['id']
149
 
150
  try:
151
- client = get_client(model_id)
152
- if not client:
153
- return None, "Client creation failed"
154
-
155
  if stream:
156
- return client.chat_completion(
157
- messages=messages, max_tokens=max_tokens,
158
- temperature=temperature, top_p=top_p, stream=True
159
- ), None
 
 
 
 
 
 
 
160
  else:
161
- response = client.chat_completion(
162
- messages=messages, max_tokens=max_tokens,
163
- temperature=temperature, top_p=top_p
 
 
 
164
  )
 
165
  stats['model_usage'][model_key] = stats['model_usage'].get(model_key, 0) + 1
166
  return response, None
167
 
168
  except Exception as e:
169
  error_msg = str(e)
170
  stats['model_failures'][model_key] = stats['model_failures'].get(model_key, 0) + 1
 
171
  print(f"❌ Model {model_key} failed: {error_msg}")
172
 
173
- if 'rate limit' in error_msg.lower() or 'quota' in error_msg.lower():
174
- return None, f"Rate limit/Quota exceeded"
 
 
175
  elif 'timeout' in error_msg.lower():
176
- return None, f"Timeout"
177
  else:
178
- return None, f"Error: {error_msg}"
179
 
180
- def format_messages(system_prompt, user_message, history=None):
181
  """Format messages"""
182
- messages = [{"role": "system", "content": system_prompt}]
 
 
 
 
 
 
 
 
 
 
183
  if history:
184
  messages.extend(history)
185
- messages.append({"role": "user", "content": user_message})
186
- return messages
187
-
188
- def extract_thinking_process(response_text):
189
- """Extract thinking process"""
190
- if '<think>' in response_text and '</think>' in response_text:
191
- start = response_text.find('<think>') + 7
192
- end = response_text.find('</think>')
193
- thinking = response_text[start:end].strip()
194
- answer = response_text[end+8:].strip()
195
- return thinking, answer
196
 
197
- if '<reasoning>' in response_text and '</reasoning>' in response_text:
198
- start = response_text.find('<reasoning>') + 11
199
- end = response_text.find('</reasoning>')
200
- thinking = response_text[start:end].strip()
201
- answer = response_text[end+12:].strip()
202
- return thinking, answer
203
 
204
- return None, response_text
205
 
206
  # ========== ROUTES ==========
207
 
208
  @app.route('/')
209
  def index():
210
- """Documentation Homepage"""
211
  uptime = datetime.utcnow() - stats['start_time']
212
  uptime_str = str(uptime).split('.')[0]
213
 
214
- success_rate = (stats['successful_requests'] / max(stats['total_requests'], 1) * 100)
215
-
216
- return render_template_string(DOCUMENTATION_HTML,
217
- uptime=uptime_str,
218
- total_requests=stats['total_requests'],
219
- successful_requests=stats['successful_requests'],
220
- failed_requests=stats['failed_requests'],
221
- success_rate=f"{success_rate:.1f}",
222
- fallback_count=stats['fallback_count'],
223
- thinking_models=THINKING_MODELS,
224
- normal_models=NORMAL_MODELS,
225
- model_usage=stats['model_usage'],
226
- model_failures=stats['model_failures'])
227
-
228
- @app.route('/api/info')
229
- def api_info():
230
- """API Info (JSON)"""
231
- uptime = datetime.utcnow() - stats['start_time']
232
-
233
  return jsonify({
234
- 'name': 'Sixfinger Multi-Model Backend',
235
- 'version': '3.0.0',
236
  'status': 'online',
237
- 'uptime': str(uptime).split('.')[0],
238
- 'stats': stats,
239
- 'model_categories': {
240
- 'thinking_models': len(THINKING_MODELS),
241
- 'normal_models': len(NORMAL_MODELS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  }
243
  })
244
 
245
  @app.route('/api/models')
246
  def list_models():
247
- """List all models"""
248
  return jsonify({
249
- 'thinking_models': [
250
  {
251
  'key': key,
252
  'model_id': info['id'],
253
  'description': info['description'],
254
  'size': info['size'],
 
 
 
 
 
 
 
 
255
  'usage_count': stats['model_usage'].get(key, 0),
256
  'failure_count': stats['model_failures'].get(key, 0)
257
  }
258
- for key, info in THINKING_MODELS.items()
259
  ],
260
- 'normal_models': [
261
  {
262
  'key': key,
263
  'model_id': info['id'],
264
  'description': info['description'],
265
  'size': info['size'],
 
 
 
 
 
 
 
 
 
266
  'usage_count': stats['model_usage'].get(key, 0),
267
  'failure_count': stats['model_failures'].get(key, 0)
268
  }
269
- for key, info in NORMAL_MODELS.items()
270
- ]
 
271
  })
272
 
273
  @app.route('/api/chat', methods=['POST'])
274
  def chat():
275
- """Chat with auto-fallback"""
276
  stats['total_requests'] += 1
277
 
278
  try:
279
  data = request.json
280
- if not data:
281
- stats['failed_requests'] += 1
282
- return jsonify({'error': 'JSON body required'}), 400
283
-
284
- prompt = data.get('prompt') or data.get('message')
285
- if not prompt:
286
  stats['failed_requests'] += 1
287
  return jsonify({'error': 'prompt required'}), 400
288
 
289
- use_thinking = data.get('thinking', False)
 
290
  max_tokens = min(data.get('max_tokens', 1000), 4000)
291
- temperature = min(max(data.get('temperature', 0.7), 0.1), 2.0)
292
  top_p = min(max(data.get('top_p', 0.9), 0.1), 1.0)
 
 
293
 
294
- personality = data.get('personality', 'thinking' if use_thinking else 'sixfinger')
295
- system_prompt = SYSTEM_PROMPTS.get(personality, SYSTEM_PROMPTS['sixfinger'])
 
296
 
297
- history = data.get('history', [])
298
- messages = format_messages(system_prompt, prompt, history)
 
 
 
 
299
 
300
- model_keys_to_try = select_random_model(use_thinking)
 
301
 
302
- last_error = None
303
  attempts = []
304
 
305
- for i, model_key in enumerate(model_keys_to_try):
306
- if i >= MAX_RETRIES:
307
  break
308
 
309
- print(f"πŸ”„ Trying model {i+1}/{min(MAX_RETRIES, len(model_keys_to_try))}: {model_key}")
310
 
311
  response, error = try_model(model_key, messages, max_tokens, temperature, top_p)
312
 
@@ -317,50 +420,38 @@ def chat():
317
  })
318
 
319
  if response:
320
- response_text = response.choices[0].message.content
321
- thinking_process, final_answer = extract_thinking_process(response_text)
322
-
323
- model_info = THINKING_MODELS.get(model_key) or NORMAL_MODELS.get(model_key)
324
 
325
  stats['successful_requests'] += 1
326
  if i > 0:
327
  stats['fallback_count'] += 1
328
 
329
- prompt_tokens = sum(len(m['content'].split()) for m in messages)
330
- completion_tokens = len(response_text.split())
331
-
332
  result = {
333
- 'response': final_answer.strip(),
334
  'model': model_info['id'],
335
  'model_key': model_key,
336
- 'model_category': 'thinking' if use_thinking else 'normal',
 
337
  'attempts': i + 1,
338
  'usage': {
339
- 'prompt_tokens': int(prompt_tokens * 1.3),
340
- 'completion_tokens': int(completion_tokens * 1.3),
341
- 'total_tokens': int((prompt_tokens + completion_tokens) * 1.3)
342
  },
343
  'parameters': {
344
  'max_tokens': max_tokens,
345
  'temperature': temperature,
346
- 'top_p': top_p,
347
- 'thinking_enabled': use_thinking
348
  }
349
  }
350
 
351
- if thinking_process:
352
- result['thinking_process'] = thinking_process
353
- result['has_thinking'] = True
354
- else:
355
- result['has_thinking'] = False
356
-
357
  if i > 0:
358
  result['fallback_attempts'] = attempts
359
 
360
  return jsonify(result)
361
 
362
  else:
363
- last_error = error
364
  print(f"❌ {model_key} failed: {error}")
365
  continue
366
 
@@ -368,45 +459,52 @@ def chat():
368
 
369
  return jsonify({
370
  'error': 'All models failed',
371
- 'last_error': last_error,
372
  'attempts': attempts,
373
- 'total_attempts': len(attempts)
 
374
  }), 503
375
 
376
  except Exception as e:
377
  stats['failed_requests'] += 1
378
- return jsonify({'error': str(e), 'traceback': traceback.format_exc()}), 500
 
 
 
379
 
380
  @app.route('/api/chat/stream', methods=['POST'])
381
  def chat_stream():
382
- """Streaming chat with fallback"""
383
  stats['total_requests'] += 1
384
 
385
  try:
386
  data = request.json
387
- if not data:
388
- return jsonify({'error': 'JSON body required'}), 400
389
-
390
- prompt = data.get('prompt') or data.get('message')
391
- if not prompt:
392
  return jsonify({'error': 'prompt required'}), 400
393
 
394
- use_thinking = data.get('thinking', False)
 
395
  max_tokens = min(data.get('max_tokens', 1000), 4000)
396
- temperature = min(max(data.get('temperature', 0.7), 0.1), 2.0)
397
  top_p = min(max(data.get('top_p', 0.9), 0.1), 1.0)
 
 
398
 
399
- personality = data.get('personality', 'thinking' if use_thinking else 'sixfinger')
400
- system_prompt = SYSTEM_PROMPTS.get(personality, SYSTEM_PROMPTS['sixfinger'])
 
401
 
402
- history = data.get('history', [])
403
- messages = format_messages(system_prompt, prompt, history)
404
 
405
- model_keys_to_try = select_random_model(use_thinking)
 
 
 
 
406
 
407
  def generate():
408
- for i, model_key in enumerate(model_keys_to_try):
409
- if i >= MAX_RETRIES:
410
  break
411
 
412
  yield f"data: {json.dumps({'info': f'Trying model: {model_key}'}, ensure_ascii=False)}\n\n"
@@ -415,23 +513,21 @@ def chat_stream():
415
 
416
  if stream_response:
417
  try:
418
- for message in stream_response:
419
- chunk = message.choices[0].delta.content
420
- if chunk:
421
- yield f"data: {json.dumps({'text': chunk}, ensure_ascii=False)}\n\n"
422
 
423
  stats['successful_requests'] += 1
424
- stats['model_usage'][model_key] = stats['model_usage'].get(model_key, 0) + 1
425
-
426
  if i > 0:
427
  stats['fallback_count'] += 1
428
 
429
- model_info = THINKING_MODELS.get(model_key) or NORMAL_MODELS.get(model_key)
430
  yield f"data: {json.dumps({'done': True, 'model': model_info['id'], 'model_key': model_key, 'attempts': i+1})}\n\n"
431
  return
432
 
433
  except Exception as e:
434
- yield f"data: {json.dumps({'error': f'Stream error: {str(e)}'}, ensure_ascii=False)}\n\n"
435
  continue
436
 
437
  else:
@@ -490,718 +586,37 @@ def internal_error(e):
490
  @app.after_request
491
  def after_request(response):
492
  response.headers.add('Access-Control-Allow-Origin', '*')
493
- response.headers.add('Access-Control-Allow-Headers', 'Content-Type,X-API-Key')
494
  response.headers.add('Access-Control-Allow-Methods', 'GET,POST,OPTIONS')
495
  return response
496
 
497
- # ========== HTML DOCUMENTATION ==========
498
-
499
- DOCUMENTATION_HTML = """
500
- <!DOCTYPE html>
501
- <html lang="tr">
502
- <head>
503
- <meta charset="UTF-8">
504
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
505
- <title>Sixfinger Thinking Backend - API Documentation</title>
506
- <style>
507
- * { margin: 0; padding: 0; box-sizing: border-box; }
508
- body {
509
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
510
- background: linear-gradient(135deg, #1e3c72 0%, #2a5298 50%, #7e22ce 100%);
511
- color: #333;
512
- line-height: 1.6;
513
- }
514
- .container { max-width: 1400px; margin: 0 auto; padding: 20px; }
515
-
516
- /* Header */
517
- header {
518
- background: rgba(255, 255, 255, 0.95);
519
- backdrop-filter: blur(10px);
520
- padding: 40px;
521
- border-radius: 20px;
522
- box-shadow: 0 20px 60px rgba(0,0,0,0.3);
523
- margin-bottom: 30px;
524
- text-align: center;
525
- }
526
- h1 {
527
- color: #7e22ce;
528
- font-size: 3em;
529
- margin-bottom: 10px;
530
- text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
531
- }
532
- .subtitle {
533
- color: #666;
534
- font-size: 1.3em;
535
- margin-bottom: 20px;
536
- }
537
- .badge {
538
- display: inline-block;
539
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
540
- color: white;
541
- padding: 8px 20px;
542
- border-radius: 25px;
543
- font-size: 0.9em;
544
- margin: 5px;
545
- font-weight: bold;
546
- }
547
- .badge.thinking { background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); }
548
- .badge.fallback { background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); }
549
-
550
- /* Stats Grid */
551
- .stats-grid {
552
- display: grid;
553
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
554
- gap: 20px;
555
- margin: 30px 0;
556
- }
557
- .stat-box {
558
- background: rgba(255, 255, 255, 0.95);
559
- padding: 25px;
560
- border-radius: 15px;
561
- text-align: center;
562
- box-shadow: 0 10px 30px rgba(0,0,0,0.2);
563
- transition: transform 0.3s;
564
- }
565
- .stat-box:hover { transform: translateY(-10px); }
566
- .stat-box h3 {
567
- font-size: 2.5em;
568
- color: #7e22ce;
569
- margin-bottom: 5px;
570
- }
571
- .stat-box p { color: #666; font-weight: 500; }
572
-
573
- /* Sections */
574
- .section {
575
- background: rgba(255, 255, 255, 0.95);
576
- backdrop-filter: blur(10px);
577
- padding: 40px;
578
- border-radius: 20px;
579
- box-shadow: 0 20px 60px rgba(0,0,0,0.3);
580
- margin-bottom: 30px;
581
- }
582
- h2 {
583
- color: #7e22ce;
584
- margin-bottom: 25px;
585
- padding-bottom: 15px;
586
- border-bottom: 3px solid #f0f0f0;
587
- font-size: 2em;
588
- }
589
- h3 {
590
- color: #1e3c72;
591
- margin: 25px 0 15px;
592
- font-size: 1.5em;
593
- }
594
-
595
- /* Code blocks */
596
- code {
597
- background: #f8f9fa;
598
- padding: 3px 8px;
599
- border-radius: 5px;
600
- font-family: 'Courier New', monospace;
601
- color: #e83e8c;
602
- font-size: 0.95em;
603
- }
604
- pre {
605
- background: #1e1e1e;
606
- color: #d4d4d4;
607
- padding: 25px;
608
- border-radius: 12px;
609
- overflow-x: auto;
610
- margin: 20px 0;
611
- border-left: 5px solid #7e22ce;
612
- position: relative;
613
- }
614
- pre code {
615
- background: none;
616
- color: inherit;
617
- padding: 0;
618
- font-size: 0.9em;
619
- }
620
-
621
- /* Model Cards */
622
- .model-grid {
623
- display: grid;
624
- grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
625
- gap: 20px;
626
- margin: 20px 0;
627
- }
628
- .model-card {
629
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
630
- padding: 20px;
631
- border-radius: 12px;
632
- border-left: 5px solid #7e22ce;
633
- transition: all 0.3s;
634
- }
635
- .model-card:hover {
636
- transform: translateX(10px);
637
- box-shadow: 0 10px 30px rgba(0,0,0,0.2);
638
- }
639
- .model-card.thinking {
640
- border-left-color: #f5576c;
641
- background: linear-gradient(135deg, #ffecd2 0%, #fcb69f 100%);
642
- }
643
- .model-card h4 {
644
- color: #1e3c72;
645
- margin-bottom: 10px;
646
- font-size: 1.2em;
647
- }
648
- .model-card .model-id {
649
- font-family: monospace;
650
- font-size: 0.85em;
651
- color: #666;
652
- margin: 5px 0;
653
- }
654
- .model-card .stats {
655
- display: flex;
656
- justify-content: space-between;
657
- margin-top: 10px;
658
- font-size: 0.9em;
659
- }
660
- .model-card .stats span {
661
- padding: 5px 10px;
662
- background: rgba(255,255,255,0.5);
663
- border-radius: 5px;
664
- }
665
-
666
- /* Tables */
667
- table {
668
- width: 100%;
669
- border-collapse: collapse;
670
- margin: 20px 0;
671
- }
672
- th, td {
673
- padding: 15px;
674
- text-align: left;
675
- border-bottom: 1px solid #e0e0e0;
676
- }
677
- th {
678
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
679
- color: white;
680
- font-weight: 600;
681
- }
682
- tr:hover { background: #f8f9fa; }
683
-
684
- /* Info boxes */
685
- .info-box {
686
- background: #e7f3ff;
687
- border-left: 5px solid #2196F3;
688
- padding: 20px;
689
- margin: 20px 0;
690
- border-radius: 8px;
691
- }
692
- .success-box {
693
- background: #d4edda;
694
- border-left: 5px solid #28a745;
695
- padding: 20px;
696
- margin: 20px 0;
697
- border-radius: 8px;
698
- }
699
- .warning-box {
700
- background: #fff3cd;
701
- border-left: 5px solid #ffc107;
702
- padding: 20px;
703
- margin: 20px 0;
704
- border-radius: 8px;
705
- }
706
-
707
- /* Tabs */
708
- .tabs {
709
- display: flex;
710
- gap: 10px;
711
- margin-bottom: 20px;
712
- }
713
- .tab {
714
- padding: 12px 25px;
715
- background: #f8f9fa;
716
- border-radius: 8px;
717
- cursor: pointer;
718
- transition: all 0.3s;
719
- border: 2px solid transparent;
720
- }
721
- .tab:hover, .tab.active {
722
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
723
- color: white;
724
- border-color: #764ba2;
725
- }
726
-
727
- /* Footer */
728
- footer {
729
- text-align: center;
730
- color: white;
731
- padding: 30px;
732
- margin-top: 40px;
733
- font-size: 1.1em;
734
- }
735
-
736
- /* Responsive */
737
- @media (max-width: 768px) {
738
- h1 { font-size: 2em; }
739
- .stats-grid, .model-grid { grid-template-columns: 1fr; }
740
- .section { padding: 20px; }
741
- }
742
-
743
- /* Copy button */
744
- .copy-btn {
745
- position: absolute;
746
- top: 10px;
747
- right: 10px;
748
- background: #7e22ce;
749
- color: white;
750
- border: none;
751
- padding: 8px 15px;
752
- border-radius: 6px;
753
- cursor: pointer;
754
- font-size: 0.85em;
755
- transition: all 0.3s;
756
- }
757
- .copy-btn:hover {
758
- background: #9333ea;
759
- transform: scale(1.05);
760
- }
761
- </style>
762
- </head>
763
- <body>
764
- <div class="container">
765
- <header>
766
- <h1>🧠 Sixfinger Thinking Backend</h1>
767
- <p class="subtitle">Multi-Model AI with Auto-Fallback & Chain-of-Thought</p>
768
- <div>
769
- <span class="badge thinking">4 Thinking Models</span>
770
- <span class="badge">6 Normal Models</span>
771
- <span class="badge fallback">Auto-Fallback</span>
772
- <span class="badge">∞ Unlimited</span>
773
- </div>
774
- </header>
775
-
776
- <!-- Stats -->
777
- <div class="stats-grid">
778
- <div class="stat-box">
779
- <h3>{{ uptime }}</h3>
780
- <p>⏰ Uptime</p>
781
- </div>
782
- <div class="stat-box">
783
- <h3>{{ total_requests }}</h3>
784
- <p>πŸ“Š Total Requests</p>
785
- </div>
786
- <div class="stat-box">
787
- <h3>{{ success_rate }}%</h3>
788
- <p>βœ… Success Rate</p>
789
- </div>
790
- <div class="stat-box">
791
- <h3>{{ fallback_count }}</h3>
792
- <p>πŸ”„ Fallbacks</p>
793
- </div>
794
- </div>
795
-
796
- <!-- Quick Start -->
797
- <div class="section">
798
- <h2>πŸš€ Quick Start</h2>
799
-
800
- <div class="success-box">
801
- <strong>βœ… No API Key Required!</strong><br>
802
- Direkt kullanabilirsiniz. Rate limit yok, tamamen sΔ±nΔ±rsΔ±z!
803
- </div>
804
-
805
- <h3>1. Thinking Mode (Deep Reasoning)</h3>
806
- <pre><button class="copy-btn" onclick="copyCode(this)">πŸ“‹ Copy</button><code>curl -X POST https://yourusername-sixfinger-backend.hf.space/api/chat \
807
- -H "Content-Type: application/json" \
808
- -d '{
809
- "prompt": "2+2 neden 4 eder? Matematiksel olarak aΓ§Δ±kla.",
810
- "thinking": true,
811
- "max_tokens": 2000
812
- }'</code></pre>
813
-
814
- <h3>2. Normal Mode (Fast Response)</h3>
815
- <pre><button class="copy-btn" onclick="copyCode(this)">πŸ“‹ Copy</button><code>curl -X POST https://yourusername-sixfinger-backend.hf.space/api/chat \
816
- -H "Content-Type: application/json" \
817
- -d '{
818
- "prompt": "Merhaba kanka!",
819
- "thinking": false,
820
- "personality": "sixfinger"
821
- }'</code></pre>
822
-
823
- <h3>3. Streaming Mode</h3>
824
- <pre><button class="copy-btn" onclick="copyCode(this)">πŸ“‹ Copy</button><code>curl -X POST https://yourusername-sixfinger-backend.hf.space/api/chat/stream \
825
- -H "Content-Type: application/json" \
826
- -d '{"prompt": "Python nedir?", "thinking": true}' \
827
- --no-buffer</code></pre>
828
- </div>
829
-
830
- <!-- Thinking vs Normal -->
831
- <div class="section">
832
- <h2>🧠 Thinking vs Normal Models</h2>
833
-
834
- <table>
835
- <tr>
836
- <th>Feature</th>
837
- <th>Thinking Models</th>
838
- <th>Normal Models</th>
839
- </tr>
840
- <tr>
841
- <td><strong>Use Case</strong></td>
842
- <td>Matematik, reasoning, problem Γ§ΓΆzme</td>
843
- <td>Genel sohbet, hΔ±zlΔ± cevaplar</td>
844
- </tr>
845
- <tr>
846
- <td><strong>Response Time</strong></td>
847
- <td>⏱️ Yavaş (5-15 saniye)</td>
848
- <td>⚑ Hızlı (1-3 saniye)</td>
849
- </tr>
850
- <tr>
851
- <td><strong>Chain-of-Thought</strong></td>
852
- <td>βœ… Evet (<code>&lt;think&gt;...&lt;/think&gt;</code>)</td>
853
- <td>❌ Hayır</td>
854
- </tr>
855
- <tr>
856
- <td><strong>Models</strong></td>
857
- <td>DeepSeek-R1, Qwen3-VL Thinking, GPT-OSS-120B</td>
858
- <td>Qwen2.5, Llama-3.1, Mixtral, Gemma</td>
859
- </tr>
860
- <tr>
861
- <td><strong>Token Limit</strong></td>
862
- <td>4000 max</td>
863
- <td>4000 max</td>
864
- </tr>
865
- </table>
866
- </div>
867
-
868
- <!-- Thinking Models -->
869
- <div class="section">
870
- <h2>πŸ”₯ Thinking Models (Reasoning)</h2>
871
-
872
- <div class="model-grid">
873
- {% for key, info in thinking_models.items() %}
874
- <div class="model-card thinking">
875
- <h4>{{ info['description'] }}</h4>
876
- <div class="model-id">{{ info['id'] }}</div>
877
- <p>πŸ“¦ Size: <strong>{{ info['size'] }}</strong></p>
878
- <div class="stats">
879
- <span>βœ… {{ model_usage.get(key, 0) }} kullanΔ±m</span>
880
- <span>❌ {{ model_failures.get(key, 0) }} hata</span>
881
- </div>
882
- </div>
883
- {% endfor %}
884
- </div>
885
- </div>
886
-
887
- <!-- Normal Models -->
888
- <div class="section">
889
- <h2>⚑ Normal Models (Fast)</h2>
890
-
891
- <div class="model-grid">
892
- {% for key, info in normal_models.items() %}
893
- <div class="model-card">
894
- <h4>{{ info['description'] }}</h4>
895
- <div class="model-id">{{ info['id'] }}</div>
896
- <p>πŸ“¦ Size: <strong>{{ info['size'] }}</strong></p>
897
- <div class="stats">
898
- <span>βœ… {{ model_usage.get(key, 0) }} kullanΔ±m</span>
899
- <span>❌ {{ model_failures.get(key, 0) }} hata</span>
900
- </div>
901
- </div>
902
- {% endfor %}
903
- </div>
904
- </div>
905
-
906
- <!-- API Endpoints -->
907
- <div class="section">
908
- <h2>🌐 API Endpoints</h2>
909
-
910
- <h3>POST /api/chat</h3>
911
- <p>Normal chat endpoint (JSON response)</p>
912
-
913
- <h4>Request Parameters:</h4>
914
- <table>
915
- <tr>
916
- <th>Parameter</th>
917
- <th>Type</th>
918
- <th>Required</th>
919
- <th>Default</th>
920
- <th>Description</th>
921
- </tr>
922
- <tr>
923
- <td><code>prompt</code></td>
924
- <td>string</td>
925
- <td>βœ…</td>
926
- <td>-</td>
927
- <td>User message</td>
928
- </tr>
929
- <tr>
930
- <td><code>thinking</code></td>
931
- <td>boolean</td>
932
- <td>❌</td>
933
- <td>false</td>
934
- <td>true = thinking models, false = normal models</td>
935
- </tr>
936
- <tr>
937
- <td><code>personality</code></td>
938
- <td>string</td>
939
- <td>❌</td>
940
- <td>sixfinger</td>
941
- <td>sixfinger | assistant | thinking</td>
942
- </tr>
943
- <tr>
944
- <td><code>max_tokens</code></td>
945
- <td>integer</td>
946
- <td>❌</td>
947
- <td>1000</td>
948
- <td>Max tokens (1-4000)</td>
949
- </tr>
950
- <tr>
951
- <td><code>temperature</code></td>
952
- <td>float</td>
953
- <td>❌</td>
954
- <td>0.7</td>
955
- <td>Creativity (0.1-2.0)</td>
956
- </tr>
957
- <tr>
958
- <td><code>history</code></td>
959
- <td>array</td>
960
- <td>❌</td>
961
- <td>[]</td>
962
- <td>Conversation history</td>
963
- </tr>
964
- </table>
965
-
966
- <h4>Response Example:</h4>
967
- <pre><code>{
968
- "response": "Kanka bak şimdi 2+2=4 olur çünkü...",
969
- "thinking_process": "Adım 1: 2 sayısı... Adım 2: Toplama işlemi...",
970
- "has_thinking": true,
971
- "model": "deepseek-ai/DeepSeek-R1",
972
- "model_key": "deepseek-r1",
973
- "model_category": "thinking",
974
- "attempts": 1,
975
- "usage": {
976
- "prompt_tokens": 50,
977
- "completion_tokens": 200,
978
- "total_tokens": 250
979
- }
980
- }</code></pre>
981
-
982
- <div class="info-box">
983
- <strong>πŸ’‘ Auto-Fallback:</strong><br>
984
- Eğer seçilen model hata verirse (rate limit, timeout, vb.) otomatik olarak başka bir model denenir.
985
- Response'ta <code>attempts</code> field'ı kaç model denendiğini gâsterir.
986
- </div>
987
- </div>
988
-
989
- <!-- Code Examples -->
990
- <div class="section">
991
- <h2>πŸ’» Code Examples</h2>
992
-
993
- <h3>Python</h3>
994
- <pre><button class="copy-btn" onclick="copyCode(this)">πŸ“‹ Copy</button><code>import requests
995
-
996
- API_URL = "https://yourusername-sixfinger-backend.hf.space/api/chat"
997
-
998
- def chat(prompt, thinking=False, personality="sixfinger"):
999
- response = requests.post(API_URL, json={
1000
- "prompt": prompt,
1001
- "thinking": thinking,
1002
- "personality": personality,
1003
- "max_tokens": 1500
1004
- })
1005
-
1006
- data = response.json()
1007
-
1008
- # Thinking process varsa gΓΆster
1009
- if data.get('has_thinking'):
1010
- print("🧠 Thinking Process:")
1011
- print(data['thinking_process'])
1012
- print("\nπŸ“ Answer:")
1013
-
1014
- print(data['response'])
1015
- print(f"\nπŸ“Š Model: {data['model_key']} (Attempt: {data['attempts']})")
1016
-
1017
- # Thinking mode
1018
- chat("3x + 7 = 22 denklemini Γ§ΓΆz", thinking=True)
1019
-
1020
- # Normal mode
1021
- chat("Merhaba kanka!", thinking=False)</code></pre>
1022
-
1023
- <h3>JavaScript (Fetch)</h3>
1024
- <pre><button class="copy-btn" onclick="copyCode(this)">πŸ“‹ Copy</button><code>const API_URL = "https://yourusername-sixfinger-backend.hf.space/api/chat";
1025
-
1026
- async function chat(prompt, thinking = false) {
1027
- const response = await fetch(API_URL, {
1028
- method: "POST",
1029
- headers: { "Content-Type": "application/json" },
1030
- body: JSON.stringify({
1031
- prompt: prompt,
1032
- thinking: thinking,
1033
- max_tokens: 1500
1034
- })
1035
- });
1036
-
1037
- const data = await response.json();
1038
-
1039
- if (data.has_thinking) {
1040
- console.log("🧠 Thinking:", data.thinking_process);
1041
- }
1042
-
1043
- console.log("πŸ“ Answer:", data.response);
1044
- console.log("πŸ“Š Model:", data.model_key);
1045
- }
1046
-
1047
- // Usage
1048
- chat("Python nedir?", true);</code></pre>
1049
-
1050
- <h3>cURL (Streaming)</h3>
1051
- <pre><button class="copy-btn" onclick="copyCode(this)">πŸ“‹ Copy</button><code>curl -X POST https://yourusername-sixfinger-backend.hf.space/api/chat/stream \
1052
- -H "Content-Type: application/json" \
1053
- -d '{
1054
- "prompt": "Yapay zeka nedir?",
1055
- "thinking": true,
1056
- "max_tokens": 2000
1057
- }' \
1058
- --no-buffer</code></pre>
1059
- </div>
1060
-
1061
- <!-- Fallback Mechanism -->
1062
- <div class="section">
1063
- <h2>πŸ”„ Auto-Fallback Mechanism</h2>
1064
-
1065
- <div class="warning-box">
1066
- <strong>⚠️ NasΔ±l Γ‡alışır?</strong>
1067
- <ol style="margin: 15px 0 0 20px;">
1068
- <li>Rastgele bir model seΓ§ilir (kategori: thinking/normal)</li>
1069
- <li>Model'e istek atΔ±lΔ±r</li>
1070
- <li><strong>Başarısız olursa:</strong> Otomatik başka model denenir</li>
1071
- <li>Maksimum 5 model denenir</li>
1072
- <li>Tüm modeller başarısız olursa <code>503 Service Unavailable</code> dâner</li>
1073
- </ol>
1074
- </div>
1075
-
1076
- <h3>Hata Tipleri:</h3>
1077
- <ul style="margin-left: 20px;">
1078
- <li><strong>Rate Limit Exceeded:</strong> Model kotasΔ± doldu β†’ başka model dene</li>
1079
- <li><strong>Timeout:</strong> Model yanΔ±t vermedi β†’ başka model dene</li>
1080
- <li><strong>Model Error:</strong> Model hatasΔ± β†’ başka model dene</li>
1081
- </ul>
1082
-
1083
- <h3>Response'ta Fallback Bilgisi:</h3>
1084
- <pre><code>{
1085
- "response": "...",
1086
- "attempts": 3, // 3 model denendi
1087
- "fallback_attempts": [
1088
- {"model": "deepseek-r1", "success": false, "error": "Rate limit"},
1089
- {"model": "qwen3-vl-235b-thinking", "success": false, "error": "Timeout"},
1090
- {"model": "qwen3-vl-32b-thinking", "success": true, "error": null}
1091
- ]
1092
- }</code></pre>
1093
- </div>
1094
-
1095
- <!-- Other Endpoints -->
1096
- <div class="section">
1097
- <h2>πŸ“‘ Other Endpoints</h2>
1098
-
1099
- <h3>GET /api/models</h3>
1100
- <p>TΓΌm modelleri ve istatistiklerini listele</p>
1101
- <pre><code>curl https://yourusername-sixfinger-backend.hf.space/api/models</code></pre>
1102
-
1103
- <h3>GET /api/stats</h3>
1104
- <p>DetaylΔ± kullanΔ±m istatistikleri</p>
1105
- <pre><code>curl https://yourusername-sixfinger-backend.hf.space/api/stats</code></pre>
1106
-
1107
- <h3>GET /health</h3>
1108
- <p>Health check</p>
1109
- <pre><code>curl https://yourusername-sixfinger-backend.hf.space/health</code></pre>
1110
- </div>
1111
-
1112
- <!-- Best Practices -->
1113
- <div class="section">
1114
- <h2>βœ… Best Practices</h2>
1115
-
1116
- <h3>1. Thinking Mode KullanΔ±mΔ±</h3>
1117
- <p><strong>βœ… Δ°yi:</strong> Matematik, reasoning, problem Γ§ΓΆzme, analiz</p>
1118
- <pre><code>{"prompt": "Bir araba 60 km/s hΔ±zla 3 saat giderse kaΓ§ km yol alΔ±r?", "thinking": true}</code></pre>
1119
-
1120
- <p><strong>❌ Kâtü:</strong> Basit sorular, selamlaşma</p>
1121
- <pre><code>{"prompt": "Merhaba", "thinking": true} // Gereksiz, normal mode yeterli</code></pre>
1122
-
1123
- <h3>2. Max Tokens AyarΔ±</h3>
1124
- <ul style="margin-left: 20px;">
1125
- <li>KΔ±sa cevaplar: <code>max_tokens: 200-500</code></li>
1126
- <li>Orta cevaplar: <code>max_tokens: 500-1000</code></li>
1127
- <li>Uzun cevaplar/thinking: <code>max_tokens: 1500-4000</code></li>
1128
- </ul>
1129
-
1130
- <h3>3. Error Handling</h3>
1131
- <pre><code>try {
1132
- const response = await fetch(API_URL, {...});
1133
- const data = await response.json();
1134
-
1135
- if (data.error) {
1136
- console.error('API Error:', data.error);
1137
- // Fallback logic
1138
- }
1139
- } catch (error) {
1140
- console.error('Network Error:', error);
1141
- }</code></pre>
1142
- </div>
1143
-
1144
- <!-- Contact -->
1145
- <div class="section">
1146
- <h2>πŸ“ž Δ°letişim & Destek</h2>
1147
-
1148
- <div class="success-box">
1149
- <p><strong>οΏ½οΏ½οΏ½οΏ½ API URL:</strong> <code>https://yourusername-sixfinger-backend.hf.space</code></p>
1150
- <p><strong>πŸ“Š Status:</strong> <span style="color: #28a745;">● Online</span></p>
1151
- <p><strong>πŸ“§ Email:</strong> sixfingerdev@gmail.com</p>
1152
- <p><strong>πŸ”— GitHub:</strong> Hugging Face Spaces</p>
1153
- </div>
1154
- </div>
1155
-
1156
- <footer>
1157
- <p>Made with ❀️ by Sixfinger Team</p>
1158
- <p>🧠 Thinking Models | πŸ”„ Auto-Fallback | ⚑ Unlimited API</p>
1159
- <p style="margin-top: 15px;">
1160
- <strong>Version:</strong> 3.0.0 |
1161
- <strong>Models:</strong> {{ thinking_models|length + normal_models|length }} |
1162
- <strong>Uptime:</strong> {{ uptime }}
1163
- </p>
1164
- </footer>
1165
- </div>
1166
-
1167
- <script>
1168
- function copyCode(btn) {
1169
- const pre = btn.parentElement;
1170
- const code = pre.querySelector('code').textContent;
1171
- navigator.clipboard.writeText(code).then(() => {
1172
- const originalText = btn.textContent;
1173
- btn.textContent = 'βœ… Copied!';
1174
- setTimeout(() => {
1175
- btn.textContent = originalText;
1176
- }, 2000);
1177
- });
1178
- }
1179
-
1180
- console.log('%c🧠 Sixfinger Thinking Backend Ready!', 'font-size: 20px; color: #7e22ce; font-weight: bold;');
1181
- console.log('%cThinking Models: {{ thinking_models|length }} | Normal Models: {{ normal_models|length }}', 'font-size: 14px; color: #1e3c72;');
1182
- </script>
1183
- </body>
1184
- </html>
1185
- """
1186
 
1187
  if __name__ == '__main__':
1188
- print("\n" + "=" * 60)
1189
- print("πŸš€ Sixfinger Multi-Model Backend (Thinking + Fallback)")
1190
- print("=" * 60)
 
1191
  print(f"πŸ“‘ Port: {PORT}")
1192
- print(f"🧠 Thinking Models: {len(THINKING_MODELS)}")
1193
- print(f"⚑ Normal Models: {len(NORMAL_MODELS)}")
1194
- print(f"πŸ”„ Max Retries: {MAX_RETRIES}")
1195
- print("=" * 60)
1196
- print("Thinking Models:")
1197
- for key, info in THINKING_MODELS.items():
1198
  print(f" β€’ {key}: {info['description']}")
1199
- print("\nNormal Models:")
1200
- for key, info in NORMAL_MODELS.items():
 
 
1201
  print(f" β€’ {key}: {info['description']}")
1202
- print("=" * 60)
 
 
 
 
 
 
1203
  print("βœ… Server ready!")
1204
- print("πŸ“– Documentation: http://0.0.0.0:7860")
1205
- print("=" * 60 + "\n")
1206
 
1207
  app.run(host='0.0.0.0', port=PORT, debug=False, threaded=True)
 
1
+ # app.py - Sixfinger Groq Backend (Allam-2-7B Eklendi)
2
  import json
3
  import os
 
4
  from datetime import datetime
5
+ from flask import Flask, request, jsonify, Response
6
+ from groq import Groq
7
  import traceback
8
 
9
  app = Flask(__name__)
10
  app.config['JSON_AS_ASCII'] = False
11
 
# ========== CONFIGURATION ==========
# Credentials and the listen port come from the environment; fail fast at
# import time when no API key is configured.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
PORT = int(os.environ.get("PORT", 7860))

if not GROQ_API_KEY:
    raise ValueError("❌ GROQ_API_KEY environment variable gerekli!")

# One shared client instance, reused by every request handler.
groq_client = Groq(api_key=GROQ_API_KEY)
 
# ========== MODEL CATEGORIES ==========
# Catalogue of every Groq model the backend can route to.  Each entry keeps
# the provider model id plus the published rate limits (requests/tokens per
# minute/day) so they can be surfaced via the API endpoints.

# Free-plan models (high request allowances).
FREE_MODELS = {
    'llama-8b-instant': {
        'id': 'llama-3.1-8b-instant',
        'description': 'Llama 3.1 8B Instant (Ultra Fast)',
        'rpm': 30,
        'rpd': 14400,  # highest daily request allowance of all models
        'tpm': 14400,
        'tpd': 6000000,
        'size': '8B',
        'speed': '⚑⚑⚑',
        'plan_required': 'free',
        'language': 'Multilingual',
    },
    'allam-2-7b': {
        'id': 'allam-2-7b',
        'description': 'Allam 2 7B (Arabic/Turkish Optimized)',
        'rpm': 30,
        'rpd': 300,
        'tpm': 7000,
        'tpd': 60000,
        'size': '7B',
        'speed': '⚑⚑',
        'plan_required': 'free',
        'language': 'Arabic/Turkish',
    },
}

# Paid-plan models (more capable, lower daily request limits).
PAID_MODELS = {
    'llama-70b': {
        'id': 'llama-3.3-70b-versatile',
        'description': 'Llama 3.3 70B Versatile (Powerful)',
        'rpm': 30,
        'rpd': 1000,
        'tpm': 1000,
        'tpd': 12000000,
        'size': '70B',
        'speed': '⚑⚑',
        'plan_required': 'starter',
        'language': 'Multilingual',
    },
    'qwen3-32b': {
        'id': 'qwen/qwen3-32b',
        'description': 'Qwen3 32B (TΓΌrkΓ§e Optimized)',
        'rpm': 60,
        'rpd': 1000,
        'tpm': 1000,
        'tpd': 6000000,
        'size': '32B',
        'speed': '⚑⚑',
        'plan_required': 'starter',
        'language': 'Turkish/Chinese',
    },
    'gpt-oss-120b': {
        'id': 'openai/gpt-oss-120b',
        'description': 'GPT OSS 120B (Giant)',
        'rpm': 30,
        'rpd': 1000,
        'tpm': 1000,
        'tpd': 8000000,
        'size': '120B',
        'speed': '⚑⚑',
        'plan_required': 'pro',
        'language': 'Multilingual',
    },
    'llama-maverick-17b': {
        'id': 'meta-llama/llama-4-maverick-17b-128e-instruct',
        'description': 'Llama 4 Maverick 17B (Latest)',
        'rpm': 30,
        'rpd': 1000,
        'tpm': 1000,
        'tpd': 6000000,
        'size': '17B',
        'speed': '⚑⚑',
        'plan_required': 'starter',
        'language': 'Multilingual',
    },
    'llama-scout-17b': {
        'id': 'meta-llama/llama-4-scout-17b-16e-instruct',
        'description': 'Llama 4 Scout 17B (Fast)',
        'rpm': 30,
        'rpd': 1000,
        'tpm': 1000,
        'tpd': 30000000,  # very high daily token allowance
        'size': '17B',
        'speed': '⚑⚑⚑',
        'plan_required': 'starter',
        'language': 'Multilingual',
    },
    'gpt-oss-20b': {
        'id': 'openai/gpt-oss-20b',
        'description': 'GPT OSS 20B (Compact)',
        'rpm': 30,
        'rpd': 1000,
        'tpm': 1000,
        'tpd': 8000000,
        'size': '20B',
        'speed': '⚑⚑',
        'plan_required': 'starter',
        'language': 'Multilingual',
    },
    'kimi-k2': {
        'id': 'moonshotai/kimi-k2-instruct',
        'description': 'Kimi K2 Instruct (Chinese)',
        'rpm': 60,
        'rpd': 1000,
        'tpm': 1000,
        'tpd': 10000000,
        'size': 'Unknown',
        'speed': '⚑⚑',
        'plan_required': 'pro',
        'language': 'Chinese/Multilingual',
    },
}

# Combined lookup used by the request handlers.
ALL_MODELS = {**FREE_MODELS, **PAID_MODELS}

# Fallback try-order: free models first (highest limits), then paid models
# roughly from strongest to weakest.
MODEL_PRIORITY = [
    'llama-8b-instant',    # 14,400 RPD (primary free model)
    'allam-2-7b',          # 300 RPD (free backup)
    'llama-70b',           # strongest general-purpose paid model
    'gpt-oss-120b',        # giant
    'qwen3-32b',           # Turkish-optimized
    'llama-scout-17b',     # fast, very high token allowance
    'llama-maverick-17b',  # newest
    'gpt-oss-20b',
    'kimi-k2',             # Chinese
]

# ========== PLAN -> MODEL MAPPING ==========
# Which model keys each subscription tier is allowed to use.
PLAN_ALLOWED_MODELS = {
    'free': [
        'llama-8b-instant',  # primary model (14.4K/day)
        'allam-2-7b',        # backup/alternative (300/day)
    ],
    'starter': [
        'llama-8b-instant',
        'allam-2-7b',
        'qwen3-32b',         # for Turkish
        'llama-70b',         # powerful model
        'llama-maverick-17b',
        'llama-scout-17b',
        'gpt-oss-20b',
    ],
    'pro': [
        'llama-8b-instant',
        'allam-2-7b',
        'qwen3-32b',
        'llama-70b',
        'llama-maverick-17b',
        'llama-scout-17b',
        'gpt-oss-20b',
        'gpt-oss-120b',      # giant model
        'kimi-k2',           # Chinese model
    ],
    'plus': list(ALL_MODELS.keys()),  # every model
}
186
 
187
  # ========== STATISTICS ==========
 
195
  'start_time': datetime.utcnow()
196
  }
197
 
 
 
198
  # ========== HELPER FUNCTIONS ==========
199
 
def get_allowed_models(user_plan='free', preferred_model=None):
    """Return the model keys the given plan may use, in try-order.

    If *preferred_model* is permitted for the plan it is moved to the front
    and the remaining models keep their plan-list order; otherwise the
    allowed models are ranked by MODEL_PRIORITY.  Unknown plans fall back
    to the single free instant model.
    """
    allowed = PLAN_ALLOWED_MODELS.get(user_plan, ['llama-8b-instant'])

    if preferred_model and preferred_model in allowed:
        rest = [key for key in allowed if key != preferred_model]
        return [preferred_model] + rest

    # No (valid) preference: order strictly by the global priority list.
    return [key for key in MODEL_PRIORITY if key in allowed]
 
 
 
 
 
 
212
 
def try_model(model_key, messages, max_tokens, temperature, top_p, stream=False):
    """Attempt one chat completion against a single model.

    Returns ``(response, None)`` on success — a Groq completion object, or
    a chunk iterator when ``stream=True`` — and ``(None, reason)`` on
    failure so the caller can fall back to the next model.  Usage/failure
    counters in the module-level ``stats`` dict are updated as a side
    effect.
    """
    if model_key not in ALL_MODELS:
        return None, f"Unknown model: {model_key}"

    model_info = ALL_MODELS[model_key]
    model_id = model_info['id']

    try:
        # The streaming and non-streaming calls were previously duplicated
        # branches; they differ only in the `stream` flag, so make one call.
        response = groq_client.chat.completions.create(
            model=model_id,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=stream
        )

        stats['model_usage'][model_key] = stats['model_usage'].get(model_key, 0) + 1
        return response, None

    except Exception as e:
        error_msg = str(e)
        stats['model_failures'][model_key] = stats['model_failures'].get(model_key, 0) + 1

        print(f"❌ Model {model_key} failed: {error_msg}")

        # Map provider errors onto short, stable reason strings the caller
        # (and API clients) can rely on.  Lowercase once, not per check.
        lowered = error_msg.lower()
        if 'rate_limit' in lowered or 'rate limit' in lowered:
            return None, "Rate limit exceeded"
        elif 'quota' in lowered:
            return None, "Quota exceeded"
        elif 'timeout' in lowered:
            return None, "Timeout"
        else:
            return None, f"Error: {error_msg[:150]}"
260
 
def format_messages(prompt, system_prompt=None, history=None):
    """Build the chat message list: system prompt, prior turns, user prompt.

    When no *system_prompt* is given, a default Turkish assistant persona
    is used.  *history* is an optional list of prior message dicts that is
    inserted between the system and user messages.
    """
    system_text = system_prompt or (
        "Sen yardΔ±msever ve bilgili bir AI asistanΔ±sΔ±n. TΓΌrkΓ§e'yi mΓΌkemmel kullanΔ±rsΔ±n."
    )

    messages = [{"role": "system", "content": system_text}]
    messages.extend(history or [])
    messages.append({"role": "user", "content": prompt})
    return messages
280
 
281
  # ========== ROUTES ==========
282
 
283
  @app.route('/')
284
  def index():
285
+ """API Documentation"""
286
  uptime = datetime.utcnow() - stats['start_time']
287
  uptime_str = str(uptime).split('.')[0]
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  return jsonify({
290
+ 'name': 'Sixfinger Groq Backend',
291
+ 'version': '4.0.1',
292
  'status': 'online',
293
+ 'provider': 'Groq',
294
+ 'uptime': uptime_str,
295
+ 'models': {
296
+ 'free': [
297
+ {'key': k, 'rpd': v['rpd'], 'language': v['language']}
298
+ for k, v in FREE_MODELS.items()
299
+ ],
300
+ 'paid': [
301
+ {'key': k, 'rpd': v['rpd'], 'plan': v['plan_required']}
302
+ for k, v in PAID_MODELS.items()
303
+ ],
304
+ 'total': len(ALL_MODELS)
305
+ },
306
+ 'stats': {
307
+ 'total_requests': stats['total_requests'],
308
+ 'successful': stats['successful_requests'],
309
+ 'failed': stats['failed_requests'],
310
+ 'success_rate': f"{(stats['successful_requests'] / max(stats['total_requests'], 1) * 100):.2f}%",
311
+ 'fallback_count': stats['fallback_count']
312
+ },
313
+ 'endpoints': {
314
+ 'chat': 'POST /api/chat',
315
+ 'chat_stream': 'POST /api/chat/stream',
316
+ 'models': 'GET /api/models',
317
+ 'stats': 'GET /api/stats',
318
+ 'health': 'GET /health'
319
+ },
320
+ 'headers': {
321
+ 'X-Model': 'Preferred model key (optional)',
322
+ 'X-User-Plan': 'User plan: free, starter, pro, plus (default: free)'
323
  }
324
  })
325
 
def _model_listing(key, info, include_plan=False):
    """Serialize one catalogue entry for /api/models.

    Free and paid entries share the same shape except that paid entries
    additionally expose the required plan; previously the two
    comprehensions duplicated this dict verbatim.
    """
    entry = {
        'key': key,
        'model_id': info['id'],
        'description': info['description'],
        'size': info['size'],
        'speed': info['speed'],
        'language': info['language'],
        'limits': {
            'rpm': info['rpm'],
            'rpd': info['rpd'],
            'tpm': info['tpm'],
            'tpd': info['tpd']
        },
        'usage_count': stats['model_usage'].get(key, 0),
        'failure_count': stats['model_failures'].get(key, 0)
    }
    if include_plan:
        entry['plan_required'] = info['plan_required']
    return entry

@app.route('/api/models')
def list_models():
    """List every model (free and paid) with limits, usage and plan rules."""
    return jsonify({
        'free_models': [
            _model_listing(key, info)
            for key, info in FREE_MODELS.items()
        ],
        'paid_models': [
            _model_listing(key, info, include_plan=True)
            for key, info in PAID_MODELS.items()
        ],
        'plan_permissions': PLAN_ALLOWED_MODELS
    })
371
 
372
  @app.route('/api/chat', methods=['POST'])
373
  def chat():
374
+ """Chat endpoint (non-streaming)"""
375
  stats['total_requests'] += 1
376
 
377
  try:
378
  data = request.json
379
+ if not data or 'prompt' not in data:
 
 
 
 
 
380
  stats['failed_requests'] += 1
381
  return jsonify({'error': 'prompt required'}), 400
382
 
383
+ # Request parameters
384
+ prompt = data['prompt']
385
  max_tokens = min(data.get('max_tokens', 1000), 4000)
386
+ temperature = min(max(data.get('temperature', 0.7), 0.0), 2.0)
387
  top_p = min(max(data.get('top_p', 0.9), 0.1), 1.0)
388
+ system_prompt = data.get('system_prompt')
389
+ history = data.get('history', [])
390
 
391
+ # Model selection
392
+ preferred_model = request.headers.get('X-Model') or data.get('model')
393
+ user_plan = request.headers.get('X-User-Plan', 'free').lower()
394
 
395
+ # Validate plan
396
+ if user_plan not in PLAN_ALLOWED_MODELS:
397
+ user_plan = 'free'
398
+
399
+ # Get allowed models
400
+ models_to_try = get_allowed_models(user_plan, preferred_model)
401
 
402
+ # Format messages
403
+ messages = format_messages(prompt, system_prompt, history)
404
 
405
+ # Try models
406
  attempts = []
407
 
408
+ for i, model_key in enumerate(models_to_try):
409
+ if i >= 5: # Max 5 attempts
410
  break
411
 
412
+ print(f"πŸ”„ Trying model {i+1}/{min(5, len(models_to_try))}: {model_key}")
413
 
414
  response, error = try_model(model_key, messages, max_tokens, temperature, top_p)
415
 
 
420
  })
421
 
422
  if response:
423
+ content = response.choices[0].message.content
424
+ model_info = ALL_MODELS[model_key]
 
 
425
 
426
  stats['successful_requests'] += 1
427
  if i > 0:
428
  stats['fallback_count'] += 1
429
 
 
 
 
430
  result = {
431
+ 'response': content,
432
  'model': model_info['id'],
433
  'model_key': model_key,
434
+ 'model_size': model_info['size'],
435
+ 'model_language': model_info['language'],
436
  'attempts': i + 1,
437
  'usage': {
438
+ 'prompt_tokens': response.usage.prompt_tokens,
439
+ 'completion_tokens': response.usage.completion_tokens,
440
+ 'total_tokens': response.usage.total_tokens
441
  },
442
  'parameters': {
443
  'max_tokens': max_tokens,
444
  'temperature': temperature,
445
+ 'top_p': top_p
 
446
  }
447
  }
448
 
 
 
 
 
 
 
449
  if i > 0:
450
  result['fallback_attempts'] = attempts
451
 
452
  return jsonify(result)
453
 
454
  else:
 
455
  print(f"❌ {model_key} failed: {error}")
456
  continue
457
 
 
459
 
460
  return jsonify({
461
  'error': 'All models failed',
 
462
  'attempts': attempts,
463
+ 'user_plan': user_plan,
464
+ 'models_tried': [a['model'] for a in attempts]
465
  }), 503
466
 
467
  except Exception as e:
468
  stats['failed_requests'] += 1
469
+ return jsonify({
470
+ 'error': str(e),
471
+ 'traceback': traceback.format_exc()
472
+ }), 500
473
 
474
  @app.route('/api/chat/stream', methods=['POST'])
475
  def chat_stream():
476
+ """Chat endpoint (streaming)"""
477
  stats['total_requests'] += 1
478
 
479
  try:
480
  data = request.json
481
+ if not data or 'prompt' not in data:
 
 
 
 
482
  return jsonify({'error': 'prompt required'}), 400
483
 
484
+ # Request parameters
485
+ prompt = data['prompt']
486
  max_tokens = min(data.get('max_tokens', 1000), 4000)
487
+ temperature = min(max(data.get('temperature', 0.7), 0.0), 2.0)
488
  top_p = min(max(data.get('top_p', 0.9), 0.1), 1.0)
489
+ system_prompt = data.get('system_prompt')
490
+ history = data.get('history', [])
491
 
492
+ # Model selection
493
+ preferred_model = request.headers.get('X-Model') or data.get('model')
494
+ user_plan = request.headers.get('X-User-Plan', 'free').lower()
495
 
496
+ if user_plan not in PLAN_ALLOWED_MODELS:
497
+ user_plan = 'free'
498
 
499
+ # Get allowed models
500
+ models_to_try = get_allowed_models(user_plan, preferred_model)
501
+
502
+ # Format messages
503
+ messages = format_messages(prompt, system_prompt, history)
504
 
505
  def generate():
506
+ for i, model_key in enumerate(models_to_try):
507
+ if i >= 5:
508
  break
509
 
510
  yield f"data: {json.dumps({'info': f'Trying model: {model_key}'}, ensure_ascii=False)}\n\n"
 
513
 
514
  if stream_response:
515
  try:
516
+ for chunk in stream_response:
517
+ if chunk.choices[0].delta.content:
518
+ text = chunk.choices[0].delta.content
519
+ yield f"data: {json.dumps({'text': text}, ensure_ascii=False)}\n\n"
520
 
521
  stats['successful_requests'] += 1
 
 
522
  if i > 0:
523
  stats['fallback_count'] += 1
524
 
525
+ model_info = ALL_MODELS[model_key]
526
  yield f"data: {json.dumps({'done': True, 'model': model_info['id'], 'model_key': model_key, 'attempts': i+1})}\n\n"
527
  return
528
 
529
  except Exception as e:
530
+ yield f"data: {json.dumps({'warning': f'Stream error: {str(e)}'}, ensure_ascii=False)}\n\n"
531
  continue
532
 
533
  else:
 
@app.after_request
def after_request(response):
    """Attach permissive CORS headers to every outgoing response."""
    cors_headers = {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Headers': 'Content-Type,X-API-Key,X-Model,X-User-Plan',
        'Access-Control-Allow-Methods': 'GET,POST,OPTIONS',
    }
    for name, value in cors_headers.items():
        response.headers.add(name, value)
    return response
592
 
593
+ # ========== MAIN ==========
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
 
if __name__ == '__main__':
    # Startup banner: configuration summary plus the model catalogue, so
    # the deployment logs show exactly what this instance can serve.
    print("\n" + "=" * 70)
    print("πŸš€ SIXFINGER GROQ BACKEND v4.0.1")
    print("=" * 70)
    # SECURITY FIX: the previous banner printed GROQ_API_KEY[:20], leaking
    # 20 characters of the secret into logs.  Show only a short,
    # non-reconstructable prefix to confirm a key is loaded.
    print(f"βœ… Groq API Key: {GROQ_API_KEY[:4]}*** (loaded)")
    print(f"πŸ“‘ Port: {PORT}")
    print("=" * 70)

    print("\nπŸ†“ FREE PLAN MODELS:")
    for key, info in FREE_MODELS.items():
        print(f" β€’ {key}: {info['description']}")
        print(f"   RPD: {info['rpd']:,} | TPD: {info['tpd']:,} | Language: {info['language']}")

    print("\nπŸ’Ž PAID PLAN MODELS:")
    for key, info in PAID_MODELS.items():
        print(f" β€’ {key}: {info['description']}")
        print(f"   Plan: {info['plan_required']}+ | RPD: {info['rpd']:,} | Language: {info['language']}")

    print("\nπŸ“Š PLAN PERMISSIONS:")
    for plan, models in PLAN_ALLOWED_MODELS.items():
        print(f" β€’ {plan.upper()}: {len(models)} modeller - {', '.join(models[:3])}...")

    print("\n" + "=" * 70)
    print("βœ… Server ready!")
    print("πŸ“– API Docs: http://0.0.0.0:7860")
    print("=" * 70 + "\n")

    # threaded=True: Flask's built-in server handles concurrent requests;
    # debug stays off in deployment.
    app.run(host='0.0.0.0', port=PORT, debug=False, threaded=True)