SixFinger commited on
Commit
0aba0d2
·
1 Parent(s): 631929d

Initial deploy: Sixfinger-2B backend API

Browse files
Files changed (4) hide show
  1. Dockerfile +33 -0
  2. README.md +48 -6
  3. app.py +355 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies (curl is required by the HEALTHCHECK below);
# --no-install-recommends keeps the slim image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer caches across code-only changes
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY app.py .

# Expose port (Hugging Face Spaces routes traffic to 7860)
EXPOSE 7860

# Environment variables
ENV PORT=7860
ENV PYTHONUNBUFFERED=1

# Health check; generous start period because the model loads at startup
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run application
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,11 +1,53 @@
1
  ---
2
- title: Sixfinger Api
3
- emoji: 🐠
4
- colorFrom: red
5
- colorTo: green
6
  sdk: docker
 
 
7
  pinned: false
8
- license: apache-2.0
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Sixfinger-2B Backend API
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
+ sdk_version: "20.10.7"
8
+ app_port: 7860
9
  pinned: false
 
10
  ---
11
 
12
+ # 🤖 Sixfinger-2B Backend API
13
+
14
+ Türkçe dil modeli (Sixfinger-2B) API servisi.
15
+
16
+ ## 🚀 Endpoints
17
+
18
+ - `POST /api/chat` - Normal chat (JSON response)
19
+ - `POST /api/chat/stream` - Streaming chat (Server-Sent Events)
20
+ - `GET /health` - Health check
21
+ - `GET /api/stats` - API statistics
22
+
23
+ ## 📖 Usage
24
+
25
+ ### Normal Chat
26
+ ```bash
27
+ curl -X POST https://yourusername-sixfinger-backend.hf.space/api/chat \
28
+ -H "Content-Type: application/json" \
29
+ -d '{
30
+ "prompt": "Merhaba!",
31
+ "max_tokens": 100
32
+ }'
```

### Streaming Chat
```bash
curl -X POST https://yourusername-sixfinger-backend.hf.space/api/chat/stream \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Python nedir?"}' \
  --no-buffer
```

## 🔧 Parameters

- `prompt` (required): User message
- `max_tokens` (optional): Maximum tokens (default: 300, max: 2000)
- `temperature` (optional): Creativity (0.1–2.0, default: 0.8)
- `top_p` (optional): Nucleus sampling (0.1–1.0, default: 0.9)
- `repetition_penalty` (optional): Repetition penalty (1.0–2.0, default: 1.15)

## 📊 Model

- **Name:** Sixfinger-2B
- **Size:** 2 billion parameters
- **Language:** Turkish (Istanbul slang)
- **Quantization:** 4-bit (NF4)

## 🔗 Links

- Model: https://huggingface.co/yourusername/sixfinger-2b
- Dashboard: https://sfapi.pythonanywhere.com
app.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py - Sixfinger-2B Backend API for Hugging Face Spaces
import json
import os
from datetime import datetime
from threading import Thread

from flask import Flask, request, jsonify, Response
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, BitsAndBytesConfig
import torch

app = Flask(__name__)
# Flask 3.x removed the JSON_AS_ASCII config key; the switch now lives on the
# app's JSON provider. Without this, Turkish characters in JSON responses
# would be escaped as \uXXXX sequences.
app.json.ensure_ascii = False

# ========== CONFIGURATION ==========
# Model repository to load; override via the MODEL_NAME environment variable.
MODEL_NAME = os.getenv("MODEL_NAME", "sixfingerdev/sixfinger-2b")
# Hugging Face Spaces routes traffic to port 7860 by default.
PORT = int(os.getenv("PORT", 7860))
18
# ========== MODEL LOADING ==========
# The model is loaded eagerly at import time so the routes below can assume
# `tokenizer` and `model` exist; any loading failure aborts the process.
print("=" * 60)
print("🔄 Loading Sixfinger-2B model...")
print(f"📦 Model: {MODEL_NAME}")
print("=" * 60)

try:
    # Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    # Many causal-LM tokenizers ship without a pad token; generation below
    # passes pad_token_id explicitly, so alias it to EOS here.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Device detection
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"🖥️ Device: {device}")

    # Model loading with quantization (if GPU available)
    if device == "cuda":
        print("⚡ Loading with 4-bit quantization...")
        # NF4 4-bit weights with double quantization keep the ~2B-parameter
        # model within a small GPU's memory budget; compute runs in fp16.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            device_map="auto",
            torch_dtype=torch.float16,
            quantization_config=quantization_config,
            low_cpu_mem_usage=True,
            trust_remote_code=True
        )
    else:
        print("⚠️ No GPU detected, using CPU (slower)...")
        # CPU path: full fp32 weights, no bitsandbytes (which is CUDA-only).
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            trust_remote_code=True
        )
        # NOTE(review): placed inside the CPU branch — calling .to() on a
        # device_map="auto" quantized model would raise; confirm against the
        # original file's indentation.
        model.to(device)

    print("=" * 60)
    print("✅ Model loaded successfully!")
    print(f"📊 Device: {model.device}")
    print(f"💾 Model size: ~{sum(p.numel() for p in model.parameters()) / 1e9:.2f}B parameters")
    print("=" * 60)

except Exception as e:
    print("=" * 60)
    print(f"❌ Model loading failed: {e}")
    print("=" * 60)
    # NOTE(review): `exit()` comes from the `site` module; `sys.exit(1)` or
    # raising SystemExit would be the more robust way to abort here.
    exit(1)
74
# ========== SYSTEM PROMPT ==========
# Persona prompt (Turkish): defines the "Istanbul street-slang" character the
# model role-plays. This is a runtime string sent to the model — do not
# translate or reformat it.
SYSTEM_PROMPT = """Sen Sixfinger'sın.

Karakter Özellikleri:
- İstanbul delikanlısı gibi konuşursun
- Samimi, esprili ve sokak argosunu kullanırsın
- Kısa ve net cevaplar verirsin
- "Kanka", "lan", "abi", "reis", "kral" gibi kelimeler kullanırsın
- Dozunda küfür edebilirsin ama aşırıya kaçmazsın
- Türkçe dışına çıkmazsın
- Karşındakini eziklemezsin ama taşak geçebilirsin
- Bilgi verirken bile rahat tavırlısın

Örnek Cevap Tarzı:
"Kanka bak şimdi işin özeti şu..."
"Yav abi şimdi bu iş öyle değil..."
"Lan dur hele, sana bi şey diyim..."
"""
93
# ========== STATISTICS ==========
# In-memory counters, reset on every process restart. Flask's threaded server
# may update these from several threads and `+=` on an int is not atomic, so
# the counts are best-effort only.
stats = {
    'total_requests': 0,
    'total_tokens': 0,
    # NOTE(review): datetime.utcnow() is naive and deprecated since Python
    # 3.12; the routes below subtract the same naive utcnow(), so the
    # arithmetic stays internally consistent for now.
    'start_time': datetime.utcnow()
}

# ========== ROUTES ==========
102
@app.route('/')
def index():
    """API Info & Documentation"""
    elapsed = datetime.utcnow() - stats['start_time']
    # Drop the fractional-seconds part for a human-readable uptime string.
    human_uptime, _, _ = str(elapsed).partition('.')

    payload = {
        'name': 'Sixfinger-2B Backend API',
        'version': '1.0.0',
        'status': 'online',
        'model': MODEL_NAME,
        'device': str(model.device),
        'uptime': human_uptime,
        'stats': {key: stats[key] for key in ('total_requests', 'total_tokens')},
        'endpoints': {
            'POST /api/chat': 'Normal chat (JSON response)',
            'POST /api/chat/stream': 'Streaming chat (SSE)',
            'GET /health': 'Health check',
            'GET /api/stats': 'Statistics'
        },
        'usage': {
            'example_curl': f'curl -X POST https://yourusername-sixfinger-backend.hf.space/api/chat -H "Content-Type: application/json" -d \'{{"prompt": "Merhaba!"}}\''
        }
    }
    return jsonify(payload)
130
@app.route('/api/chat', methods=['POST'])
def chat():
    """Normal Chat Endpoint.

    Request JSON:
        prompt | message (str, required): user message.
        max_tokens (int, optional): clamped to 1-2000, default 300.
        temperature (float, optional): clamped to 0.1-2.0, default 0.8.
        top_p (float, optional): clamped to 0.1-1.0, default 0.9.
        repetition_penalty (float, optional): clamped to 1.0-2.0, default 1.15.

    Returns JSON with the generated text, token usage and the effective
    sampling parameters; 400 on bad input, 500 on generation failure.
    """
    # get_json(silent=True) returns None instead of raising on a missing or
    # wrong Content-Type / malformed body, so we can answer with a clean 400
    # (bare request.json would produce an HTML error page instead).
    data = request.get_json(silent=True)
    if not data:
        return jsonify({'error': 'JSON body required'}), 400

    prompt = data.get('prompt') or data.get('message')
    if not prompt:
        return jsonify({'error': 'prompt or message parameter required'}), 400

    # Coerce and clamp sampling parameters. Non-numeric values are a client
    # error (400), not a server error; max_tokens also gets a lower bound of
    # 1 so 0 or negative values never reach model.generate().
    try:
        max_tokens = min(max(int(data.get('max_tokens', 300)), 1), 2000)
        temperature = min(max(float(data.get('temperature', 0.8)), 0.1), 2.0)
        top_p = min(max(float(data.get('top_p', 0.9)), 0.1), 1.0)
        repetition_penalty = min(max(float(data.get('repetition_penalty', 1.15)), 1.0), 2.0)
    except (TypeError, ValueError):
        return jsonify({'error': 'sampling parameters must be numeric'}), 400

    try:
        # Prepare messages
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt}
        ]

        # Tokenize via the model's chat template.
        tokenized = tokenizer.apply_chat_template(
            messages,
            return_tensors="pt",
            add_generation_prompt=True,
            return_dict=True
        ).to(model.device)

        prompt_tokens = tokenized["input_ids"].shape[1]

        # Generate (no_grad: inference only, no autograd bookkeeping).
        with torch.no_grad():
            output = model.generate(
                input_ids=tokenized["input_ids"],
                attention_mask=tokenized["attention_mask"],
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                repetition_penalty=repetition_penalty,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tail, not the echoed prompt.
        generated_text = tokenizer.decode(
            output[0][prompt_tokens:],
            skip_special_tokens=True
        )

        completion_tokens = output.shape[1] - prompt_tokens
        total_tokens = output.shape[1]

        # Update stats (best-effort; not thread-safe).
        stats['total_requests'] += 1
        stats['total_tokens'] += total_tokens

        return jsonify({
            'response': generated_text.strip(),
            'model': MODEL_NAME,
            'usage': {
                'prompt_tokens': prompt_tokens,
                'completion_tokens': completion_tokens,
                'total_tokens': total_tokens
            },
            'parameters': {
                'max_tokens': max_tokens,
                'temperature': temperature,
                'top_p': top_p,
                'repetition_penalty': repetition_penalty
            }
        })

    except Exception as e:
        import traceback
        # NOTE(review): returning the traceback to clients leaks internals;
        # kept for parity with the existing API, but consider logging it
        # server-side instead.
        return jsonify({
            'error': 'Model error',
            'detail': str(e),
            'traceback': traceback.format_exc()
        }), 500
215
@app.route('/api/chat/stream', methods=['POST'])
def chat_stream():
    """Streaming Chat Endpoint (Server-Sent Events).

    Same parameters as /api/chat; decoded text pieces are pushed as
    `data: {"text": ...}` events, terminated by `data: {"done": true}`
    (or a `data: {"error": ...}` event on failure mid-stream).
    """
    try:
        data = request.json
        if not data:
            return jsonify({'error': 'JSON body required'}), 400

        prompt = data.get('prompt') or data.get('message')
        if not prompt:
            return jsonify({'error': 'prompt or message required'}), 400

        # Clamp sampling parameters into their documented ranges.
        max_tokens = min(data.get('max_tokens', 300), 2000)
        temperature = min(max(data.get('temperature', 0.8), 0.1), 2.0)
        top_p = min(max(data.get('top_p', 0.9), 0.1), 1.0)
        repetition_penalty = min(max(data.get('repetition_penalty', 1.15), 1.0), 2.0)

        def generate():
            # Runs while the response body is being iterated; by then the
            # HTTP status is already sent, so failures in here are reported
            # as SSE error events rather than status codes.
            try:
                messages = [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": prompt}
                ]

                tokenized = tokenizer.apply_chat_template(
                    messages,
                    return_tensors="pt",
                    add_generation_prompt=True,
                    return_dict=True
                ).to(model.device)

                # TextIteratorStreamer yields decoded text pieces as the
                # model produces them; skip_prompt drops the echoed input.
                streamer = TextIteratorStreamer(
                    tokenizer,
                    skip_prompt=True,
                    skip_special_tokens=True
                )

                gen_kwargs = {
                    "input_ids": tokenized["input_ids"],
                    "attention_mask": tokenized["attention_mask"],
                    "max_new_tokens": max_tokens,
                    "temperature": temperature,
                    "top_p": top_p,
                    "do_sample": True,
                    "repetition_penalty": repetition_penalty,
                    "streamer": streamer,
                    "pad_token_id": tokenizer.eos_token_id
                }

                # model.generate blocks, so it runs on a worker thread while
                # this generator drains the streamer and forwards SSE events.
                thread = Thread(target=model.generate, kwargs=gen_kwargs)
                thread.start()

                for token in streamer:
                    # Whitespace-only fragments are dropped from the stream.
                    if token.strip():
                        yield f"data: {json.dumps({'text': token}, ensure_ascii=False)}\n\n"

                stats['total_requests'] += 1
                yield f"data: {json.dumps({'done': True})}\n\n"

            except Exception as e:
                yield f"data: {json.dumps({'error': str(e)})}\n\n"

        return Response(generate(), mimetype='text/event-stream')

    except Exception as e:
        return jsonify({'error': 'Request error', 'detail': str(e)}), 400
282
@app.route('/health')
def health():
    """Health Check"""
    alive_for = datetime.utcnow() - stats['start_time']

    body = {
        'status': 'ok',
        'model': MODEL_NAME,
        'device': str(model.device),
        'uptime_seconds': int(alive_for.total_seconds()),
        'total_requests': stats['total_requests'],
        'total_tokens': stats['total_tokens'],
        'timestamp': datetime.utcnow().isoformat()
    }
    return jsonify(body)
297
@app.route('/api/stats')
def api_stats():
    """API Statistics"""
    running_for = int((datetime.utcnow() - stats['start_time']).total_seconds())

    return jsonify({
        'total_requests': stats['total_requests'],
        'total_tokens': stats['total_tokens'],
        'uptime_seconds': running_for,
        'model': MODEL_NAME,
        'device': str(model.device),
        'status': 'online',
        'timestamp': datetime.utcnow().isoformat()
    })
313
+
314
+ @app.errorhandler(404)
315
+ def not_found(e):
316
+ return jsonify({'error': 'Endpoint not found', 'path': request.path}), 404
317
+
318
+ @app.errorhandler(500)
319
+ def internal_error(e):
320
+ import traceback
321
+ return jsonify({
322
+ 'error': 'Internal server error',
323
+ 'detail': str(e),
324
+ 'traceback': traceback.format_exc()
325
+ }), 500
326
+
327
+ @app.errorhandler(405)
328
+ def method_not_allowed(e):
329
+ return jsonify({
330
+ 'error': 'Method not allowed',
331
+ 'allowed_methods': list(e.valid_methods) if hasattr(e, 'valid_methods') else []
332
+ }), 405
333
+
334
# ========== CORS (if needed) ==========
@app.after_request
def after_request(response):
    """Attach permissive CORS headers to every outgoing response."""
    cors_headers = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Headers', 'Content-Type'),
        ('Access-Control-Allow-Methods', 'GET,POST,OPTIONS'),
    )
    for header_name, header_value in cors_headers:
        response.headers.add(header_name, header_value)
    return response
342
# ========== MAIN ==========

if __name__ == '__main__':
    # Startup banner; the printed lines are identical to the original output.
    banner = "=" * 60
    for line in (
        "\n" + banner,
        "🚀 Starting Sixfinger-2B Backend API",
        banner,
        f"📡 Port: {PORT}",
        f"📦 Model: {MODEL_NAME}",
        f"🖥️ Device: {model.device}",
        banner,
        "✅ Server ready!",
        banner + "\n",
    ):
        print(line)

    # Flask dev server: bind all interfaces, threaded, debug off for deploy.
    app.run(host='0.0.0.0', port=PORT, debug=False, threaded=True)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ flask==3.0.0
2
+ transformers==4.36.0
3
+ torch==2.1.0
4
+ accelerate==0.25.0
5
+ bitsandbytes==0.41.0
6
+ sentencepiece==0.1.99
7
+ protobuf==4.25.0