blaze-aura69 committed on
Commit
3df9b01
·
verified ·
1 Parent(s): 194ce58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +448 -0
app.py CHANGED
@@ -0,0 +1,448 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import hashlib
import json
import threading
import time
from concurrent.futures import ThreadPoolExecutor

import httpx
import torch
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
11
+
12
app = FastAPI(title="Distributed M2M100 API")

# =====================================================
# CORS
# =====================================================
# Any origin may call the API. Credentialed requests are left disabled:
# FastAPI documents that `allow_credentials=True` cannot be combined with
# wildcard origins/methods/headers, and no endpoint visible in this file
# reads cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
24
+
25
+ # =====================================================
26
+ # MODEL
27
+ # =====================================================
28
# Checkpoint loaded once at import time and shared by every request.
MODEL_NAME = "facebook/m2m100_418M"

print("Loading model...")
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model.to(device)
model.eval()  # inference only
# Limit torch's intra-op and inter-op thread pools.
torch.set_num_threads(2)
torch.set_num_interop_threads(1)
print("Model loaded!")
38
+
39
+ # =====================================================
40
+ # CONFIG
41
+ # =====================================================
42
# Capacity thresholds: the space reports itself "full" once both are reached.
MAX_ACTIVE_REQUESTS = 3
MAX_QUEUE_SIZE = 3
# Per-request text limit, counted after removing space characters.
MAX_CHARACTERS = 3_000
# Sibling space URLs are built as https://{USERNAME}-{SPACE_PREFIX}{i}.hf.space
USERNAME = "blaze-aura69"
SPACE_PREFIX = "trans"
# Number of sibling spaces probed concurrently per discovery round.
DISCOVERY_CHUNK_SIZE = 50
# Cached translations expire after one day (value is in seconds).
CACHE_TTL = 24 * 60 * 60
# Upper bound on the number of cached translations.
MAX_CACHE_SIZE = 10_000
50
+
51
+ # =====================================================
52
+ # GLOBALS
53
+ # =====================================================
54
# Number of translations currently in flight (mutated by the /translate handler).
active_requests: int = 0
# Bounded queue holding one token per in-flight request.
request_queue: asyncio.Queue = asyncio.Queue(maxsize=MAX_QUEUE_SIZE)
# Worker threads that run the blocking model call off the event loop.
executor = ThreadPoolExecutor(max_workers=3)
# Guards updates to `active_requests`.
lock = asyncio.Lock()

# Cooldown map: space URL -> UNIX time until which the space is not probed.
cooldown: dict = {}

# Translation cache: cache key -> {"translated_text": ..., "timestamp": ...}.
translation_cache: dict = {}
64
+
65
+ # =====================================================
66
+ # LANGUAGES
67
+ # =====================================================
68
# Language code -> English display name, served as-is by GET /languages.
# Entries are in alphabetical order by code, one row per initial letter.
SUPPORTED_LANGS = {
    "af": "Afrikaans", "am": "Amharic", "ar": "Arabic", "ast": "Asturian",
    "az": "Azerbaijani",
    "ba": "Bashkir", "be": "Belarusian", "bg": "Bulgarian", "bn": "Bengali",
    "br": "Breton", "bs": "Bosnian",
    "ca": "Catalan", "ceb": "Cebuano", "cs": "Czech", "cy": "Welsh",
    "da": "Danish", "de": "German",
    "el": "Greek", "en": "English", "es": "Spanish", "et": "Estonian",
    "fa": "Persian", "ff": "Fulah", "fi": "Finnish", "fr": "French",
    "fy": "Western Frisian",
    "ga": "Irish", "gd": "Scottish Gaelic", "gl": "Galician", "gu": "Gujarati",
    "ha": "Hausa", "he": "Hebrew", "hi": "Hindi", "hr": "Croatian",
    "ht": "Haitian Creole", "hu": "Hungarian", "hy": "Armenian",
    "id": "Indonesian", "ig": "Igbo", "ilo": "Ilocano", "is": "Icelandic",
    "it": "Italian",
    "ja": "Japanese", "jv": "Javanese",
    "ka": "Georgian", "kk": "Kazakh", "km": "Khmer", "kn": "Kannada",
    "ko": "Korean",
    "lb": "Luxembourgish", "lg": "Ganda", "ln": "Lingala", "lo": "Lao",
    "lt": "Lithuanian", "lv": "Latvian",
    "mg": "Malagasy", "mk": "Macedonian", "ml": "Malayalam", "mn": "Mongolian",
    "mr": "Marathi", "ms": "Malay", "my": "Myanmar",
    "ne": "Nepali", "nl": "Dutch", "no": "Norwegian", "ns": "Northern Sotho",
    "oc": "Occitan", "or": "Odia",
    "pa": "Punjabi", "pl": "Polish", "ps": "Pashto", "pt": "Portuguese",
    "ro": "Romanian", "ru": "Russian",
    "sd": "Sindhi", "si": "Sinhala", "sk": "Slovak", "sl": "Slovenian",
    "so": "Somali", "sq": "Albanian", "sr": "Serbian", "ss": "Swati",
    "su": "Sundanese", "sv": "Swedish", "sw": "Swahili",
    "ta": "Tamil", "th": "Thai", "tl": "Tagalog", "tn": "Tswana",
    "tr": "Turkish",
    "uk": "Ukrainian", "ur": "Urdu", "uz": "Uzbek",
    "vi": "Vietnamese",
    "wo": "Wolof",
    "xh": "Xhosa",
    "yi": "Yiddish", "yo": "Yoruba",
    "zh": "Chinese", "zu": "Zulu",
}
170
+
171
+ # =====================================================
172
+ # REQUEST MODEL
173
+ # =====================================================
174
class TranslateRequest(BaseModel):
    """JSON body accepted by ``POST /translate``."""

    # Text to translate.
    text: str
    # Language code of ``text``; assigned to the tokenizer's ``src_lang``.
    source_lang: str
    # Language code to translate into; resolved with ``tokenizer.get_lang_id``.
    target_lang: str
178
+
179
+ # =====================================================
180
+ # TRANSLATION
181
+ # =====================================================
182
# Serialises the "set src_lang, then tokenize" step across worker threads.
_tokenizer_lock = threading.Lock()


def blocking_translate(text, source_lang, target_lang):
    """Translate ``text`` synchronously with the module-level model.

    Intended to run on a worker thread (the /translate handler submits it to
    the thread-pool executor), because generation blocks.

    Args:
        text: Text to translate.
        source_lang: Language code of ``text``.
        target_lang: Language code to translate into.

    Returns:
        The decoded translation of the single input sequence.

    Raises:
        Whatever the tokenizer raises for an unknown language code.
    """
    # `tokenizer` is a single shared object and `src_lang` is mutable state on
    # it. Several worker threads call this function concurrently, so without
    # the lock another call could switch the source language between the
    # assignment and the encoding below.
    with _tokenizer_lock:
        tokenizer.src_lang = source_lang
        encoded = tokenizer(text, return_tensors="pt")
    encoded = encoded.to(device)

    generated_tokens = model.generate(
        **encoded,
        # Force the first generated token to be the target-language token.
        forced_bos_token_id=tokenizer.get_lang_id(target_lang),
        max_length=1024,
        num_beams=1,
    )
    return tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )[0]
196
+
197
+ # =====================================================
198
+ # CACHE HELPERS (NEW)
199
+ # =====================================================
200
def get_cache_key(text, source_lang, target_lang):
    """Return a hex digest identifying one translation request.

    The three fields are serialised as a JSON array before hashing, so a
    separator character inside one field cannot make two different requests
    produce the same key (with plain "|" joining, ("a|b", "c", "d") and
    ("a", "b|c", "d") collided).

    Args:
        text: Text to translate.
        source_lang: Source language code.
        target_lang: Target language code.

    Returns:
        A 32-character hexadecimal MD5 digest. MD5 is used only as a compact
        dictionary key, not for security.
    """
    # json.dumps escapes to ASCII by default, so .encode() cannot fail.
    raw_key = json.dumps([text, source_lang, target_lang])
    return hashlib.md5(raw_key.encode()).hexdigest()
204
+
205
def get_from_cache(text, source_lang, target_lang):
    """Return the cached translation for this request, or None.

    An entry older than CACHE_TTL seconds is removed from the cache and
    treated as a miss.
    """
    key = get_cache_key(text, source_lang, target_lang)
    entry = translation_cache.get(key)
    if entry is None:
        return None

    age = time.time() - entry["timestamp"]
    if age < CACHE_TTL:
        return entry["translated_text"]

    # Expired: drop the stale entry so it no longer occupies a slot.
    del translation_cache[key]
    return None
216
+
217
def add_to_cache(text, source_lang, target_lang, translated_text):
    """Store a translation under its request key, stamped with the current time.

    An existing key is overwritten in place. When a new key arrives and the
    cache already holds MAX_CACHE_SIZE entries, the entry with the smallest
    timestamp is evicted first.
    """
    key = get_cache_key(text, source_lang, target_lang)

    # Only a brand-new key can grow the cache, so only then make room.
    is_new_key = key not in translation_cache
    if is_new_key and len(translation_cache) >= MAX_CACHE_SIZE:
        stalest_key = min(
            translation_cache,
            key=lambda k: translation_cache[k]["timestamp"],
        )
        del translation_cache[stalest_key]

    translation_cache[key] = {
        "translated_text": translated_text,
        "timestamp": time.time(),
    }
242
+
243
+ # =====================================================
244
+ # COOLDOWN HELPERS
245
+ # =====================================================
246
def is_blocked(url):
    """Return True while ``url`` still has a cooldown deadline in the future."""
    # URLs never put on cooldown default to deadline 0, i.e. not blocked.
    blocked_until = cooldown.get(url, 0)
    return blocked_until > time.time()
248
+
249
+ # =====================================================
250
+ # CHECK SINGLE SPACE
251
+ # =====================================================
252
async def check_space(client, i):
    """Probe the ``/status`` endpoint of sibling space number ``i``.

    Args:
        client: HTTP client used for the GET request (the caller passes an
            ``httpx.AsyncClient``).
        i: Space index. Index 0 maps to SPACE_PREFIX itself, any other index
            to SPACE_PREFIX followed by the index.

    Returns:
        A dict with ``exists``, ``space_name`` and ``url``. When ``exists`` is
        True it also carries ``status_data``: the decoded JSON body on HTTP
        200 (None if the body is not valid JSON), or a dict whose
        ``current_space_status`` is "cooldown", "rate_limited" or "error".
        HTTP 404 and any failed request are reported as ``exists: False``.
    """
    space_name = SPACE_PREFIX if i == 0 else f"{SPACE_PREFIX}{i}"
    url = f"https://{USERNAME}-{space_name}.hf.space"

    def report(exists, **extra):
        # Every outcome shares the same three leading keys.
        return {"exists": exists, "space_name": space_name, "url": url, **extra}

    def flagged(state):
        return report(True, status_data={"current_space_status": state})

    # Skip the network call while the space is cooling down after a 429.
    if is_blocked(url):
        return flagged("cooldown")

    try:
        response = await client.get(f"{url}/status")
    except Exception:
        # Deliberately broad best-effort: any request failure counts as "not
        # there". Unlike a bare `except:`, this lets asyncio.CancelledError
        # and KeyboardInterrupt propagate.
        return report(False)

    code = response.status_code
    if code == 200:
        try:
            payload = response.json()
        except ValueError:
            # Body was not valid JSON; the space exists but has no usable status.
            payload = None
        return report(True, status_data=payload)
    if code == 404:
        return report(False)
    if code == 429:
        # Back off from this space for one second.
        cooldown[url] = time.time() + 1
        return flagged("rate_limited")
    return flagged("error")
312
+
313
+ # =====================================================
314
+ # DISCOVERY
315
+ # =====================================================
316
async def discover_spaces_parallel(max_spaces=1000):
    """Probe sibling spaces in index order until the first missing one.

    Spaces are checked concurrently in chunks of DISCOVERY_CHUNK_SIZE. Results
    are read in index order; the first space reported as non-existent ends the
    scan and its URL is returned as the empty slot.

    Args:
        max_spaces: Upper bound on the indices probed, rounded up to a whole
            chunk. Without a bound the scan would never end if no index were
            ever reported as non-existent.

    Returns:
        A dict with ``existing_spaces`` (a list of dicts with ``space_name``,
        ``url`` and ``status``; rate-limited and cooling-down spaces are left
        out) and ``empty_space_url`` (None if the bound was reached first).
    """
    existing_spaces = []
    empty_space_url = None
    skipped_states = ("rate_limited", "cooldown")

    async with httpx.AsyncClient(timeout=httpx.Timeout(5.0)) as client:
        for start in range(0, max_spaces, DISCOVERY_CHUNK_SIZE):
            results = await asyncio.gather(
                *(
                    check_space(client, i)
                    for i in range(start, start + DISCOVERY_CHUNK_SIZE)
                )
            )

            for result in results:
                if not result["exists"]:
                    empty_space_url = result["url"]
                    break

                status_data = result.get("status_data")
                # A 200 response may carry any JSON value; only a dict has a state.
                space_state = (
                    status_data.get("current_space_status")
                    if isinstance(status_data, dict)
                    else None
                )
                if space_state in skipped_states:
                    continue
                existing_spaces.append({
                    "space_name": result["space_name"],
                    "url": result["url"],
                    "status": status_data,
                })

            if empty_space_url is not None:
                break

    return {
        "existing_spaces": existing_spaces,
        "empty_space_url": empty_space_url,
    }
353
+
354
+ # =====================================================
355
+ # STATUS
356
+ # =====================================================
357
@app.get("/status")
async def status():
    """Report this space's load plus the result of a sibling-space scan.

    The space counts as "full" only when the active-request count and the
    queue size have both reached their configured maximums.
    """
    # NOTE(review): the scan below requests `/status` on sibling URLs; if the
    # siblings run this same handler, each probe starts another scan there --
    # confirm this fan-out is intended.
    queued = request_queue.qsize()
    has_capacity = (
        active_requests < MAX_ACTIVE_REQUESTS
        or queued < MAX_QUEUE_SIZE
    )
    discovery = await discover_spaces_parallel()
    return {
        "current_space_status": "available" if has_capacity else "full",
        "active_requests": active_requests,
        "queue_size": queued,
        "empty_space_url": discovery["empty_space_url"],
        "total_spaces": len(discovery["existing_spaces"]),
        "cached_translations": len(translation_cache),
    }
373
+
374
+ # =====================================================
375
+ # LANGUAGES
376
+ # =====================================================
377
@app.get("/languages")
def languages():
    """Return the language-code -> display-name table."""
    return SUPPORTED_LANGS
380
+
381
+ # =====================================================
382
+ # TRANSLATE
383
+ # =====================================================
384
@app.post("/translate")
async def translate(req: TranslateRequest):
    """Translate ``req.text`` from ``req.source_lang`` to ``req.target_lang``.

    Every outcome is returned as a dict whose ``status`` is one of:

    * "character_limit_exceeded": the text, not counting space characters,
      is longer than MAX_CHARACTERS.
    * "unsupported_language": a language code is not in SUPPORTED_LANGS.
    * "success": ``translated_text`` holds the result and ``from_cache``
      says whether it came from the cache.
    * "space_full": this space is at capacity; ``empty_space_url`` is the
      slot found by sibling discovery.
    """
    global active_requests

    # Character limit check (without spaces)
    char_count_without_spaces = len(req.text.replace(" ", ""))
    if char_count_without_spaces > MAX_CHARACTERS:
        return {
            "status": "character_limit_exceeded",
            "message": f"Text exceeds maximum character limit (without spaces). Maximum allowed: {MAX_CHARACTERS} characters, received: {char_count_without_spaces} characters",
            "max_characters": MAX_CHARACTERS,
            "received_characters": char_count_without_spaces
        }

    # Reject unknown language codes up front. Otherwise the tokenizer raises
    # inside the worker thread and the client only sees an HTTP 500.
    unsupported = [
        code
        for code in (req.source_lang, req.target_lang)
        if code not in SUPPORTED_LANGS
    ]
    if unsupported:
        return {
            "status": "unsupported_language",
            "message": f"Unsupported language code(s): {', '.join(unsupported)}",
            "unsupported_languages": unsupported
        }

    # Check cache first
    cached_result = get_from_cache(req.text, req.source_lang, req.target_lang)
    if cached_result is not None:
        return {
            "status": "success",
            "translated_text": cached_result,
            "from_cache": True
        }

    # At capacity: point the caller at another space instead of waiting.
    queue_size = request_queue.qsize()
    if active_requests >= MAX_ACTIVE_REQUESTS and queue_size >= MAX_QUEUE_SIZE:
        discovered = await discover_spaces_parallel()
        return {
            "status": "space_full",
            "empty_space_url": discovered["empty_space_url"]
        }

    # Take a queue token and count this request as active; both are undone
    # in the `finally` below.
    await request_queue.put("req")
    async with lock:
        active_requests += 1

    try:
        # Run the blocking model call on the thread pool so the event loop
        # keeps serving other requests.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            executor,
            blocking_translate,
            req.text,
            req.source_lang,
            req.target_lang
        )

        # Store result in cache
        add_to_cache(req.text, req.source_lang, req.target_lang, result)

        return {
            "status": "success",
            "translated_text": result,
            "from_cache": False
        }
    finally:
        async with lock:
            active_requests -= 1
        await request_queue.get()
        request_queue.task_done()
442
+
443
+ # =====================================================
444
+ # HOME
445
+ # =====================================================
446
@app.get("/")
def home():
    """Root endpoint: return a fixed payload showing the service is running."""
    return {"status": "running"}