File size: 25,939 Bytes
05ccd24
 
 
 
 
 
 
 
 
 
8d5467d
05ccd24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d5467d
 
 
 
 
 
 
 
 
 
 
05ccd24
 
 
8d5467d
 
 
 
05ccd24
 
 
 
 
 
 
 
8d5467d
05ccd24
 
8d5467d
05ccd24
 
 
 
8d5467d
 
 
 
 
 
 
 
05ccd24
 
8d5467d
05ccd24
 
 
 
 
 
 
8d5467d
 
 
 
 
 
 
 
05ccd24
 
8d5467d
05ccd24
 
 
 
 
 
 
8d5467d
 
 
 
 
 
 
 
05ccd24
 
8d5467d
05ccd24
 
 
 
 
 
 
8d5467d
 
05ccd24
 
8d5467d
05ccd24
 
 
 
 
 
8d5467d
05ccd24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98b6d67
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
# app/services/models.py - TO'LIQ YAXSHILANGAN VERSIYA
"""
AI Modellari (STT, TTS, LLM) bilan ishlash uchun funksiyalar

YAXSHILANISHLAR:
1. ✅ JSON Parsing - ROBUST va xatoliklarga bardoshli
2. ✅ Multi-language - To'liq 3 til qo'llab-quvvatlash (uzb, eng, rus)
3. ✅ TTS - Speed o'chirildi, faqat til parametri
4. ✅ Error handling - Hamma joyda try-except
5. ✅ Fallback responses - Xatolik bo'lsa default javob qaytarish
"""

import subprocess
import numpy as np
import soundfile as sf
import io
import os
import torch
import torchaudio 
import google.generativeai as genai
import logging
import json
import re
from typing import Optional, Generator, Dict
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor

from app.core.config import GEMINI_API_KEY, SYSTEM_INSTRUCTION
from app.utils.translit import lotin_to_kirill, clean_cyrillic_text


# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- SETTINGS ---
genai.configure(api_key=GEMINI_API_KEY)

# Use the first CUDA device with half precision when a GPU is present,
# otherwise fall back to CPU with full precision.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
    TORCH_DTYPE = torch.float16
else:
    DEVICE = "cpu"
    TORCH_DTYPE = torch.float32

# Registry of loaded model objects and a per-model "is it usable" flag
MODELS = {}
MODEL_STATUS = dict.fromkeys(
    ("stt", "tts_uzb", "tts_eng", "tts_rus", "llm"),
    False,
)

# Audio format validation: file extension -> MIME type
SUPPORTED_AUDIO_FORMATS = dict(
    webm='audio/webm',
    wav='audio/wav',
    mp3='audio/mpeg',
    ogg='audio/ogg',
    m4a='audio/mp4',
)

MAX_AUDIO_SIZE = 100 * 1024 ** 2  # 100MB
MIN_AUDIO_DURATION = 0.5  # 0.5 seconds
MAX_AUDIO_DURATION = 5 * 60  # 5 minutes, in seconds


# ==================== MODEL YUKLASH ====================

def _load_tts_pipeline(lang_code: str, model_id: str, loading_label: str, short_label: str) -> None:
    """Load one text-to-speech pipeline and record whether it succeeded.

    Args:
        lang_code: Language suffix used for the registry keys
            (``tts_<lang_code>`` in MODEL_STATUS, ``tts_<lang_code>_pipeline`` in MODELS).
        model_id: Hugging Face model identifier passed to ``pipeline``.
        loading_label: Language label used in the "loading" log line.
        short_label: Language label used in the "ready" / "failed" log lines.
    """
    status_key = f"tts_{lang_code}"
    try:
        logger.info(f"   🎧 TTS {loading_label} ({model_id}) modeli yuklanmoqda...")

        MODELS[f"{status_key}_pipeline"] = pipeline(
            "text-to-speech",
            model=model_id,
            device=DEVICE
        )

        MODEL_STATUS[status_key] = True
        logger.info(f"   ✅ TTS {short_label} modeli tayyor")

    except Exception as e:
        # A failed TTS model must not stop the remaining models from loading.
        logger.error(f"   ❌ TTS {short_label} modelini yuklashda xatolik: {e}")
        MODEL_STATUS[status_key] = False


def load_models():
    """Load every model (STT, three TTS voices, LLM) and update MODEL_STATUS.

    Each model is loaded inside its own try/except, so one failure only marks
    that model as unavailable and the others are still attempted.

    Raises:
        RuntimeError: If not a single model could be loaded.
    """
    logger.info("🚀 Modellar Hugging Face Hub'dan yuklanmoqda...")

    # ========== STT MODEL ==========
    try:
        logger.info("   📥 STT (islomov/rubaistt_v2_medium) modeli yuklanmoqda...")
        stt_model_id = "islomov/rubaistt_v2_medium"

        stt_model = AutoModelForSpeechSeq2Seq.from_pretrained(
            stt_model_id,
            torch_dtype=TORCH_DTYPE,
            low_cpu_mem_usage=True,
            use_safetensors=True
        )
        stt_model.to(DEVICE)
        stt_processor = AutoProcessor.from_pretrained(stt_model_id)

        MODELS["stt_pipeline"] = pipeline(
            "automatic-speech-recognition",
            model=stt_model,
            tokenizer=stt_processor.tokenizer,
            feature_extractor=stt_processor.feature_extractor,
            max_new_tokens=128,
            torch_dtype=TORCH_DTYPE,
            device=DEVICE,
        )

        MODEL_STATUS["stt"] = True
        logger.info("   ✅ STT modeli tayyor")

    except Exception as e:
        logger.error(f"   ❌ STT modelini yuklashda xatolik: {e}")
        MODEL_STATUS["stt"] = False

    # ========== TTS MODELS (3: uzb, eng, rus) ==========
    # (language code, model id, label in the "loading" line, label in the result lines)
    tts_models = (
        ("uzb", "facebook/mms-tts-uzb-script_cyrillic", "O'ZBEKCHA", "O'ZBEK"),
        ("eng", "facebook/mms-tts-eng", "INGLIZCHA", "INGLIZ"),
        ("rus", "facebook/mms-tts-rus", "RUSCHA", "RUS"),
    )
    for lang_code, model_id, loading_label, short_label in tts_models:
        _load_tts_pipeline(lang_code, model_id, loading_label, short_label)

    # ========== LLM MODEL (Gemini) ==========
    try:
        logger.info("   🧠 LLM (Gemini) modeli yuklanmoqda...")
        # The existing code used "gemini-2.0-flash-exp", so that model name is kept.
        MODELS["llm"] = genai.GenerativeModel("gemini-2.0-flash-exp")
        MODEL_STATUS["llm"] = True
        logger.info("   ✅ LLM modeli tayyor")

    except Exception as e:
        logger.error(f"   ❌ LLM modelini yuklashda xatolik: {e}")
        MODEL_STATUS["llm"] = False

    # ========== RESULT ==========
    if not any(MODEL_STATUS.values()):
        raise RuntimeError("❌ CRITICAL: Hech qanday model yuklanmadi. Loyiha ishlay olmaydi.")

    logger.info("=" * 60)
    logger.info("✅ Modellar yuklash yakunlandi:")
    for model_name, status in MODEL_STATUS.items():
        logger.info(f"   {model_name}: {'✅ Tayyor' if status else '❌ Yuklanmadi'}")
    logger.info("=" * 60)


def check_model_status() -> dict:
    """Return a shallow copy of the model status flags.

    A copy is handed out so callers cannot mutate the module-level registry.
    """
    snapshot = dict(MODEL_STATUS)
    return snapshot


# ==================== TIL ANIQLASH ====================

def detect_language(text: str) -> str:
    """
    Guess the language of a piece of text using script and keyword heuristics.

    The text is first classified by script. Mostly-Cyrillic text is resolved
    between Russian and Uzbek; everything else is resolved between English
    and Uzbek. Uzbek is the default whenever the evidence is weak.

    Args:
        text: Text to analyse

    Returns:
        "uzb" | "eng" | "rus"
    """
    if not text or len(text.strip()) < 3:
        return "uzb"  # Default: Uzbek

    text_lower = text.lower()

    # English keywords
    english_keywords = [
        'hello', 'help', 'my', 'heart', 'pain', 'can\'t', 'breathe',
        'chest', 'head', 'stomach', 'feel', 'sick', 'please',
        'i', 'am', 'the', 'and', 'have', 'is', 'it', 'hurts'
    ]

    # Russian keywords
    russian_keywords = [
        'привет', 'помогите', 'болит', 'сердце', 'голова', 'живот',
        'не могу', 'дышать', 'помощь', 'температура', 'у меня',
        'я', 'мне', 'очень', 'плохо'
    ]

    # Uzbek keywords
    uzbek_keywords = [
        'salom', 'assalomu', 'yordam', 'yurak', 'bosh', 'qorin',
        'og\'rig\'i', 'nafas', 'harorat', 'yomon', 'bemor',
        'menga', 'men', 'juda'
    ]

    # Letters that exist in the Uzbek Cyrillic alphabet but not in the Russian one
    uzbek_only_cyrillic = "ўқғҳ"

    # Share of Cyrillic characters among all letters
    cyrillic_chars = sum(1 for c in text if '\u0400' <= c <= '\u04FF')
    total_chars = sum(1 for c in text if c.isalpha())

    # Uzbek keywords stay substring-matched: Uzbek attaches suffixes to the
    # stem ("bosh" -> "boshim"), so substring hits are useful here.
    uzb_count = sum(1 for keyword in uzbek_keywords if keyword in text_lower)

    if total_chars > 0 and cyrillic_chars / total_chars > 0.5:
        # Mostly Cyrillic: Russian or Uzbek Cyrillic.
        # An Uzbek-only letter settles it immediately. Without this check the
        # one-letter Russian keyword 'я' matches inside Uzbek words.
        if any(c in uzbek_only_cyrillic for c in text_lower):
            return "uzb"

        rus_count = sum(1 for keyword in russian_keywords if keyword in text_lower)
        return "rus" if rus_count > uzb_count else "uzb"

    # Latin script: English or Uzbek.
    # English keywords are matched as WHOLE WORDS. Substring matching made
    # 'i', 'am', 'is', 'it', 'and' fire inside ordinary Uzbek words
    # (e.g. "isitmam", "baland"), misreporting Uzbek text as English.
    normalized = text_lower.replace("\u2019", "'")  # typographic apostrophe -> ASCII
    words = set(re.findall(r"[a-z]+(?:'[a-z]+)*", normalized))
    eng_count = sum(1 for keyword in english_keywords if keyword in words)

    if eng_count > uzb_count and eng_count >= 2:
        return "eng"

    # Default: Uzbek
    return "uzb"


# ==================== STT (Speech-to-Text) ====================

def transcribe_audio_from_bytes(audio_bytes: bytes) -> str:
    """
    Transcribe in-memory audio (WEBM, MP3, etc.) to text.

    The bytes are piped through FFmpeg to obtain 16 kHz mono WAV, decoded into
    a NumPy array with soundfile and passed to the STT pipeline. When the
    transcript is detected as Russian it is additionally passed through
    ``russian_latin_to_cyrillic``; otherwise it is returned unchanged.

    Args:
        audio_bytes: Raw audio bytes in any container FFmpeg can read

    Returns:
        The transcribed text

    Raises:
        RuntimeError: If the STT model is not loaded, or FFmpeg fails to
            convert the audio (the original CalledProcessError is chained).
        Exception: Any other error from decoding or inference is logged and
            re-raised unchanged.
    """
    logger.info(f"🎙️ Audio baytlar transkripsiya uchun qabul qilindi. Hajmi: {len(audio_bytes)} bayt")

    if not MODEL_STATUS["stt"]:
        logger.error("STT modeli yuklanmagan.")
        raise RuntimeError("STT modeli ishlamaydi")

    try:
        # STEP 1: convert the container/codec with FFmpeg, entirely in memory.
        # FFmpeg reads the input from stdin and writes 16 kHz WAV to stdout.
        ffmpeg_command = [
            "ffmpeg",
            "-i", "pipe:0",          # input comes from standard input
            "-f", "wav",             # output format: WAV
            "-ac", "1",              # channel count: 1 (mono)
            "-ar", "16000",          # sample rate: 16000 Hz (the Whisper standard)
            "pipe:1"                 # output goes to standard output
        ]

        logger.info("FFmpeg bilan audio konvertatsiya boshlanmoqda...")
        process = subprocess.run(
            ffmpeg_command,
            input=audio_bytes,
            capture_output=True,
            check=True
        )
        wav_audio_bytes = process.stdout
        logger.info(f"✅ FFmpeg muvaffaqiyatli yakunlandi. WAV hajmi: {len(wav_audio_bytes)} bayt.")

        # STEP 2: decode the WAV bytes into a NumPy array
        audio_stream = io.BytesIO(wav_audio_bytes)
        audio, sampling_rate = sf.read(audio_stream)
        logger.debug(f"WAV ma'lumot NumPy array'ga o'girildi. Shape: {audio.shape}, Rate: {sampling_rate}")

        # STEP 3: run the speech-recognition pipeline
        generate_kwargs = {"language": "uzbek", "task": "transcribe"}
        logger.info("🚀 Whisper modeliga transkripsiya uchun so'rov yuborilmoqda...")

        outputs = MODELS["stt_pipeline"](
            audio,
            chunk_length_s=30,
            generate_kwargs=generate_kwargs
        )
        result_text = outputs.get("text", "").strip()
        logger.info(f"✅ Transkripsiya yakunlandi. Natija: '{result_text}'")

        # Russian transcripts are converted to Cyrillic; Uzbek, English and
        # anything else are returned exactly as the model produced them.
        if detect_language(result_text) == "rus":
            from app.utils.translit import russian_latin_to_cyrillic
            result_text_cyrillic = russian_latin_to_cyrillic(result_text)
            logger.info(f"🔄 Ruscha kirilga o'tkazildi: '{result_text_cyrillic}'")
            return result_text_cyrillic

        return result_text

    except subprocess.CalledProcessError as e:
        # FFmpeg's stderr is arbitrary bytes; decode leniently so that logging
        # the failure can never raise and hide the real error.
        stderr_text = e.stderr.decode(errors="replace") if e.stderr else ""
        logger.error(f"❌ FFmpeg xatoligi: {stderr_text}", exc_info=True)
        raise RuntimeError("FFmpeg audio konvertatsiya qila olmadi.") from e
    except Exception as e:
        logger.error(f"❌ STT transkripsiya (baytlardan) xatoligi: {e}", exc_info=True)
        raise


def transcribe_audio(audio_path: str) -> Generator[str, None, None]:
    """
    Read an audio file from disk and yield its transcription.

    Errors are not raised to the caller: each failure is logged and yielded
    as a human-readable message instead.

    Args:
        audio_path: Path of the audio file

    Yields:
        str: The transcribed text, a "no speech detected" placeholder when the
        transcript is empty, or an error message.
    """
    try:
        logger.info(f"Fayldan audio o'qilmoqda: {audio_path}")
        with open(audio_path, "rb") as audio_file:
            raw_audio = audio_file.read()

        transcript = transcribe_audio_from_bytes(raw_audio)

        # An empty transcript is reported with a placeholder message
        yield transcript if transcript else "Ovoz aniqlanmadi"

    except FileNotFoundError as missing:
        logger.error(f"❌ Fayl topilmadi: {missing}")
        yield f"Fayl topilmadi: {missing}"

    except ValueError as invalid:
        logger.error(f"❌ Validatsiya xatoligi: {invalid}")
        yield f"Xatolik: {invalid}"

    except Exception as failure:
        logger.error(f"❌ Fayldan STT transkripsiya xatoligi: {failure}", exc_info=True)
        yield f"Ovozni tanishda xatolik: {failure}"


# ==================== JSON PARSING (ROBUST) ====================

def extract_json_from_response(response_text: str) -> Dict:
    """
    Extract a JSON object from an LLM reply, tolerating surrounding text.

    Strategy:
      1. Parse the whole reply as JSON and accept it if it is an object.
      2. Otherwise try a real JSON decode at every ``{`` in the reply and
         return the first object that decodes. This copes with prose before
         or after the JSON, Markdown code fences, arbitrarily deep nesting
         and braces inside string values.
      3. If nothing decodes, return a default "yellow" response.

    This function never raises; any unexpected error yields a default response.

    Args:
        response_text: Raw text returned by Gemini

    Returns:
        Dict: The parsed JSON object, or a default response
    """
    try:
        # 1. Try to parse the reply directly
        try:
            parsed = json.loads(response_text)
        except json.JSONDecodeError:
            parsed = None
        # Only an object satisfies the Dict contract; a top-level list or
        # scalar falls through to the scan below.
        if isinstance(parsed, dict):
            return parsed

        # 2. Scan for an embedded object. raw_decode parses one JSON value
        #    starting at the given index and ignores whatever follows it.
        decoder = json.JSONDecoder()
        start = response_text.find("{")
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(response_text, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            start = response_text.find("{", start + 1)

        # 3. Nothing found - return the default
        logger.warning(f"⚠️ JSON topilmadi, default qaytarilyapti. Response: {response_text[:200]}...")
        return {
            "risk_level": "sariq",
            "response_text": "Kechirasiz, javobni qayta ishlashda muammo yuz berdi. Iltimos, boshqacha aytib ko'ring.",
            "language": "uzb",
            "address_extracted": None,
            "district_extracted": None,
            "symptoms_extracted": None,
            "analysis_notes": "JSON parsing failed, returned default"
        }

    except Exception as e:
        # e.g. response_text is None or not a string
        logger.error(f"❌ JSON parsing xatoligi: {e}")
        return {
            "risk_level": "sariq",
            "response_text": "Texnik xatolik yuz berdi. Iltimos, qayta urinib ko'ring.",
            "language": "uzb",
            "address_extracted": None,
            "district_extracted": None,
            "symptoms_extracted": None,
            "analysis_notes": f"Exception in JSON parsing: {str(e)}"
        }


# ==================== LLM (Gemini) ====================

def _default_llm_response(response_text: str, analysis_notes: str) -> Dict:
    """Build the safe "yellow" payload returned when the LLM reply cannot be used."""
    return {
        "risk_level": "sariq",
        "response_text": response_text,
        "language": "uzb",
        "address_extracted": None,
        "district_extracted": None,
        "symptoms_extracted": None,
        # Present on the success path too, so callers can rely on the key.
        "pre_arrival_instruction_text": None,
        "analysis_notes": analysis_notes
    }


def _fallback_response_text(risk, action, lang) -> str:
    """Choose a canned patient-facing reply for a given risk level, action and language."""
    if risk == "yashil" and action == "offer_doctor_recommendation":
        texts = {
            "rus": "Понятно, не о чем беспокоиться. Хотите, я порекомендую вам подходящего врача?",
            "eng": "I understand, no need to worry. Would you like me to recommend a suitable doctor for you?",
            "uzb": "Tushunarli, xavotirga o'rin yo'q. Sizga mos shifokor tavsiya qilishimni xohlaysizmi?",
        }
    elif risk in ("qizil", "sariq"):
        texts = {
            "rus": "Понимаю, это серьезно. Пожалуйста, назовите ваш точный адрес, мы отправляем бригаду.",
            "eng": "I understand, this is serious. Please tell me your exact address, we are sending a team.",
            "uzb": "Tushundim, bu jiddiy holat. Iltimos, aniq manzilingizni ayting, brigada yuboryapmiz.",
        }
    else:
        # No specific situation matched: generic "please repeat" reply
        texts = {
            "rus": "Извините, я не совсем вас поняла. Пожалуйста, повторите.",
            "eng": "Sorry, I didn't quite understand. Please repeat.",
            "uzb": "Kechirasiz, sizni to'liq tushunmadim. Iltimos, qaytadan ayting.",
        }

    # Any unknown or non-string language value falls back to Uzbek.
    if not isinstance(lang, str):
        return texts["uzb"]
    return texts.get(lang, texts["uzb"])


def get_gemini_response(prompt: str, stream: bool = False) -> Dict:
    """
    Query Gemini and return its reply as a validated dictionary.

    The raw reply is parsed with ``extract_json_from_response`` and then
    normalised: ``risk_level`` is forced into the allowed set, a missing,
    empty or non-string ``response_text`` is replaced by a canned reply,
    ``language`` is forced into ``uzb``/``eng``/``rus`` (detected from the
    reply text when absent), and the optional fields are filled with None.

    This function does not raise: every error, including an unloaded model
    or ``stream=True``, is logged and turned into a default "yellow" payload.

    Args:
        prompt: The patient's request together with the conversation history
        stream: Streaming mode (not supported; results in the error payload)

    Returns:
        Dict: The parsed and validated response
    """
    raw_text = None
    try:
        if not MODEL_STATUS["llm"]:
            raise RuntimeError("LLM modeli ishlamaydi")

        if stream:
            raise NotImplementedError("JSON tahlili uchun stream rejimi qo'llab-quvvatlanmaydi")

        full_prompt = f"{SYSTEM_INSTRUCTION}\n\nSuhbat Tarixi:\n{prompt}"

        logger.info("🧠 Gemini'ga so'rov yuborilmoqda...")
        response = MODELS["llm"].generate_content(full_prompt)
        raw_text = response.text

        logger.info(f"✅ Gemini javobi qabul qilindi ({len(raw_text)} belgi)")
        logger.debug(f"Raw response: {raw_text[:200]}...")

        # Robust JSON parsing
        response_data = extract_json_from_response(raw_text)

        # Everything below indexes by key, so a non-object result (list,
        # number, ...) is replaced by the parse-failure default.
        if not isinstance(response_data, dict):
            logger.warning("⚠️ Gemini javobi JSON obyekt emas, default qaytarilyapti.")
            response_data = _default_llm_response(
                "Kechirasiz, javobni qayta ishlashda muammo yuz berdi. Iltimos, boshqacha aytib ko'ring.",
                "LLM response was not a JSON object"
            )

        # Validate required fields and apply defaults
        if "risk_level" not in response_data or response_data["risk_level"] not in ["qizil", "sariq", "yashil"]:
            logger.warning(f"⚠️ risk_level noto'g'ri: {response_data.get('risk_level')}, default: sariq")
            response_data["risk_level"] = "sariq"

        # A null or non-string response_text is treated like a missing one.
        # Calling .strip() on it used to raise and discard the whole reply,
        # including a valid risk_level.
        reply_text = response_data.get("response_text")
        if not isinstance(reply_text, str) or not reply_text.strip():
            logger.warning("⚠️ Gemini 'response_text' maydonini qaytarmadi. Fallback javob shakllantirilmoqda.")
            response_data["response_text"] = _fallback_response_text(
                response_data.get("risk_level"),
                response_data.get("action"),
                response_data.get("language", "uzb")  # take the language into account too
            )

        # CRITICAL: the "language" field must always be present and valid
        if "language" not in response_data or response_data["language"] not in ["uzb", "eng", "rus"]:
            # Fallback: detect the language from response_text
            detected_lang = detect_language(response_data.get("response_text", ""))
            logger.warning(f"⚠️ Gemini 'language' qaytarmadi, fallback: {detected_lang}")
            response_data["language"] = detected_lang

        # Make sure the remaining optional fields exist
        for field in ["address_extracted", "district_extracted", "symptoms_extracted", "pre_arrival_instruction_text", "analysis_notes"]:
            if field not in response_data:
                response_data[field] = None

        logger.info(f"✅ Gemini javobi to'liq validatsiya qilindi: risk={response_data['risk_level']}, lang={response_data['language']}")

        return response_data

    except json.JSONDecodeError as e:
        logger.error(f"❌ JSON decode xatoligi: {e}")
        logger.error(f"Response matn: {raw_text if raw_text is not None else 'N/A'}")

        return _default_llm_response(
            "Kechirasiz, javobni qayta ishlashda muammo yuz berdi. Iltimos, boshqacha aytib ko'ring.",
            f"JSON decode error: {str(e)}"
        )

    except Exception as e:
        logger.error(f"❌ LLM kutilmagan xatolik: {e}", exc_info=True)

        return _default_llm_response(
            "Texnik xatolik yuz berdi. Iltimos, bir oz kuting va qayta urinib ko'ring.",
            f"Unexpected error: {str(e)}"
        )


# ==================== TTS (Text-to-Speech) ====================

def synthesize_speech(text: str, output_path: str, language: str = "uzb") -> bool:
    """
    Convert text to speech and save it as a WAV file (uzb, eng, rus).

    The output path is validated (and given a ``.wav`` suffix if missing).
    A requested English or Russian voice falls back to Uzbek when its model
    is not loaded. Uzbek text is transliterated to Cyrillic; Russian text is
    converted to Cyrillic when less than half of its letters are Cyrillic.

    Args:
        text: Text to be spoken
        output_path: Destination file path (for example "static/audio/tts_case_025.wav")
        language: "uzb" | "eng" | "rus"

    Returns:
        bool: True on success, False on any failure (the error is logged)
    """
    try:
        # ---------- validation ----------

        path_is_blank = not output_path or not output_path.strip()
        if path_is_blank:
            logger.error("❌ output_path bo'sh!")
            return False

        if not output_path.endswith('.wav'):
            logger.warning(f"⚠️ output_path .wav bilan tugamaydi: {output_path}")
            output_path = output_path + '.wav'

        # Fall back to the Uzbek voice when the requested model is unavailable
        if language in ("eng", "rus") and not MODEL_STATUS.get(f"tts_{language}", False):
            if language == "eng":
                logger.warning("⚠️ TTS_ENG modeli yo'q, TTS_UZB ishlatilmoqda")
            else:
                logger.warning("⚠️ TTS_RUS modeli yo'q, TTS_UZB ishlatilmoqda")
            language = "uzb"

        if language == "uzb" and not MODEL_STATUS.get("tts_uzb", False):
            raise RuntimeError("TTS_UZB modeli ishlamaydi")

        if not text or not text.strip():
            raise ValueError("Bo'sh matn ovozga aylantirilmaydi")

        # Cap the input length
        char_count = len(text)
        if char_count > 1000:
            logger.warning(f"⚠️ Matn juda uzun ({char_count} belgi), qisqartirilmoqda...")
            text = text[:1000] + "..."

        # ---------- text preparation ----------

        if language == "uzb":
            # The Uzbek voice expects Cyrillic script
            from app.utils.translit import lotin_to_kirill, clean_cyrillic_text
            cleaned_text = clean_cyrillic_text(lotin_to_kirill(text))

        elif language == "rus":
            # The Russian voice expects Cyrillic script
            from app.utils.translit import russian_latin_to_cyrillic, clean_cyrillic_text

            # Share of Cyrillic characters among the letters
            cyrillic_letters = sum(1 for ch in text if '\u0400' <= ch <= '\u04FF')
            all_letters = sum(1 for ch in text if ch.isalpha())

            if cyrillic_letters / max(all_letters, 1) < 0.5:  # less than 50% Cyrillic
                logger.info("🔄 Ruscha matn lotindan kirilga o'tkazilmoqda...")
                text = russian_latin_to_cyrillic(text)

            cleaned_text = clean_cyrillic_text(text)

        else:
            # English (and any other value): only trim whitespace
            cleaned_text = text.strip()

        if not cleaned_text.strip():
            raise ValueError("Tozalangan matn bo'sh")

        logger.info(f"🗣️ TTS ({language.upper()}): '{cleaned_text[:50]}{'...' if len(cleaned_text) > 50 else ''}'")

        # ---------- model selection ----------

        pipeline_key = f"tts_{language}_pipeline"
        if pipeline_key not in MODELS:
            raise RuntimeError(f"{pipeline_key} topilmadi")

        # ---------- speech generation ----------

        tts_result = MODELS[pipeline_key](cleaned_text)

        import torch
        import torchaudio

        # Normalise the waveform to two dimensions before saving
        waveform = torch.tensor(tts_result["audio"])
        n_dims = waveform.dim()
        if n_dims == 3:
            waveform = waveform.squeeze(0)
        elif n_dims == 1:
            waveform = waveform.unsqueeze(0)

        # ---------- save to file ----------

        # Create the parent directory; skipped when the path has no directory part
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
            logger.info(f"📁 Papka tekshirildi: {parent_dir}")

        torchaudio.save(
            output_path,
            src=waveform,
            sample_rate=tts_result["sampling_rate"]
        )

        logger.info(f"✅ Ovoz fayli saqlandi: {output_path}")
        return True

    except Exception as e:
        logger.error(f"❌ TTS xatoligi: {e}", exc_info=True)
        return False