Spaces:
Paused
Paused
# app/services/models.py - FULLY IMPROVED VERSION
"""
Functions for working with the AI models (STT, TTS, LLM).

IMPROVEMENTS:
1. JSON parsing - robust and error-tolerant
2. Multi-language - full support for 3 languages (uzb, eng, rus)
3. TTS - speed setting removed, only the language parameter remains
4. Error handling - try-except everywhere
5. Fallback responses - a default response is returned on error
"""
| import subprocess | |
| import numpy as np | |
| import soundfile as sf | |
| import io | |
| import os | |
| import torch | |
| import torchaudio | |
| import google.generativeai as genai | |
| import logging | |
| import json | |
| import re | |
| from typing import Optional, Generator, Dict | |
| from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor | |
| from app.core.config import GEMINI_API_KEY, SYSTEM_INSTRUCTION | |
| from app.utils.translit import lotin_to_kirill, clean_cyrillic_text | |
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- SETTINGS ---
genai.configure(api_key=GEMINI_API_KEY)

# Use the first CUDA device with half precision when a GPU is available,
# otherwise fall back to the CPU with full precision.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
    TORCH_DTYPE = torch.float16
else:
    DEVICE = "cpu"
    TORCH_DTYPE = torch.float32

# Registry of loaded model objects, filled in by load_models().
MODELS = {}

# Readiness flag per model; every model starts out as "not loaded".
MODEL_STATUS = dict.fromkeys(
    ("stt", "tts_uzb", "tts_eng", "tts_rus", "llm"),
    False,
)

# Audio format validation: file extension -> MIME type.
SUPPORTED_AUDIO_FORMATS = dict(
    webm="audio/webm",
    wav="audio/wav",
    mp3="audio/mpeg",
    ogg="audio/ogg",
    m4a="audio/mp4",
)

MAX_AUDIO_SIZE = 100 * 1024 ** 2  # 100MB
MIN_AUDIO_DURATION = 0.5  # 0.5 seconds
MAX_AUDIO_DURATION = 300  # 5 minutes
| # ==================== MODEL YUKLASH ==================== | |
def load_models():
    """Load every model and record which ones are ready.

    Loads the STT pipeline, the three TTS pipelines (uzb, eng, rus) and the
    Gemini LLM handle into ``MODELS``. Each load is attempted independently:
    a failure is logged and the matching ``MODEL_STATUS`` flag is set to
    ``False`` so the remaining models can still be used.

    Raises:
        RuntimeError: if not a single model could be loaded.
    """
    logger.info("π Modellar Hugging Face Hub'dan yuklanmoqda...")

    # ========== STT MODEL ==========
    stt_repo = "islomov/rubaistt_v2_medium"
    try:
        logger.info(f" π₯ STT ({stt_repo}) modeli yuklanmoqda...")
        speech_model = AutoModelForSpeechSeq2Seq.from_pretrained(
            stt_repo,
            torch_dtype=TORCH_DTYPE,
            low_cpu_mem_usage=True,
            use_safetensors=True,
        )
        speech_model.to(DEVICE)
        speech_processor = AutoProcessor.from_pretrained(stt_repo)
        MODELS["stt_pipeline"] = pipeline(
            "automatic-speech-recognition",
            model=speech_model,
            tokenizer=speech_processor.tokenizer,
            feature_extractor=speech_processor.feature_extractor,
            max_new_tokens=128,
            torch_dtype=TORCH_DTYPE,
            device=DEVICE,
        )
        MODEL_STATUS["stt"] = True
        logger.info(" β STT modeli tayyor")
    except Exception as e:
        logger.error(f" β STT modelini yuklashda xatolik: {e}")
        MODEL_STATUS["stt"] = False

    # ========== TTS MODELS (3 of them: uzb, eng, rus) ==========
    # (language code, Hub repo id, label in the "loading" log line,
    #  label in the "ready"/"failed" log lines)
    tts_specs = (
        ("uzb", "facebook/mms-tts-uzb-script_cyrillic", "O'ZBEKCHA", "O'ZBEK"),
        ("eng", "facebook/mms-tts-eng", "INGLIZCHA", "INGLIZ"),
        ("rus", "facebook/mms-tts-rus", "RUSCHA", "RUS"),
    )
    for lang_code, repo_id, loading_label, result_label in tts_specs:
        status_key = f"tts_{lang_code}"
        try:
            logger.info(f" π§ TTS {loading_label} ({repo_id}) modeli yuklanmoqda...")
            MODELS[f"{status_key}_pipeline"] = pipeline(
                "text-to-speech",
                model=repo_id,
                device=DEVICE,
            )
            MODEL_STATUS[status_key] = True
            logger.info(f" β TTS {result_label} modeli tayyor")
        except Exception as e:
            logger.error(f" β TTS {result_label} modelini yuklashda xatolik: {e}")
            MODEL_STATUS[status_key] = False

    # ========== LLM MODEL (Gemini) ==========
    try:
        logger.info(" π§ LLM (Gemini) modeli yuklanmoqda...")
        # The existing code used "gemini-2.0-flash-exp", so that model id is kept.
        MODELS["llm"] = genai.GenerativeModel("gemini-2.0-flash-exp")
        MODEL_STATUS["llm"] = True
        logger.info(" β LLM modeli tayyor")
    except Exception as e:
        logger.error(f" β LLM modelini yuklashda xatolik: {e}")
        MODEL_STATUS["llm"] = False

    # ========== RESULT ==========
    if not any(MODEL_STATUS.values()):
        raise RuntimeError("β CRITICAL: Hech qanday model yuklanmadi. Loyiha ishlay olmaydi.")

    separator = "=" * 60
    logger.info(separator)
    logger.info("β Modellar yuklash yakunlandi:")
    for model_name, is_ready in MODEL_STATUS.items():
        state = 'β Tayyor' if is_ready else 'β Yuklanmadi'
        logger.info(f" {model_name}: {state}")
    logger.info(separator)
def check_model_status() -> dict:
    """Return a snapshot of the per-model readiness flags.

    A shallow copy is returned so callers cannot modify ``MODEL_STATUS``.
    """
    return dict(MODEL_STATUS)
| # ==================== TIL ANIQLASH ==================== | |
def detect_language(text: str) -> str:
    """Guess the language of *text* from its script and a few keywords.

    Keywords are matched as whole words (or whole phrases), never as
    substrings, so short words such as "i", "is" or "am" do not fire inside
    longer words of another language.

    Args:
        text: Text to analyse.

    Returns:
        "uzb" | "eng" | "rus". Uzbek is the default whenever the text is too
        short or no other language clearly wins.
    """
    if not text or len(text.strip()) < 3:
        return "uzb"  # Default: Uzbek

    # English keywords
    english_keywords = (
        'hello', 'help', 'my', 'heart', 'pain', "can't", 'breathe',
        'chest', 'head', 'stomach', 'feel', 'sick', 'please',
        'i', 'am', 'the', 'and', 'have', 'is', 'it', 'hurts',
    )
    # Russian keywords (Cyrillic)
    russian_keywords = (
        'привет', 'помогите', 'болит', 'сердце', 'голова', 'живот',
        'не могу', 'дышать', 'помощь', 'температура', 'у меня',
        'я', 'мне', 'очень', 'плохо',
    )
    # Uzbek keywords: Latin spellings plus their Cyrillic equivalents, so the
    # Cyrillic branch below can actually recognise Uzbek text.
    uzbek_keywords = (
        'salom', 'assalomu', 'yordam', 'yurak', 'bosh', 'qorin',
        "og'rig'i", 'nafas', 'harorat', 'yomon', 'bemor',
        'menga', 'men', 'juda',
        'салом', 'ассалому', 'ёрдам', 'юрак', 'бош', 'қорин',
        'оғриғи', 'нафас', 'ҳарорат', 'ёмон', 'бемор',
        'менга', 'мен', 'жуда',
    )

    # Normalise typographic apostrophes so "og‘rig‘i" / "can’t" match the
    # ASCII-apostrophe keywords.
    text_lower = text.lower()
    for mark in ("\u2019", "\u2018", "\u02bb", "\u02bc", "`"):
        text_lower = text_lower.replace(mark, "'")

    # Words are runs of letters, optionally joined by apostrophes. Joining
    # them with single spaces (and padding both ends) lets one containment
    # test handle single words and multi-word phrases alike.
    words = re.findall(r"[^\W\d_]+(?:'[^\W\d_]+)*", text_lower)
    padded = " " + " ".join(words) + " "

    def count_hits(keywords) -> int:
        return sum(1 for keyword in keywords if f" {keyword} " in padded)

    # Share of Cyrillic letters among all letters
    cyrillic_chars = sum(1 for c in text if '\u0400' <= c <= '\u04FF')
    total_chars = sum(1 for c in text if c.isalpha())

    if total_chars > 0 and cyrillic_chars / total_chars > 0.5:
        # Mostly Cyrillic: either Russian or Cyrillic-script Uzbek
        if count_hits(russian_keywords) > count_hits(uzbek_keywords):
            return "rus"
        return "uzb"

    # Latin script: English or Uzbek. English needs at least two keyword
    # hits and must beat the Uzbek count.
    eng_count = count_hits(english_keywords)
    if eng_count >= 2 and eng_count > count_hits(uzbek_keywords):
        return "eng"

    # Default: Uzbek
    return "uzb"
| # ==================== STT (Speech-to-Text) ==================== | |
def transcribe_audio_from_bytes(audio_bytes: bytes) -> str:
    """Transcribe in-memory audio (WEBM, MP3, etc.) to text.

    The bytes are piped through FFmpeg to obtain 16 kHz mono WAV, decoded
    into a NumPy array and passed to the STT pipeline. When the transcript is
    detected as Russian it is additionally passed through
    ``russian_latin_to_cyrillic``.

    Args:
        audio_bytes: Raw audio bytes in any container FFmpeg can read.

    Returns:
        The transcribed text (Cyrillic when detected as Russian).

    Raises:
        RuntimeError: if the STT model is not loaded or FFmpeg fails to
            convert the audio (the ``CalledProcessError`` is chained as the
            cause).
        Exception: any other error from decoding or inference is logged and
            re-raised unchanged.
    """
    logger.info(f"ποΈ Audio baytlar transkripsiya uchun qabul qilindi. Hajmi: {len(audio_bytes)} bayt")
    if not MODEL_STATUS["stt"]:
        logger.error("STT modeli yuklanmagan.")
        raise RuntimeError("STT modeli ishlamaydi")

    try:
        # STEP 1: convert the format with FFmpeg, fully in memory.
        # FFmpeg reads the input from stdin and writes 16 kHz WAV to stdout.
        ffmpeg_command = [
            "ffmpeg",
            "-i", "pipe:0",   # input comes from standard input (stdin)
            "-f", "wav",      # output format: WAV
            "-ac", "1",       # channel count: 1 (mono)
            "-ar", "16000",   # sample rate: 16000 Hz (standard for Whisper)
            "pipe:1",         # output goes to standard output (stdout)
        ]
        logger.info("FFmpeg bilan audio konvertatsiya boshlanmoqda...")
        process = subprocess.run(
            ffmpeg_command,
            input=audio_bytes,
            capture_output=True,
            check=True,
        )
        wav_audio_bytes = process.stdout
        logger.info(f"β FFmpeg muvaffaqiyatli yakunlandi. WAV hajmi: {len(wav_audio_bytes)} bayt.")

        # STEP 2: decode the WAV bytes into a NumPy array
        audio, sampling_rate = sf.read(io.BytesIO(wav_audio_bytes))
        logger.debug(f"WAV ma'lumot NumPy array'ga o'girildi. Shape: {audio.shape}, Rate: {sampling_rate}")

        # STEP 3: hand the samples to the Whisper pipeline
        generate_kwargs = {"language": "uzbek", "task": "transcribe"}
        logger.info("π Whisper modeliga transkripsiya uchun so'rov yuborilmoqda...")
        outputs = MODELS["stt_pipeline"](
            audio,
            chunk_length_s=30,
            generate_kwargs=generate_kwargs,
        )
        result_text = outputs.get("text", "").strip()
        logger.info(f"β Transkripsiya yakunlandi. Natija: '{result_text}'")

        # Russian transcripts are converted Latin -> Cyrillic; Uzbek, English
        # and anything else are returned exactly as transcribed.
        # NOTE(review): detect_language() only reports "rus" for text that is
        # already mostly Cyrillic, so a Latin-script Russian transcript would
        # not reach this branch - confirm that is the intended behaviour.
        if detect_language(result_text) == "rus":
            from app.utils.translit import russian_latin_to_cyrillic
            result_text_cyrillic = russian_latin_to_cyrillic(result_text)
            logger.info(f"π Ruscha kirilga o'tkazildi: '{result_text_cyrillic}'")
            return result_text_cyrillic
        return result_text
    except subprocess.CalledProcessError as e:
        # Decode leniently: FFmpeg's stderr is not guaranteed to be valid
        # UTF-8, and a decode failure here would hide the real error.
        stderr_text = (e.stderr or b"").decode("utf-8", errors="replace")
        logger.error(f"β FFmpeg xatoligi: {stderr_text}", exc_info=True)
        raise RuntimeError("FFmpeg audio konvertatsiya qila olmadi.") from e
    except Exception as e:
        logger.error(f"β STT transkripsiya (baytlardan) xatoligi: {e}", exc_info=True)
        raise
def transcribe_audio(audio_path: str) -> Generator[str, None, None]:
    """Read an audio file from disk and yield its transcription.

    Exactly one string is yielded: the transcript, a "no speech detected"
    placeholder when the transcript is empty, or an error message when
    reading or transcription fails. Errors are logged, not raised.

    Args:
        audio_path: Path of the audio file.

    Yields:
        str: The transcribed text or an error message.
    """
    try:
        logger.info(f"Fayldan audio o'qilmoqda: {audio_path}")
        with open(audio_path, "rb") as audio_file:
            raw_audio = audio_file.read()
        transcript = transcribe_audio_from_bytes(raw_audio)
        yield transcript or "Ovoz aniqlanmadi"
    except FileNotFoundError as e:
        logger.error(f"β Fayl topilmadi: {e}")
        yield f"Fayl topilmadi: {e}"
    except ValueError as e:
        logger.error(f"β Validatsiya xatoligi: {e}")
        yield f"Xatolik: {e}"
    except Exception as e:
        logger.error(f"β Fayldan STT transkripsiya xatoligi: {e}", exc_info=True)
        yield f"Ovozni tanishda xatolik: {e}"
| # ==================== JSON PARSING (ROBUST) ==================== | |
def _default_json_payload(response_text: str, analysis_notes: str) -> Dict:
    """Build the default payload returned when no JSON object can be parsed."""
    return {
        "risk_level": "sariq",
        "response_text": response_text,
        "language": "uzb",
        "address_extracted": None,
        "district_extracted": None,
        "symptoms_extracted": None,
        "analysis_notes": analysis_notes,
    }


def extract_json_from_response(response_text: str) -> Dict:
    """Extract a JSON object from an LLM reply, tolerating surrounding text.

    The reply is first parsed as-is. If that does not produce a JSON object,
    the text is scanned for ``{`` characters and a real JSON decode is
    attempted at each one. This copes with Markdown code fences, leading or
    trailing prose, arbitrarily deep nesting and braces inside string values.
    Only a JSON *object* is accepted; a bare list, string or number is not.

    Args:
        response_text: Raw text returned by Gemini.

    Returns:
        Dict: The first JSON object found, or a default response when none
        can be parsed. This function never raises.
    """
    try:
        # 1. Try to parse the whole reply directly.
        try:
            parsed = json.loads(response_text)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed

        # 2. Try to decode an object starting at every "{" in the text.
        #    raw_decode stops at the end of the first complete JSON value,
        #    so trailing text (closing fences, prose) is ignored.
        decoder = json.JSONDecoder()
        start = response_text.find("{")
        while start != -1:
            try:
                candidate, _end = decoder.raw_decode(response_text, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            start = response_text.find("{", start + 1)

        # 3. Nothing usable found - return the default.
        logger.warning(f"β οΈ JSON topilmadi, default qaytarilyapti. Response: {response_text[:200]}...")
        return _default_json_payload(
            "Kechirasiz, javobni qayta ishlashda muammo yuz berdi. Iltimos, boshqacha aytib ko'ring.",
            "JSON parsing failed, returned default",
        )
    except Exception as e:
        logger.error(f"β JSON parsing xatoligi: {e}")
        return _default_json_payload(
            "Texnik xatolik yuz berdi. Iltimos, qayta urinib ko'ring.",
            f"Exception in JSON parsing: {str(e)}",
        )
| # ==================== LLM (Gemini) ==================== | |
def _gemini_error_response(response_text: str, analysis_notes: str) -> Dict:
    """Build the safe default payload returned when the LLM call fails."""
    return {
        "risk_level": "sariq",
        "response_text": response_text,
        "language": "uzb",
        "address_extracted": None,
        "district_extracted": None,
        "symptoms_extracted": None,
        "pre_arrival_instruction_text": None,
        "analysis_notes": analysis_notes,
    }


def _fallback_response_text(risk, action, lang) -> str:
    """Pick a canned reply for the situation when the LLM supplied no text."""
    if risk == "yashil" and action == "offer_doctor_recommendation":
        texts = {
            "rus": "Понятно, не о чем беспокоиться. Хотите, я порекомендую вам подходящего врача?",
            "eng": "I understand, no need to worry. Would you like me to recommend a suitable doctor for you?",
            "uzb": "Tushunarli, xavotirga o'rin yo'q. Sizga mos shifokor tavsiya qilishimni xohlaysizmi?",
        }
    elif risk in ("qizil", "sariq"):
        texts = {
            "rus": "Понимаю, это серьезно. Пожалуйста, назовите ваш точный адрес, мы отправляем бригаду.",
            "eng": "I understand, this is serious. Please tell me your exact address, we are sending a team.",
            "uzb": "Tushundim, bu jiddiy holat. Iltimos, aniq manzilingizni ayting, brigada yuboryapmiz.",
        }
    else:
        # No specific situation matched - generic "please repeat" reply.
        texts = {
            "rus": "Извините, я не совсем вас поняла. Пожалуйста, повторите.",
            "eng": "Sorry, I didn't quite understand. Please repeat.",
            "uzb": "Kechirasiz, sizni to'liq tushunmadim. Iltimos, qaytadan ayting.",
        }
    # Anything that is not a known language code gets the Uzbek text.
    if not isinstance(lang, str):
        lang = "uzb"
    return texts.get(lang, texts["uzb"])


def get_gemini_response(prompt: str, stream: bool = False) -> Dict:
    """Query Gemini and return its reply as a validated dictionary.

    The reply is parsed with ``extract_json_from_response`` and then
    normalised so callers can rely on these keys being present:
    ``risk_level`` (one of "qizil", "sariq", "yashil"), ``response_text``
    (a non-empty string), ``language`` (one of "uzb", "eng", "rus"),
    ``address_extracted``, ``district_extracted``, ``symptoms_extracted``,
    ``pre_arrival_instruction_text`` and ``analysis_notes``.

    Args:
        prompt: The patient's request together with the conversation history.
        stream: Streaming mode; not supported for JSON analysis.

    Returns:
        Dict: The validated reply. This function does not raise: any failure
        (LLM not loaded, ``stream=True``, API error, unusable payload) is
        logged and reported as a default "sariq" response whose
        ``analysis_notes`` describes the problem.
    """
    try:
        if not MODEL_STATUS["llm"]:
            raise RuntimeError("LLM modeli ishlamaydi")
        if stream:
            raise NotImplementedError("JSON tahlili uchun stream rejimi qo'llab-quvvatlanmaydi")

        full_prompt = f"{SYSTEM_INSTRUCTION}\n\nSuhbat Tarixi:\n{prompt}"
        logger.info("π§ Gemini'ga so'rov yuborilmoqda...")
        response = MODELS["llm"].generate_content(full_prompt)
        raw_text = response.text
        logger.info(f"β Gemini javobi qabul qilindi ({len(raw_text)} belgi)")
        logger.debug(f"Raw response: {raw_text[:200]}...")

        # Robust JSON parsing
        response_data = extract_json_from_response(raw_text)
        if not isinstance(response_data, dict):
            # Valid JSON that is not an object (list, number, ...) cannot be
            # validated field by field below.
            logger.warning(f"Gemini response is not a JSON object: {type(response_data).__name__}")
            return _gemini_error_response(
                "Kechirasiz, javobni qayta ishlashda muammo yuz berdi. Iltimos, boshqacha aytib ko'ring.",
                "LLM response was not a JSON object",
            )

        # Validate required fields and fill in defaults
        if response_data.get("risk_level") not in ("qizil", "sariq", "yashil"):
            logger.warning(f"β οΈ risk_level noto'g'ri: {response_data.get('risk_level')}, default: sariq")
            response_data["risk_level"] = "sariq"

        # A missing, null, non-string or blank reply is replaced by a canned
        # text chosen from the risk level, the action and the language.
        reply_text = response_data.get("response_text")
        if not isinstance(reply_text, str) or not reply_text.strip():
            logger.warning("β οΈ Gemini 'response_text' maydonini qaytarmadi. Fallback javob shakllantirilmoqda.")
            response_data["response_text"] = _fallback_response_text(
                response_data.get("risk_level"),
                response_data.get("action"),
                response_data.get("language", "uzb"),
            )

        # CRITICAL: the "language" field must always be present and valid.
        if response_data.get("language") not in ("uzb", "eng", "rus"):
            # Fallback: detect the language from the reply text.
            detected_lang = detect_language(response_data["response_text"])
            logger.warning(f"β οΈ Gemini 'language' qaytarmadi, fallback: {detected_lang}")
            response_data["language"] = detected_lang

        # Make sure the remaining optional fields exist.
        for field in (
            "address_extracted",
            "district_extracted",
            "symptoms_extracted",
            "pre_arrival_instruction_text",
            "analysis_notes",
        ):
            response_data.setdefault(field, None)

        logger.info(f"β Gemini javobi to'liq validatsiya qilindi: risk={response_data['risk_level']}, lang={response_data['language']}")
        return response_data
    except Exception as e:
        logger.error(f"β LLM kutilmagan xatolik: {e}", exc_info=True)
        return _gemini_error_response(
            "Texnik xatolik yuz berdi. Iltimos, bir oz kuting va qayta urinib ko'ring.",
            f"Unexpected error: {str(e)}",
        )
| # ==================== TTS (Text-to-Speech) ==================== | |
def synthesize_speech(text: str, output_path: str, language: str = "uzb") -> bool:
    """Convert text to speech and save it as a WAV file (uzb, eng, rus).

    English and Russian fall back to the Uzbek voice when their own model is
    not loaded. Uzbek text is transliterated from Latin to Cyrillic before
    synthesis; Russian text is transliterated only when less than half of
    its letters are Cyrillic. Text longer than 1000 characters is truncated.

    Args:
        text: Text to be spoken.
        output_path: Destination file path (for example
            "static/audio/tts_case_025.wav"). ".wav" is appended when the
            path does not already end with it.
        language: "uzb" | "eng" | "rus".

    Returns:
        bool: True when the file was written, False on any failure (the
        error is logged, not raised).
    """
    try:
        # ========== VALIDATION ==========
        if not (output_path and output_path.strip()):
            logger.error("β output_path bo'sh!")
            return False
        if not output_path.endswith('.wav'):
            logger.warning(f"β οΈ output_path .wav bilan tugamaydi: {output_path}")
            output_path = output_path + '.wav'

        # Fall back to the Uzbek voice when the requested model is missing.
        for lang_code, model_label in (("eng", "TTS_ENG"), ("rus", "TTS_RUS")):
            if language == lang_code and not MODEL_STATUS.get(f"tts_{lang_code}", False):
                logger.warning(f"β οΈ {model_label} modeli yo'q, TTS_UZB ishlatilmoqda")
                language = "uzb"
        if language == "uzb" and not MODEL_STATUS.get("tts_uzb", False):
            raise RuntimeError("TTS_UZB modeli ishlamaydi")

        if not (text and text.strip()):
            raise ValueError("Bo'sh matn ovozga aylantirilmaydi")

        # Cap the text length
        if len(text) > 1000:
            logger.warning(f"β οΈ Matn juda uzun ({len(text)} belgi), qisqartirilmoqda...")
            text = text[:1000] + "..."

        # ========== TEXT PREPARATION ==========
        if language == "uzb":
            # The Uzbek voice needs Cyrillic input
            from app.utils.translit import lotin_to_kirill, clean_cyrillic_text
            cleaned_text = clean_cyrillic_text(lotin_to_kirill(text))
        elif language == "eng":
            # English only needs trimming
            cleaned_text = text.strip()
        elif language == "rus":
            # The Russian voice needs Cyrillic input
            from app.utils.translit import russian_latin_to_cyrillic, clean_cyrillic_text
            # Share of Cyrillic letters among all letters
            cyrillic_letters = [ch for ch in text if '\u0400' <= ch <= '\u04FF']
            all_letters = [ch for ch in text if ch.isalpha()]
            cyrillic_share = len(cyrillic_letters) / max(len(all_letters), 1)
            if cyrillic_share < 0.5:  # less than 50% Cyrillic
                logger.info("π Ruscha matn lotindan kirilga o'tkazilmoqda...")
                text = russian_latin_to_cyrillic(text)
            cleaned_text = clean_cyrillic_text(text)
        else:
            cleaned_text = text.strip()

        if not cleaned_text.strip():
            raise ValueError("Tozalangan matn bo'sh")

        preview = cleaned_text[:50]
        ellipsis = '...' if len(cleaned_text) > 50 else ''
        logger.info(f"π£οΈ TTS ({language.upper()}): '{preview}{ellipsis}'")

        # ========== MODEL SELECTION ==========
        pipeline_key = f"tts_{language}_pipeline"
        if pipeline_key not in MODELS:
            raise RuntimeError(f"{pipeline_key} topilmadi")

        # ========== SPEECH GENERATION ==========
        output = MODELS[pipeline_key](cleaned_text)

        # Bring the audio into a 2-D tensor before saving
        import torch
        import torchaudio
        waveform = torch.tensor(output["audio"])
        rank = waveform.dim()
        if rank == 3:
            waveform = waveform.squeeze(0)
        elif rank == 1:
            waveform = waveform.unsqueeze(0)

        # ========== SAVE TO FILE ==========
        # Create the target folder; skipped when the path has no directory part
        target_dir = os.path.dirname(output_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
            logger.info(f"π Papka tekshirildi: {target_dir}")

        torchaudio.save(
            output_path,
            src=waveform,
            sample_rate=output["sampling_rate"],
        )
        logger.info(f"β Ovoz fayli saqlandi: {output_path}")
        return True
    except Exception as e:
        logger.error(f"β TTS xatoligi: {e}", exc_info=True)
        return False