|
|
|
|
|
""" |
|
|
VITO APIλ₯Ό μ¬μ©ν μμ± μΈμ(STT) λͺ¨λ |
|
|
""" |
|
|
|
|
|
import os |
|
|
import logging |
|
|
import requests |
|
|
import json |
|
|
import time |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
# Module-level logger shared by everything in this file.
logger = logging.getLogger("VitoSTT")

# Install a default stream handler only when no handler is configured on this
# logger or any of its ancestors, so an application-level logging setup is
# left untouched.
if not logger.hasHandlers():
    handler = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    # NOTE(review): indentation was lost in the source; setLevel is assumed to
    # belong to this fallback branch - confirm against the original file.
    logger.setLevel(logging.INFO)
|
|
|
|
|
class VitoSTT:
    """Thin client for the VITO speech-to-text (STT) REST API.

    Credentials are read from the ``VITO_CLIENT_ID`` and
    ``VITO_CLIENT_SECRET`` environment variables. The access token returned
    by the authenticate endpoint is cached on the instance and reused until
    shortly before it expires.
    """

    # Polling schedule for a submitted transcription job.
    POLL_MAX_TRIES = 15
    POLL_INTERVAL_SEC = 2

    # Per-request timeouts, in seconds.
    TOKEN_TIMEOUT_SEC = 10
    SUBMIT_TIMEOUT_SEC = 20
    POLL_TIMEOUT_SEC = 10

    # Token lifetime assumed when the response has no "expires_in" field, and
    # the safety margin subtracted so a token is refreshed before it lapses.
    DEFAULT_TOKEN_TTL_SEC = 3600
    TOKEN_EXPIRY_MARGIN_SEC = 60

    def __init__(self):
        """Load credentials from the environment and reset the token cache.

        Missing credentials only produce warnings here; the failure is
        reported when a token or a transcription is actually requested.
        """
        self.client_id = os.getenv("VITO_CLIENT_ID")
        self.client_secret = os.getenv("VITO_CLIENT_SECRET")

        if not self.client_id or not self.client_secret:
            logger.warning("VITO API 인증 정보가 .env 파일에 설정되지 않았습니다.")
            logger.warning("VITO_CLIENT_ID와 VITO_CLIENT_SECRET를 확인하세요.")
        else:
            logger.info("VITO STT API 클라이언트 ID/Secret 로드 완료.")

        self.token_url = "https://openapi.vito.ai/v1/authenticate"
        self.stt_url = "https://openapi.vito.ai/v1/transcribe"

        # Cached token and the epoch time after which it must be refreshed.
        self.access_token = None
        self._token_expires_at = 0

    def get_access_token(self):
        """Return a usable access token, requesting a new one when needed.

        The cached token is returned while it is still within its lifetime.
        Otherwise the client credentials are posted to ``self.token_url``.

        Returns:
            The access token string.

        Raises:
            ValueError: credentials are not configured, or the response did
                not contain an ``access_token``.
            TimeoutError: the token request timed out.
            ConnectionError: any other ``requests`` failure (including a
                non-2xx status).
        """
        now = time.time()
        if self.access_token and now < self._token_expires_at:
            logger.debug("기존 VITO API 토큰 사용")
            return self.access_token

        if not self.client_id or not self.client_secret:
            logger.error("API 키가 설정되지 않아 토큰을 획득할 수 없습니다.")
            raise ValueError("VITO API 인증 정보가 설정되지 않았습니다.")

        logger.info("VITO API 액세스 토큰 요청 중...")
        try:
            response = requests.post(
                self.token_url,
                data={
                    "client_id": self.client_id,
                    "client_secret": self.client_secret,
                },
                timeout=self.TOKEN_TIMEOUT_SEC,
            )
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.Timeout as e:
            logger.error(f"VITO API 토큰 획득 시간 초과: {self.token_url}")
            raise TimeoutError("VITO API 토큰 획득 시간 초과") from e
        except requests.exceptions.RequestException as e:
            logger.error(f"VITO API 토큰 획득 실패: {e}")
            if getattr(e, "response", None) is not None:
                logger.error(
                    f"응답 코드: {e.response.status_code}, 내용: {e.response.text}"
                )
            raise ConnectionError(f"VITO API 토큰 획득 실패: {e}") from e

        token = result.get("access_token")
        if not token:
            # Validate before touching the expiry so a bad response never
            # leaves a half-updated cache behind.
            self.access_token = None
            logger.error("VITO API 응답에서 토큰을 찾을 수 없습니다.")
            raise ValueError("VITO API 토큰을 받아오지 못했습니다.")

        expires_in = result.get("expires_in", self.DEFAULT_TOKEN_TTL_SEC)
        self.access_token = token
        self._token_expires_at = (
            time.time() + expires_in - self.TOKEN_EXPIRY_MARGIN_SEC
        )

        logger.info("VITO API 액세스 토큰 획득 성공")
        return self.access_token

    def transcribe_audio(self, audio_bytes, language="ko"):
        """Convert audio bytes to text.

        Errors are reported through the returned dictionary rather than
        raised. When the API answers HTTP 401 the token is refreshed and the
        request is retried exactly once; a second 401 is returned as an
        ordinary HTTP error instead of retrying forever.

        Args:
            audio_bytes: Raw bytes of the audio file to upload.
            language: Language code placed in the request config
                (default: 'ko').

        Returns:
            A dictionary describing the outcome:
            {'success': True, 'text': '<recognized text>'}
            {'success': False, 'error': '<message>', 'details': '<details>'}
            ('details' is present only for some failures).
        """
        if not self.client_id or not self.client_secret:
            logger.error("API 키가 설정되지 않았습니다.")
            return {"success": False, "error": "API 키가 설정되지 않았습니다."}

        token_refreshed = False
        while True:
            try:
                return self._submit_and_poll(audio_bytes, language)

            except requests.exceptions.HTTPError as e:
                response = e.response
                status_code = (
                    response.status_code if response is not None else None
                )

                if status_code == 401 and not token_refreshed:
                    logger.warning(
                        "VITO API 토큰이 만료되었거나 유효하지 않습니다. 토큰 재발급 시도..."
                    )
                    # Drop the cached token so get_access_token() is forced
                    # to request a fresh one.
                    self.access_token = None
                    self._token_expires_at = 0
                    try:
                        self.get_access_token()
                    except Exception as token_e:
                        logger.error(f"토큰 재획득 실패: {token_e}")
                        return {
                            "success": False,
                            "error": f"토큰 재획득 실패: {str(token_e)}",
                        }
                    logger.info("새 토큰으로 재시도합니다.")
                    token_refreshed = True
                    continue

                error_body = ""
                try:
                    if response is not None:
                        error_body = response.text
                except Exception:
                    # Best effort only: the body is extra diagnostic detail.
                    pass
                logger.error(
                    f"VITO API HTTP 오류: {status_code}, 응답: {error_body}"
                )
                return {
                    "success": False,
                    "error": f"API HTTP 오류: {status_code}",
                    "details": error_body,
                }

            except requests.exceptions.Timeout:
                logger.error("VITO API 요청 시간 초과")
                return {"success": False, "error": "API 요청 시간 초과"}

            except requests.exceptions.RequestException as e:
                logger.error(f"VITO API 요청 중 네트워크 오류 발생: {str(e)}")
                return {
                    "success": False,
                    "error": "API 요청 네트워크 오류",
                    "details": str(e),
                }

            except Exception as e:
                # Boundary handler: callers expect a result dict, never an
                # exception, so log the traceback and report the failure.
                logger.error(
                    f"음성인식 처리 중 예상치 못한 오류 발생: {str(e)}",
                    exc_info=True,
                )
                return {
                    "success": False,
                    "error": "음성인식 내부 처리 실패",
                    "details": str(e),
                }

    def _submit_and_poll(self, audio_bytes, language):
        """Upload the audio once and poll the job until it finishes.

        Returns the result dictionary described in ``transcribe_audio``.
        ``requests`` exceptions and token errors propagate to the caller,
        which converts them into error dictionaries.
        """
        if not self.access_token or time.time() >= self._token_expires_at:
            logger.info("VITO API 토큰 획득/갱신 시도...")
            self.get_access_token()

        headers = {"Authorization": f"Bearer {self.access_token}"}
        files = {"file": ("audio_file", audio_bytes)}
        config = {
            "use_multi_channel": False,
            "use_itn": True,
            "use_disfluency_filter": True,
            "use_profanity_filter": False,
            "language": language,
        }
        data = {"config": json.dumps(config)}

        logger.info(f"VITO STT API ({self.stt_url}) 요청 전송 중...")
        response = requests.post(
            self.stt_url,
            headers=headers,
            files=files,
            data=data,
            timeout=self.SUBMIT_TIMEOUT_SEC,
        )
        response.raise_for_status()

        job_id = response.json().get("id")
        if not job_id:
            logger.error("VITO API 작업 ID를 받아오지 못했습니다.")
            return {
                "success": False,
                "error": "VITO API 작업 ID를 받아오지 못했습니다.",
            }

        logger.info(f"VITO STT 작업 ID: {job_id}, 결과 확인 시작...")

        transcript_url = f"{self.stt_url}/{job_id}"
        max_tries = self.POLL_MAX_TRIES
        wait_time = self.POLL_INTERVAL_SEC

        for try_count in range(max_tries):
            # Wait first: the job was only just submitted.
            time.sleep(wait_time)
            logger.debug(
                f"결과 확인 시도 ({try_count + 1}/{max_tries}) - URL: {transcript_url}"
            )
            get_response = requests.get(
                transcript_url,
                headers=headers,
                timeout=self.POLL_TIMEOUT_SEC,
            )
            get_response.raise_for_status()

            result = get_response.json()
            status = result.get("status")
            logger.debug(f"현재 상태: {status}")

            if status == "completed":
                # Tolerate a null "results"/"utterances" in the payload.
                utterances = (result.get("results") or {}).get("utterances") or []
                if not utterances:
                    logger.warning(
                        "VITO STT 완료되었으나 결과 utterances가 비어있습니다."
                    )
                    return {"success": True, "text": ""}

                transcript = " ".join(
                    seg["msg"] for seg in utterances if seg.get("msg")
                ).strip()
                logger.info(f"VITO STT 인식 성공 (일부): {transcript[:50]}...")
                return {"success": True, "text": transcript}

            if status == "failed":
                error_msg = (
                    f"VITO API 변환 실패: {result.get('message', '알 수 없는 오류')}"
                )
                logger.error(error_msg)
                return {"success": False, "error": error_msg, "details": result}

            if status == "transcribing":
                logger.info(f"VITO API 처리 중... ({try_count + 1}/{max_tries})")
            else:
                logger.info(
                    f"VITO API 상태 '{status}', 대기 중... ({try_count + 1}/{max_tries})"
                )

        logger.error(f"VITO API 응답 타임아웃 ({max_tries * wait_time}초 초과)")
        return {"success": False, "error": "VITO API 응답 타임아웃"}
|
|
|