Spaces:

speako
/

wav2vec2-server

Sleeping

App Files Files Community

wav2vec2-server / app.py

bigeco

Revert "feat: 기존의 정확도 대신 CRR로 계산 (#1)"

82307da 10 months ago

raw

history blame contribute delete

8.48 kB

	import html
	import os
	import tempfile
	import traceback

	import yaml
	from fastapi import FastAPI, Request, File, UploadFile, Form
	from fastapi.responses import HTMLResponse
	from pydantic import BaseModel

	from model.wav2vec2 import Wav2Vec2

	# ---------------- Load configuration ----------------
	# Decode explicitly as UTF-8 so parsing the YAML file does not depend on
	# the host's locale encoding.
	with open("config/wav2vec2.yaml", "r", encoding="utf-8") as f:
	    config = yaml.safe_load(f)

	# ---------------- Initialise model ----------------
	# Constructed once at import time; the request handlers in this module
	# call it through this module-level name.
	wav2vec2_model = Wav2Vec2(config)

	# ---------------- FastAPI application ----------------
	# Application instance on which every route in this module is registered.
	app = FastAPI(
	    version="1.0.0",
	    title="Korean Speech Recognition API",
	    description="FastAPI + Wav2Vec2 기반 한국어 음성 인식 서버",
	)

	# ---------------- Response model ----------------
	# Body returned by POST /transcribe (declared there as response_model).
	class TranscriptionResponse(BaseModel):
	    # Recognised text; the handler sets it to "" when the request fails.
	    transcription: str
	    # "success", or a string starting with "error: " describing the failure.
	    status: str

	# ---------------- API: file upload (POST) ----------------
	@app.post("/transcribe", response_model=TranscriptionResponse)
	async def transcribe_audio(file: UploadFile = File(...)):
	    """Run speech recognition on an uploaded audio file.

	    Args:
	        file: Multipart upload. Its filename must end in .wav, .mp3, .flac
	            or .m4a (case-insensitive).

	    Returns:
	        A TranscriptionResponse. On success ``status`` is "success" and
	        ``transcription`` holds the recognised text. Failures are not
	        raised: they are reported with an empty ``transcription`` and a
	        ``status`` that starts with "error: ".
	    """
	    # An upload part can arrive without a filename. Treat that as an
	    # unsupported format instead of failing on ``None.lower()`` outside
	    # the try block below.
	    filename = file.filename or ""

	    # Validate the file format by extension.
	    if not filename.lower().endswith(('.wav', '.mp3', '.flac', '.m4a')):
	        return TranscriptionResponse(
	            transcription="",
	            status="error: 지원되지 않는 파일 형식입니다. wav, mp3, flac, m4a 파일만 지원됩니다."
	        )

	    try:
	        # Read the whole upload into memory.
	        audio_bytes = await file.read()

	        # Run speech recognition.
	        result = wav2vec2_model.transcribe_from_bytes(audio_bytes, filename)

	        # Built inside the try so that a result the response model rejects
	        # is also reported through the error status.
	        return TranscriptionResponse(
	            transcription=result,
	            status="success"
	        )

	    except Exception as e:
	        # Deliberate catch-all at the API boundary: the client always gets
	        # a well-formed body whose status carries the error message.
	        return TranscriptionResponse(
	            transcription="",
	            status=f"error: {str(e)}"
	        )

	# ---------------- HTML UI ----------------
	@app.get("/", response_class=HTMLResponse)
	async def main_ui():
	    """Serve the static upload page whose form posts ``audio_file`` to /submit."""
	    # The page is a fixed literal; nothing is interpolated into it.
	    upload_page = """
	<html>
	<head>
	<title>Korean Speech Recognition</title>
	<meta charset="UTF-8">
	<style>
	body {
	font-family: Arial, sans-serif;
	max-width: 800px;
	margin: auto;
	padding: 2rem;
	background-color: #f5f5f5;
	}
	.container {
	background-color: white;
	padding: 2rem;
	border-radius: 10px;
	box-shadow: 0 2px 10px rgba(0,0,0,0.1);
	}
	.form-group {
	margin-bottom: 1.5rem;
	}
	label {
	display: block;
	margin-bottom: 0.5rem;
	font-weight: bold;
	color: #333;
	}
	input[type="file"] {
	padding: 0.5rem;
	border: 2px dashed #ccc;
	border-radius: 5px;
	width: 100%;
	box-sizing: border-box;
	}
	input[type="submit"] {
	background-color: #007bff;
	color: white;
	padding: 1rem 2rem;
	border: none;
	border-radius: 5px;
	cursor: pointer;
	font-size: 1rem;
	}
	input[type="submit"]:hover {
	background-color: #0056b3;
	}
	.info {
	background-color: #e7f3ff;
	padding: 1rem;
	border-radius: 5px;
	margin-bottom: 1rem;
	border-left: 4px solid #007bff;
	}
	</style>
	</head>
	<body>
	<div class="container">
	<h1>🎤 한국어 음성 인식</h1>
	<div class="info">
	<strong>지원 형식:</strong> WAV, MP3, FLAC, M4A<br>
	<strong>모델:</strong> Wav2Vec2 Korean Fine-tuned
	</div>

	<form action="/submit" method="post" enctype="multipart/form-data">
	<div class="form-group">
	<label for="audio_file">🎵 오디오 파일 선택:</label>
	<input type="file" id="audio_file" name="audio_file" accept=".wav,.mp3,.flac,.m4a" required>
	</div>

	<input type="submit" value="음성 인식 실행">
	</form>
	</div>
	</body>
	</html>
	"""
	    return upload_page

	# ---------------- Render result page ----------------
	@app.post("/submit", response_class=HTMLResponse)
	async def handle_form(request: Request, audio_file: UploadFile = File(...)):
	    """Handle the upload form posted from "/" and render the outcome as HTML.

	    Every value that originates from the request or from processing it
	    (filename, transcription, exception text, traceback) is HTML-escaped
	    before being placed in the page, so it cannot inject markup or script.

	    Args:
	        request: Unused; kept so the handler signature stays unchanged.
	        audio_file: Uploaded audio; its filename must end in .wav, .mp3,
	            .flac or .m4a (case-insensitive).

	    Returns:
	        An HTML document: the result page on success, a format-error page
	        for an unsupported or missing filename, or a server-error page if
	        reading or transcribing the upload raises.
	    """
	    try:
	        # A part without a filename is handled as an unsupported format
	        # rather than surfacing as an AttributeError traceback.
	        filename = audio_file.filename or ""

	        # Validate the file format by extension.
	        if not filename.lower().endswith(('.wav', '.mp3', '.flac', '.m4a')):
	            return """
	<html>
	<head><title>에러</title><meta charset="UTF-8"></head>
	<body style="font-family: Arial, sans-serif; max-width: 600px; margin: auto; padding: 2rem;">
	<h1>❌ 파일 형식 오류</h1>
	<p>지원되지 않는 파일 형식입니다.</p>
	<p><strong>지원 형식:</strong> WAV, MP3, FLAC, M4A</p>
	<br>
	<a href="/" style="color: #007bff; text-decoration: none;">← 돌아가기</a>
	</body>
	</html>
	"""

	        # Read the whole upload into memory.
	        audio_bytes = await audio_file.read()

	        # Run speech recognition.
	        result = wav2vec2_model.transcribe_from_bytes(audio_bytes, filename)

	    except Exception as e:
	        # Exception text can echo request data (e.g. the filename), so it
	        # is escaped like any other untrusted value.
	        error_message = html.escape(str(e))
	        # NOTE(review): the full traceback is still shown to the client, as
	        # before; consider logging it server-side instead of exposing it.
	        error_details = html.escape(traceback.format_exc())
	        return f"""
	<html>
	<head><title>에러</title><meta charset="UTF-8"></head>
	<body style="font-family: Arial, sans-serif; max-width: 600px; margin: auto; padding: 2rem;">
	<h1>❌ 서버 오류 발생</h1>
	<p><strong>오류 메시지:</strong></p>
	<pre style="background-color: #f8f9fa; padding: 1rem; border-radius: 5px; overflow-x: auto;">{error_message}</pre>
	<hr>
	<details>
	<summary><strong>에러 상세 (클릭하여 펼치기)</strong></summary>
	<pre style="background-color: #f8f9fa; padding: 1rem; border-radius: 5px; overflow-x: auto;">{error_details}</pre>
	</details>
	<br>
	<a href="/" style="color: #007bff; text-decoration: none;">← 돌아가기</a>
	</body>
	</html>
	"""

	    # The filename is client-controlled and the transcription is derived
	    # from client audio: escape both before embedding them in markup.
	    safe_filename = html.escape(filename)
	    safe_result = html.escape(str(result))

	    return f"""
	<html>
	<head><title>결과</title><meta charset="UTF-8"></head>
	<body style="font-family: Arial, sans-serif; max-width: 600px; margin: auto; padding: 2rem;">
	<h1>✅ 음성 인식 결과</h1>
	<div style="background-color: #f8f9fa; padding: 1rem; border-radius: 5px; margin: 1rem 0;">
	<p><strong>업로드된 파일:</strong> {safe_filename}</p>
	<p><strong>파일 크기:</strong> {len(audio_bytes):,} bytes</p>
	</div>
	<hr>
	<h2>🎯 인식된 텍스트:</h2>
	<div style="background-color: #e7f3ff; padding: 1.5rem; border-radius: 5px; border-left: 4px solid #007bff;">
	<pre style="font-size: 1.1rem; margin: 0; white-space: pre-wrap; word-wrap: break-word;">{safe_result}</pre>
	</div>
	<br>
	<a href="/" style="color: #007bff; text-decoration: none;">← 다시 시도하기</a>
	</body>
	</html>
	"""

	# ---------------- Health check ----------------
	@app.get("/health")
	async def health_check():
	    """Return a fixed "ok" status plus the model settings read from config."""
	    model_cfg = config["model"]

	    # Built incrementally; key order matches the response layout.
	    payload = {"status": "ok"}
	    payload["model"] = model_cfg["id"]
	    payload["device"] = model_cfg["device"]
	    payload["sampling_rate"] = model_cfg["sampling_rate"]
	    return payload

	# ---------------- Model information ----------------
	@app.get("/info")
	async def model_info():
	    """Describe the configured model and the audio formats the API accepts."""
	    model_cfg = config["model"]
	    return dict(
	        model_id=model_cfg["id"],
	        device=model_cfg["device"],
	        sampling_rate=model_cfg["sampling_rate"],
	        supported_formats=["wav", "mp3", "flac", "m4a"],
	        description="Korean Speech Recognition using Wav2Vec2",
	    )