Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, Request, File, UploadFile, Form | |
| from fastapi.responses import HTMLResponse | |
| from pydantic import BaseModel | |
| import yaml | |
| import tempfile | |
| import os | |
| import traceback | |
| from model.wav2vec2 import Wav2Vec2 | |
# ---------------- Load configuration ----------------
# Read the model settings once at import time. The encoding is given
# explicitly so the YAML file is decoded the same way on every platform
# (without it, open() falls back to the locale's preferred encoding).
with open("config/wav2vec2.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# ---------------- Initialise the model ----------------
# A single module-level model instance is shared by all request handlers.
wav2vec2_model = Wav2Vec2(config)

# ---------------- FastAPI application ----------------
# NOTE(review): the non-ASCII text in `description` looks mis-encoded
# (UTF-8 read as a Greek single-byte charset) - restore it from the
# original source file; it is reproduced here unchanged.
app = FastAPI(
    title="Korean Speech Recognition API",
    description="FastAPI + Wav2Vec2 κΈ°λ° νκ΅μ΄ μμ± μΈμ μλ²",
    version="1.0.0"
)
# ---------------- Response model ----------------
class TranscriptionResponse(BaseModel):
    """Payload returned by the transcription endpoint."""

    # Recognised text; the handlers in this file send "" when reporting
    # an error.
    transcription: str
    # "success", or a string starting with "error: " followed by details.
    status: str
# ---------------- API: file upload (POST) ----------------
# NOTE(review): no route decorator is visible in this view of the file;
# confirm the endpoint is still registered on `app`.
async def transcribe_audio(file: UploadFile = File(...)):
    """Run speech recognition on an uploaded audio file.

    Args:
        file: The uploaded audio file. Only names ending in .wav, .mp3,
            .flac or .m4a (case-insensitive) are accepted.

    Returns:
        A ``TranscriptionResponse``. On success ``transcription`` holds
        the model output and ``status`` is ``"success"``. On any failure
        ``transcription`` is empty and ``status`` starts with
        ``"error: "``. Exceptions raised while reading or transcribing
        the file are reported this way rather than propagated.
    """
    supported_suffixes = ('.wav', '.mp3', '.flac', '.m4a')

    # The upload may arrive without a filename (None). Treat that as an
    # unsupported type instead of letting `.lower()` raise AttributeError
    # outside the try block below.
    filename = file.filename or ""

    # Validate the file type by extension.
    # NOTE(review): the non-ASCII part of this message looks mis-encoded;
    # it is reproduced unchanged - restore from the original source.
    if not filename.lower().endswith(supported_suffixes):
        return TranscriptionResponse(
            transcription="",
            status="error: μ§μλμ§ μλ νμΌ νμμ λλ€. wav, mp3, flac, m4a νμΌλ§ μ§μλ©λλ€."
        )

    try:
        # Read the whole upload into memory.
        audio_bytes = await file.read()
        # Run speech recognition; the filename is passed along with the
        # raw bytes.
        result = wav2vec2_model.transcribe_from_bytes(audio_bytes, filename)
        return TranscriptionResponse(
            transcription=result,
            status="success"
        )
    except Exception as e:
        # API boundary: report the failure in the response body rather
        # than surfacing an unhandled server error.
        return TranscriptionResponse(
            transcription="",
            status=f"error: {str(e)}"
        )
# ---------------- HTML UI ----------------
async def main_ui():
    """Return the static upload page.

    The page contains a multipart form that posts the chosen file to
    ``/submit`` under the field name ``audio_file``.
    """
    # NOTE(review): the non-ASCII text in this page looks mis-encoded;
    # it is reproduced unchanged - restore from the original source.
    page = """
    <html>
    <head>
        <title>Korean Speech Recognition</title>
        <meta charset="UTF-8">
        <style>
            body {
                font-family: Arial, sans-serif;
                max-width: 800px;
                margin: auto;
                padding: 2rem;
                background-color: #f5f5f5;
            }
            .container {
                background-color: white;
                padding: 2rem;
                border-radius: 10px;
                box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            }
            .form-group {
                margin-bottom: 1.5rem;
            }
            label {
                display: block;
                margin-bottom: 0.5rem;
                font-weight: bold;
                color: #333;
            }
            input[type="file"] {
                padding: 0.5rem;
                border: 2px dashed #ccc;
                border-radius: 5px;
                width: 100%;
                box-sizing: border-box;
            }
            input[type="submit"] {
                background-color: #007bff;
                color: white;
                padding: 1rem 2rem;
                border: none;
                border-radius: 5px;
                cursor: pointer;
                font-size: 1rem;
            }
            input[type="submit"]:hover {
                background-color: #0056b3;
            }
            .info {
                background-color: #e7f3ff;
                padding: 1rem;
                border-radius: 5px;
                margin-bottom: 1rem;
                border-left: 4px solid #007bff;
            }
        </style>
    </head>
    <body>
        <div class="container">
            <h1>π€ νκ΅μ΄ μμ± μΈμ</h1>
            <div class="info">
                <strong>μ§μ νμ:</strong> WAV, MP3, FLAC, M4A<br>
                <strong>λͺ¨λΈ:</strong> Wav2Vec2 Korean Fine-tuned
            </div>
            <form action="/submit" method="post" enctype="multipart/form-data">
                <div class="form-group">
                    <label for="audio_file">π΅ μ€λμ€ νμΌ μ ν:</label>
                    <input type="file" id="audio_file" name="audio_file" accept=".wav,.mp3,.flac,.m4a" required>
                </div>
                <input type="submit" value="μμ± μΈμ μ€ν">
            </form>
        </div>
    </body>
    </html>
    """
    return page
# ---------------- Result rendering ----------------
# NOTE(review): no route decorator is visible in this view of the file;
# the upload form posts to "/submit", so this handler is presumably bound
# to that path with an HTML response class - confirm.
async def handle_form(request: Request, audio_file: UploadFile = File(...)):
    """Handle the HTML upload form and render the outcome as a page.

    Args:
        request: The incoming request (not used by the body).
        audio_file: The uploaded audio file. Only names ending in .wav,
            .mp3, .flac or .m4a (case-insensitive) are accepted.

    Returns:
        An HTML document as a string: a file-type error page, a server
        error page (exception message plus traceback), or the result page
        showing the filename, size in bytes and recognised text.

    Every dynamic value is HTML-escaped before being placed in the page,
    because the filename is client-controlled and the model output and
    exception text may contain markup characters.
    """
    # Local import keeps this block self-contained.
    from html import escape

    # The upload may arrive without a filename (None); normalise so it is
    # reported as an unsupported file type instead of a server error.
    filename = audio_file.filename or ""

    # NOTE(review): the non-ASCII text in these pages looks mis-encoded;
    # it is reproduced unchanged - restore from the original source.
    try:
        # Validate the file type by extension.
        if not filename.lower().endswith(('.wav', '.mp3', '.flac', '.m4a')):
            return """
            <html>
            <head><title>μλ¬</title><meta charset="UTF-8"></head>
            <body style="font-family: Arial, sans-serif; max-width: 600px; margin: auto; padding: 2rem;">
                <h1>β νμΌ νμ μ€λ₯</h1>
                <p>μ§μλμ§ μλ νμΌ νμμ λλ€.</p>
                <p><strong>μ§μ νμ:</strong> WAV, MP3, FLAC, M4A</p>
                <br>
                <a href="/" style="color: #007bff; text-decoration: none;">β λμκ°κΈ°</a>
            </body>
            </html>
            """
        # Read the whole upload into memory.
        audio_bytes = await audio_file.read()
        # Run speech recognition.
        result = wav2vec2_model.transcribe_from_bytes(audio_bytes, filename)
    except Exception as e:
        # NOTE(review): showing the full traceback to the client exposes
        # server internals; consider logging it server-side instead.
        error_details = traceback.format_exc()
        return f"""
        <html>
        <head><title>μλ¬</title><meta charset="UTF-8"></head>
        <body style="font-family: Arial, sans-serif; max-width: 600px; margin: auto; padding: 2rem;">
            <h1>β μλ² μ€λ₯ λ°μ</h1>
            <p><strong>μ€λ₯ λ©μμ§:</strong></p>
            <pre style="background-color: #f8f9fa; padding: 1rem; border-radius: 5px; overflow-x: auto;">{escape(str(e))}</pre>
            <hr>
            <details>
                <summary><strong>μλ¬ μμΈ (ν΄λ¦νμ¬ νΌμΉκΈ°)</strong></summary>
                <pre style="background-color: #f8f9fa; padding: 1rem; border-radius: 5px; overflow-x: auto;">{escape(error_details)}</pre>
            </details>
            <br>
            <a href="/" style="color: #007bff; text-decoration: none;">β λμκ°κΈ°</a>
        </body>
        </html>
        """
    return f"""
    <html>
    <head><title>κ²°κ³Ό</title><meta charset="UTF-8"></head>
    <body style="font-family: Arial, sans-serif; max-width: 600px; margin: auto; padding: 2rem;">
        <h1>β μμ± μΈμ κ²°κ³Ό</h1>
        <div style="background-color: #f8f9fa; padding: 1rem; border-radius: 5px; margin: 1rem 0;">
            <p><strong>μ λ‘λλ νμΌ:</strong> {escape(filename)}</p>
            <p><strong>νμΌ ν¬κΈ°:</strong> {len(audio_bytes):,} bytes</p>
        </div>
        <hr>
        <h2>π― μΈμλ ν μ€νΈ:</h2>
        <div style="background-color: #e7f3ff; padding: 1.5rem; border-radius: 5px; border-left: 4px solid #007bff;">
            <pre style="font-size: 1.1rem; margin: 0; white-space: pre-wrap; word-wrap: break-word;">{escape(str(result))}</pre>
        </div>
        <br>
        <a href="/" style="color: #007bff; text-decoration: none;">β λ€μ μλνκΈ°</a>
    </body>
    </html>
    """
# ---------------- Health check ----------------
async def health_check():
    """Return a fixed "ok" status together with the configured model settings."""
    model_cfg = config["model"]
    payload = {"status": "ok"}
    # Values are read from the loaded config in the same order as the
    # keys appear in the response.
    payload["model"] = model_cfg["id"]
    payload["device"] = model_cfg["device"]
    payload["sampling_rate"] = model_cfg["sampling_rate"]
    return payload
# ---------------- Model information ----------------
async def model_info():
    """Describe the configured model and the audio formats the API accepts."""
    model_cfg = config["model"]
    return dict(
        model_id=model_cfg["id"],
        device=model_cfg["device"],
        sampling_rate=model_cfg["sampling_rate"],
        supported_formats=["wav", "mp3", "flac", "m4a"],
        description="Korean Speech Recognition using Wav2Vec2",
    )