| from fastapi import FastAPI, UploadFile, File |
| from fastapi.responses import FileResponse |
| from huggingface_hub import InferenceClient |
| import os |
|
|
| |
# Application object that the route decorators in this file attach to.
app = FastAPI()

# The Hugging Face token is read from the environment. Startup is refused
# without it, so a misconfigured deployment fails at import time instead of
# on the first request.
HF_API_KEY = os.environ.get("HF_API_KEY")
if HF_API_KEY is None or HF_API_KEY == "":
    raise ValueError("Missing HF_API_KEY. Add it to Hugging Face Spaces Secrets.")

# Single inference client, authenticated with the token above.
client = InferenceClient(token=HF_API_KEY)
|
|
| |
# Hosted model identifiers: one for speech recognition, one for synthesis.
STT_MODEL = "openai/whisper-tiny.en"
TTS_MODEL = "facebook/mms-tts-eng"

# Scratch directory for audio files; created at import time if it is absent.
TEMP_DIR = "temp"
os.makedirs(name=TEMP_DIR, exist_ok=True)
|
|
| |
@app.post("/tts")
async def text_to_speech(text: str):
    """Synthesize speech for ``text`` and return it as an audio download.

    Args:
        text: The text to speak, taken from the request's query string.

    Returns:
        On success, a response carrying the generated audio bytes with the
        ``audio/wav`` media type and an ``output.wav`` attachment filename.
        On failure, a JSON object of the form ``{"error": <message>}``.
    """
    # Imported locally so this handler works without touching the file-level
    # import block.
    from fastapi.concurrency import run_in_threadpool
    from fastapi.responses import Response

    try:
        # InferenceClient.text_to_speech takes the text as its first
        # positional argument; it has no ``inputs`` keyword. The call is a
        # blocking HTTP request, so it runs in the thread pool instead of
        # stalling the event loop.
        audio = await run_in_threadpool(
            client.text_to_speech, text, model=TTS_MODEL
        )
    except Exception as e:
        # Errors stay a JSON payload so existing clients keep working.
        return {"error": str(e)}

    # Serve the bytes straight from memory. Writing every request to one
    # shared "output.wav" let concurrent requests overwrite each other's audio
    # before it was streamed, and the file was never cleaned up.
    return Response(
        content=audio,
        media_type="audio/wav",
        headers={"Content-Disposition": 'attachment; filename="output.wav"'},
    )
|
|
| |
@app.post("/stt")
async def speech_to_text(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    Args:
        file: The uploaded audio, sent as multipart form data.

    Returns:
        ``{"text": <transcription>}`` on success, or
        ``{"error": <message>}`` if the upload is empty or the inference
        call fails.
    """
    # Imported locally so this handler works without touching the file-level
    # import block.
    from fastapi.concurrency import run_in_threadpool

    try:
        audio_data = await file.read()
        if not audio_data:
            # Reject empty uploads up front instead of sending them to the
            # remote model.
            return {"error": "Uploaded file is empty."}

        # The audio is the first positional argument of
        # automatic_speech_recognition; it has no ``data`` keyword. The call
        # is a blocking HTTP request, so it runs in the thread pool.
        result = await run_in_threadpool(
            client.automatic_speech_recognition, audio_data, model=STT_MODEL
        )
    except Exception as e:
        # Errors stay a JSON payload so existing clients keep working.
        return {"error": str(e)}

    # Depending on the huggingface_hub version, the result is either an
    # output object with a ``text`` attribute or the transcription string
    # itself; both cases yield the plain string here.
    return {"text": getattr(result, "text", result)}
|
|
| |
@app.get("/")
async def root():
    """Return a fixed status message for the service root."""
    status = {"message": "TTS & STT API is running!"}
    return status
|
|