import os
import shutil
import uuid

import moviepy.editor as mp
import uvicorn
import whisper
from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

app = FastAPI(swagger_ui_parameters={"syntaxHighlight": {"theme": "obsidian"}})

# Wide-open CORS; restrict this list of origins for production deployments.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
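
# Load the Whisper model once at import time so every request reuses the same
# instance instead of reloading the weights per call. "base" trades some
# accuracy for speed; larger checkpoints such as "small" or "medium" are more
# accurate but slower to load and run.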
model = whisper.load_model("base")


def transcribe_with_whisper(fpath):
    try:
        transcription = model.transcribe(fpath)
        result = transcription["text"]
        print("whisper result:")
        print(result)
        return result
    except Exception as e:
        # On failure the error message is returned in place of a transcript.
        return str(e)
@app.post("/transcribe-audio")
async def transcribe(file: UploadFile = File(...)):
if not file:
return {"text": "No file sent"}
try:
file_location = f"newfile.wav"
with open(file_location, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
result = transcribe_with_whisper(file_location)
os.remove(file_location)
return {"text": result}
except Exception as e:
return {"text" : str(e)}
#region transcribe video
@app.post("/transcribe-video")
async def transcribe_video(file: UploadFile = File(...)):
# Create temporary paths
temp_video_path = f"{uuid.uuid4()}_{file.filename}"
temp_audio_path = temp_video_path.rsplit(".", 1)[0] + ".wav"
# Save uploaded file
with open(temp_video_path, "wb") as f:
content = await file.read()
f.write(content)
try:
# Extract and transcribe
extract_audio_from_video(temp_video_path, temp_audio_path)
transcript = transcribe_audio_to_text(temp_audio_path)
return JSONResponse(content={
"video": file.filename,
"transcript": transcript
})
except Exception as e:
return JSONResponse(status_code=500, content={"error": str(e)})
finally:
# Cleanup
if os.path.exists(temp_video_path):
os.remove(temp_video_path)
if os.path.exists(temp_audio_path):
os.remove(temp_audio_path)
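
# Example request (a sketch; assumes a local server on port 7860 and a file
# named clip.mp4 in the working directory — the filename is illustrative):
#
#   curl -X POST http://localhost:7860/transcribe-video -F "file=@clip.mp4"
#
# Success responds with {"video": "<name>", "transcript": "..."}; failures
# return HTTP 500 with {"error": "<message>"}.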


def extract_audio_from_video(video_path: str, audio_path: str):
    # The context manager closes moviepy's reader handles when done.
    with mp.VideoFileClip(video_path) as clip:
        if clip.audio is None:
            raise ValueError("Video file contains no audio track")
        clip.audio.write_audiofile(audio_path)


def transcribe_audio_to_text(audio_path: str, model_size: str = "base") -> str:
    # Reuse the globally loaded model unless a different size is requested,
    # avoiding a reload of the weights on every request.
    audio_model = model if model_size == "base" else whisper.load_model(model_size)
    result = audio_model.transcribe(audio_path)
    return "\n".join(seg["text"].strip() for seg in result["segments"])
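
# For reference, each entry of result["segments"] is a dict carrying timing
# metadata alongside the text, e.g. {"start": 0.0, "end": 3.2, "text": " ..."}
# (illustrative values), which is why the join above yields one line per
# spoken segment.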
#endregion transcribe video


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
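
# Alternatively, run under the uvicorn CLI (assuming this module is saved as
# app.py):
#
#   uvicorn app:app --host 0.0.0.0 --port 7860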