File size: 2,769 Bytes
c9d8b75 778ce83 c9d8b75 778ce83 c9d8b75 778ce83 c9d8b75 778ce83 c9d8b75 9924306 c9d8b75 778ce83 9924306 778ce83 f55b4bb 778ce83 c9d8b75 668ba8a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import uvicorn
from fastapi.middleware.cors import CORSMiddleware
import whisper
import shutil
import os
import moviepy.editor as mp
import uuid
# Application instance; the Swagger UI syntax highlighting uses the "obsidian" theme.
app = FastAPI(
    swagger_ui_parameters={"syntaxHighlight": {"theme": "obsidian"}},
)

# CORS: every origin, method and header is accepted, with credentials enabled.
# NOTE(review): wildcard origins combined with credentials is very permissive
# — confirm this is intended before exposing the service publicly.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Whisper "base" model, loaded once at import time and used by
# transcribe_with_whisper.
model = whisper.load_model("base")
def transcribe_with_whisper(fpath):
    """Transcribe the audio file at *fpath* with the module-level Whisper model.

    Returns the recognised text, which is also printed to stdout. Any
    exception raised along the way is not propagated: its message is
    returned as the result string instead.
    """
    try:
        text = model.transcribe(fpath)["text"]
        # Echo the transcript to stdout under a fixed header line.
        print("whisper result:", text, sep="\n")
        return text
    except Exception as err:
        # Callers receive the error message in place of a transcript.
        return str(err)
@app.post("/transcribe-audio")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return ``{"text": ...}``.

    The upload is copied to a uniquely named temporary file in the working
    directory, passed to ``transcribe_with_whisper`` and removed afterwards.
    Failures are reported in the ``"text"`` field rather than through an HTTP
    error status, which is the response shape this endpoint has always used.
    """
    if not file:
        return {"text": "No file sent"}

    # A per-request name keeps concurrent requests (e.g. several worker
    # processes sharing this directory) from overwriting or deleting each
    # other's upload, which a single fixed file name allowed.
    file_location = f"{uuid.uuid4()}.wav"
    try:
        with open(file_location, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        return {"text": transcribe_with_whisper(file_location)}
    except Exception as e:
        return {"text": str(e)}
    finally:
        # Clean up on every path, including a failed copy.
        if os.path.exists(file_location):
            os.remove(file_location)
#region transcribe video
@app.post("/transcribe-video")
async def transcribe_video(file: UploadFile = File(...)):
    """Extract the audio track of an uploaded video and transcribe it.

    Returns a JSON object with the client-supplied file name under
    ``"video"`` and the transcript under ``"transcript"``. Any failure is
    reported as a 500 response of the form ``{"error": <message>}``.
    Both temporary files are removed before the response is returned.
    """
    # Only the extension of the client-supplied name is reused: the name is
    # untrusted and may be missing or contain path separators.
    extension = os.path.splitext(os.path.basename(file.filename or ""))[1]
    job_id = uuid.uuid4().hex
    # Distinct stems guarantee the audio path never equals the video path,
    # even when the upload itself has a ".wav" extension.
    temp_video_path = f"{job_id}_video{extension}"
    temp_audio_path = f"{job_id}_audio.wav"

    try:
        # Save the upload in chunks so large videos are not held in memory.
        # Doing this inside the try means a failed write is also cleaned up
        # and reported through the same JSON error response.
        with open(temp_video_path, "wb") as f:
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                f.write(chunk)

        # Extract and transcribe
        extract_audio_from_video(temp_video_path, temp_audio_path)
        transcript = transcribe_audio_to_text(temp_audio_path)

        return JSONResponse(content={
            "video": file.filename,
            "transcript": transcript
        })
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Cleanup
        for path in (temp_video_path, temp_audio_path):
            if os.path.exists(path):
                os.remove(path)
def extract_audio_from_video(video_path: str, audio_path: str):
    """Write the audio track of the video at *video_path* to *audio_path*.

    Raises:
        ValueError: if the video has no audio track.
    """
    clip = mp.VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Without this check the failure surfaces as an opaque
            # AttributeError on None.
            raise ValueError("The video has no audio track to extract")
        clip.audio.write_audiofile(audio_path)
    finally:
        # Release the clip's reader resources so the source file is no
        # longer held open once this function returns.
        clip.close()
# Whisper models already loaded by transcribe_audio_to_text, keyed by size name.
_WHISPER_MODELS = {}


def transcribe_audio_to_text(audio_path: str, model_size: str = "base") -> str:
    """Transcribe the audio file at *audio_path* and return the transcript.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_size: Name of the Whisper model to use.

    Returns:
        The text of each recognised segment, stripped of surrounding
        whitespace, joined with newlines.
    """
    # Loading a model is expensive, so each size is loaded once and reused
    # on later calls instead of being reloaded for every request.
    whisper_model = _WHISPER_MODELS.get(model_size)
    if whisper_model is None:
        whisper_model = whisper.load_model(model_size)
        _WHISPER_MODELS[model_size] = whisper_model

    result = whisper_model.transcribe(audio_path)
    return "\n".join(seg["text"].strip() for seg in result["segments"])
#endregion transcribe video
if __name__ == "__main__":
    # Direct execution: serve the app on all interfaces, port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )
|