|
|
|
|
|
from fastapi import FastAPI, UploadFile, File |
|
|
from fastapi.responses import JSONResponse |
|
|
import uvicorn |
|
|
from fastapi.middleware.cors import CORSMiddleware |
|
|
import whisper |
|
|
import shutil |
|
|
import os |
|
|
import moviepy.editor as mp |
|
|
import uuid |
|
|
|
|
|
# FastAPI app; Swagger UI configured with a dark syntax-highlight theme.
app = FastAPI(swagger_ui_parameters={"syntaxHighlight": {"theme": "obsidian"}})


# NOTE(review): a wildcard origin combined with allow_credentials=True below
# is rejected by browsers for credentialed requests — confirm whether
# credentials are actually needed, or pin concrete origins here.
origins = [ "*"]


app.add_middleware(CORSMiddleware, allow_origins=origins,allow_credentials=True,allow_methods=["*"], allow_headers=["*"])


# Whisper model loaded once at import time and shared by all request handlers.
model = whisper.load_model("base")
|
|
|
|
|
def transcribe_with_whisper(fpath):
    """Run the shared module-level Whisper model on the audio file at *fpath*.

    Returns the transcribed text on success. On any failure, returns the
    exception message as a string — callers put either outcome into the
    response's "text" field, so errors are reported, not raised.
    """
    try:
        text = model.transcribe(fpath)["text"]
        print("whisper result:")
        print(text)
    except Exception as exc:
        return str(exc)
    return text
|
|
|
|
|
@app.post("/transcribe-audio")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the shared Whisper model.

    Returns ``{"text": <transcript>}`` on success or ``{"text": <error
    message>}`` on failure (best-effort contract, matching
    transcribe_with_whisper).
    """
    if not file:
        return {"text": "No file sent"}

    # Unique per-request temp name: a fixed "newfile.wav" is a race —
    # concurrent requests would overwrite and delete each other's uploads.
    file_location = f"{uuid.uuid4()}.wav"
    try:
        with open(file_location, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        result = transcribe_with_whisper(file_location)
        return {"text": result}
    except Exception as e:
        return {"text" : str(e)}
    finally:
        # Clean up the temp file on every path, including failures —
        # previously an exception left the file behind.
        if os.path.exists(file_location):
            os.remove(file_location)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/transcribe-video")
async def transcribe_video(file: UploadFile = File(...)):
    """Extract the audio track from an uploaded video and transcribe it.

    Returns ``{"video": <original filename>, "transcript": <text>}`` on
    success, or a 500 JSON response ``{"error": <message>}`` on failure.
    Temp video/audio files are always removed.
    """
    # basename strips any directory components from the client-supplied
    # filename — using it raw allows path traversal (e.g. "../../x.mp4").
    safe_name = os.path.basename(file.filename)
    temp_video_path = f"{uuid.uuid4()}_{safe_name}"
    # Append a fixed suffix instead of rsplit(".")-swapping: if the upload
    # itself ended in ".wav", the old derivation made the audio path equal
    # the video path, so extraction clobbered its own input.
    temp_audio_path = temp_video_path + ".extracted.wav"

    try:
        # Write the upload inside the try block so a failed write is also
        # reported as a 500 and cleaned up in finally.
        with open(temp_video_path, "wb") as f:
            content = await file.read()
            f.write(content)

        extract_audio_from_video(temp_video_path, temp_audio_path)
        transcript = transcribe_audio_to_text(temp_audio_path)

        return JSONResponse(content={
            "video": file.filename,
            "transcript": transcript
        })

    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})

    finally:
        # Remove both temp artifacts regardless of outcome.
        if os.path.exists(temp_video_path):
            os.remove(temp_video_path)
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
|
|
|
|
|
def extract_audio_from_video(video_path: str, audio_path: str):
    """Write the audio track of *video_path* to *audio_path* as a WAV file.

    NOTE(review): if the video has no audio track, ``clip.audio`` is
    presumably None and this raises AttributeError — confirm and decide
    whether the caller's 500 handler is the desired outcome.
    """
    clip = mp.VideoFileClip(video_path)
    try:
        clip.audio.write_audiofile(audio_path)
    finally:
        # The clip holds an open reader; close it explicitly instead of
        # relying on garbage collection (previously it was never closed).
        clip.close()
|
|
|
|
|
def transcribe_audio_to_text(audio_path: str, model_size: str = "base") -> str:
    """Transcribe *audio_path* with Whisper, one segment per line.

    For the default "base" size, reuses the module-level model loaded at
    import time instead of reloading it on every call (model loading is
    expensive); other sizes are loaded on demand.
    """
    whisper_model = model if model_size == "base" else whisper.load_model(model_size)
    result = whisper_model.transcribe(audio_path)
    # Join per-segment texts so the transcript has one line per segment.
    return "\n".join(seg["text"].strip() for seg in result["segments"])
|
|
|
|
|
|
|
|
|
|
|
# Development entry point: serve the app on all interfaces, port 7860.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|