File size: 2,769 Bytes
c9d8b75
 
778ce83
c9d8b75
 
 
 
 
778ce83
 
c9d8b75
 
778ce83
c9d8b75
 
778ce83
c9d8b75
 
 
 
 
 
 
 
 
 
9924306
c9d8b75
 
 
 
 
 
 
 
 
 
 
 
 
 
778ce83
 
 
 
9924306
778ce83
f55b4bb
 
778ce83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9d8b75
 
668ba8a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91

from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import uvicorn
from fastapi.middleware.cors import CORSMiddleware
import whisper
import shutil
import os
import moviepy.editor as mp
import uuid

# Application instance; the Swagger UI syntax highlighting uses the "obsidian" theme.
app = FastAPI(
    swagger_ui_parameters={"syntaxHighlight": {"theme": "obsidian"}},
)

# CORS: every origin, method and header is accepted, with credentials enabled.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Whisper "base" model, loaded once at import time (used by transcribe_with_whisper).
model = whisper.load_model("base")

def transcribe_with_whisper(fpath):
    """Transcribe the audio file at ``fpath`` with the module-level Whisper model.

    Returns the ``"text"`` entry of the transcription result, after printing it.
    Nothing is re-raised: if any step fails, the exception's message is returned
    in place of the transcript.
    """
    try:
        text = model.transcribe(fpath)["text"]
        print("whisper result:")
        print(text)
    except Exception as exc:
        # Callers receive the error text as if it were the transcript.
        return str(exc)
    return text

@app.post("/transcribe-audio")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    The upload is copied to a uniquely named temporary file in the working
    directory, handed to ``transcribe_with_whisper`` and removed afterwards.

    Returns:
        ``{"text": <transcript>}`` on success. Failures are reported in the
        same ``"text"`` field (as the exception message) instead of being
        raised.
    """
    if not file:
        return {"text": "No file sent"}

    # A per-request name keeps concurrent uploads from overwriting (or
    # deleting) one another's temporary file.
    file_location = f"{uuid.uuid4()}_newfile.wav"
    try:
        with open(file_location, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        return {"text": transcribe_with_whisper(file_location)}
    except Exception as e:
        return {"text": str(e)}
    finally:
        # Best-effort cleanup that also runs when the copy or transcription
        # fails; a leftover temp file must not mask the response.
        try:
            os.remove(file_location)
        except OSError:
            pass


#region transcribe video

@app.post("/transcribe-video")
async def transcribe_video(file: UploadFile = File(...)):
    """Extract the audio track of an uploaded video and transcribe it.

    The upload is saved to a temporary file in the working directory, its
    audio is written to a second temporary ``.wav`` file, and both files are
    removed before returning.

    Returns:
        A JSON response ``{"video": <uploaded filename>, "transcript": <text>}``
        on success, or status 500 with ``{"error": <message>}`` if saving,
        extraction or transcription fails.
    """
    # The filename is client-controlled: keep only its last path component so
    # separators cannot redirect the write, and fall back to a placeholder
    # when no usable name was sent.
    client_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(client_name) or "upload"

    token = uuid.uuid4().hex
    temp_video_path = f"{token}_{safe_name}"
    # Not derived from the upload's extension, so a ".wav" upload is never
    # overwritten by its own extracted audio.
    temp_audio_path = f"{token}.wav"

    try:
        # Save uploaded file (streamed, so large videos are not held in memory)
        with open(temp_video_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        # Extract and transcribe
        extract_audio_from_video(temp_video_path, temp_audio_path)
        transcript = transcribe_audio_to_text(temp_audio_path)

        return JSONResponse(content={
            "video": file.filename,
            "transcript": transcript
        })

    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})

    finally:
        # Cleanup
        for path in (temp_video_path, temp_audio_path):
            if os.path.exists(path):
                os.remove(path)

def extract_audio_from_video(video_path: str, audio_path: str):
    """Write the audio track of the video at ``video_path`` to ``audio_path``.

    Args:
        video_path: Path of the video file to read.
        audio_path: Path of the audio file to create.

    Raises:
        ValueError: If the video has no audio track.
    """
    clip = mp.VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Without this check the failure is an opaque AttributeError.
            raise ValueError("The video has no audio track")
        clip.audio.write_audiofile(audio_path)
    finally:
        # Release the file readers the clip keeps open, even on failure.
        clip.close()

# Whisper models already loaded by transcribe_audio_to_text, keyed by model size.
_WHISPER_MODEL_CACHE = {}


def transcribe_audio_to_text(audio_path: str, model_size: str = "base") -> str:
    """Transcribe the audio file at ``audio_path`` and return its text.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_size: Name of the Whisper model to use. Each model is loaded on
            first use and reused by later calls instead of being reloaded.

    Returns:
        The stripped text of every transcription segment, one per line.
    """
    whisper_model = _WHISPER_MODEL_CACHE.get(model_size)
    if whisper_model is None:
        whisper_model = whisper.load_model(model_size)
        _WHISPER_MODEL_CACHE[model_size] = whisper_model

    result = whisper_model.transcribe(audio_path)
    return "\n".join(seg["text"].strip() for seg in result["segments"])
#endregion transcribe video


if __name__ == "__main__":
    # Executed as a script: serve the app with uvicorn on all interfaces, port 7860.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
    )