transcribe video added
Browse files
app.py
CHANGED
|
@@ -1,15 +1,19 @@
|
|
| 1 |
|
| 2 |
from fastapi import FastAPI, UploadFile, File
|
|
|
|
| 3 |
import uvicorn
|
| 4 |
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
import whisper
|
| 6 |
import shutil
|
| 7 |
import os
|
|
|
|
|
|
|
| 8 |
|
| 9 |
app = FastAPI(swagger_ui_parameters={"syntaxHighlight": {"theme": "obsidian"}})
|
| 10 |
-
origins = [ "
|
| 11 |
app.add_middleware(CORSMiddleware, allow_origins=origins,allow_credentials=True,allow_methods=["*"], allow_headers=["*"])
|
| 12 |
model = whisper.load_model("base")
|
|
|
|
| 13 |
def transcribe_with_whisper(fpath):
|
| 14 |
try:
|
| 15 |
transcription = model.transcribe(fpath)
|
|
@@ -20,7 +24,7 @@ def transcribe_with_whisper(fpath):
|
|
| 20 |
except Exception as e:
|
| 21 |
return str(e)
|
| 22 |
|
| 23 |
-
@app.post("/
|
| 24 |
async def transcribe(file: UploadFile = File(...)):
|
| 25 |
if not file:
|
| 26 |
return {"text": "No file sent"}
|
|
@@ -35,7 +39,52 @@ async def transcribe(file: UploadFile = File(...)):
|
|
| 35 |
|
| 36 |
except Exception as e:
|
| 37 |
return {"text" : str(e)}
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
if __name__ == "__main__":
|
| 41 |
-
uvicorn.run(app, host="
|
|
|
|
| 1 |
|
| 2 |
from functools import lru_cache
import os
import shutil
import tempfile
import uuid

from fastapi import FastAPI, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
import moviepy.editor as mp
import uvicorn
import whisper
|
| 11 |
|
| 12 |
# FastAPI application; the swagger_ui_parameters setting only themes the
# /docs syntax highlighting — it has no runtime effect on the API.
app = FastAPI(swagger_ui_parameters={"syntaxHighlight": {"theme": "obsidian"}})
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers for credentialed requests (the CORS spec forbids
# `Access-Control-Allow-Origin: *` together with credentials) — confirm
# whether credentials are actually required by any caller.
origins = [ "*"]
app.add_middleware(CORSMiddleware, allow_origins=origins,allow_credentials=True,allow_methods=["*"], allow_headers=["*"])
# Whisper "base" model, loaded once at import time and shared by the
# audio-transcription endpoint.
model = whisper.load_model("base")
|
| 16 |
+
|
| 17 |
def transcribe_with_whisper(fpath):
|
| 18 |
try:
|
| 19 |
transcription = model.transcribe(fpath)
|
|
|
|
| 24 |
except Exception as e:
|
| 25 |
return str(e)
|
| 26 |
|
| 27 |
+
@app.post("/transcribe_audio")
|
| 28 |
async def transcribe(file: UploadFile = File(...)):
|
| 29 |
if not file:
|
| 30 |
return {"text": "No file sent"}
|
|
|
|
| 39 |
|
| 40 |
except Exception as e:
|
| 41 |
return {"text" : str(e)}
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
#region transcribe video
|
| 45 |
+
|
| 46 |
+
@app.post("/transcribe_video")
async def transcribe_video(file: UploadFile = File(...)):
    """Accept an uploaded video, extract its audio track, and return the
    Whisper transcript.

    Returns:
        200 with ``{"video": <filename>, "transcript": <text>}`` on success,
        500 with ``{"error": <message>}`` on any failure.

    Temporary files are always removed, whatever the outcome.
    """
    # Unique per-request temp paths so concurrent uploads cannot collide.
    # tempfile.gettempdir() instead of a hard-coded "/tmp" for portability
    # (e.g. Windows has no /tmp).
    tmp_dir = tempfile.gettempdir()
    temp_video_path = os.path.join(tmp_dir, f"{uuid.uuid4()}_{file.filename}")
    temp_audio_path = temp_video_path.rsplit(".", 1)[0] + ".wav"

    try:
        # Save the upload inside the try block so a partial write still
        # hits the cleanup in `finally` (the original saved it before the
        # try, leaking the temp file on write failure).
        with open(temp_video_path, "wb") as out:
            content = await file.read()
            out.write(content)

        # Extract the audio track, then transcribe it.
        extract_audio_from_video(temp_video_path, temp_audio_path)
        transcript = transcribe_audio_to_text(temp_audio_path)

        return JSONResponse(content={
            "video": file.filename,
            "transcript": transcript,
        })

    except Exception as e:
        # Top-level request boundary: surface the failure to the client.
        return JSONResponse(status_code=500, content={"error": str(e)})

    finally:
        # Remove both temp files regardless of success or failure.
        for path in (temp_video_path, temp_audio_path):
            if os.path.exists(path):
                os.remove(path)
|
| 76 |
+
|
| 77 |
+
def extract_audio_from_video(video_path: str, audio_path: str) -> None:
    """Write the audio track of *video_path* to *audio_path*.

    Raises:
        ValueError: if the video contains no audio track (``clip.audio``
            is ``None`` for silent videos; failing early with a clear
            message beats the AttributeError the original raised).
    """
    clip = mp.VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        clip.audio.write_audiofile(audio_path)
    finally:
        # VideoFileClip holds an open file reader; close it so the caller
        # can delete the temp video (mandatory on Windows, good hygiene
        # everywhere — the original leaked this handle).
        clip.close()
|
| 80 |
+
|
| 81 |
+
@lru_cache(maxsize=None)
def _load_whisper_model(model_size: str):
    """Load and memoize a Whisper model by size.

    Loading a model takes seconds and hundreds of MB; caching means each
    size is loaded at most once per process instead of once per request.
    The key space (whisper size names) is small and finite, so an
    unbounded cache is safe here.
    """
    return whisper.load_model(model_size)

def transcribe_audio_to_text(audio_path: str, model_size: str = "base") -> str:
    """Transcribe *audio_path* with Whisper and return the text, one
    segment per line.

    Args:
        audio_path: path to an audio file Whisper can read.
        model_size: Whisper model name (default "base").
    """
    result = _load_whisper_model(model_size).transcribe(audio_path)
    return "\n".join(seg["text"].strip() for seg in result["segments"])
|
| 86 |
+
#endregion transcribe video
|
| 87 |
+
|
| 88 |
|
| 89 |
if __name__ == "__main__":
    # Direct-run entry point: serve the ASGI app on localhost.
    # NOTE(review): port 7860 matches the Hugging Face Spaces default, but
    # Spaces requires host "0.0.0.0" — confirm the intended deployment.
    uvicorn.run(app, host="127.0.0.1", port=7860)
|