|
|
|
|
|
from fastapi import FastAPI, UploadFile, File |
|
|
from fastapi.responses import JSONResponse |
|
|
import uvicorn |
|
|
from fastapi.middleware.cors import CORSMiddleware |
|
|
import whisper |
|
|
import shutil |
|
|
import os |
|
|
import moviepy.editor as mp |
|
|
import uuid |
|
|
|
|
|
# FastAPI app; Swagger UI configured with a dark syntax-highlight theme.
app = FastAPI(swagger_ui_parameters={"syntaxHighlight": {"theme": "obsidian"}})


# NOTE(review): a wildcard origin combined with allow_credentials=True below
# is rejected by browsers for credentialed requests — confirm whether
# credentials are actually needed, or pin concrete origins here.
origins = [ "*"]


app.add_middleware(CORSMiddleware, allow_origins=origins,allow_credentials=True,allow_methods=["*"], allow_headers=["*"])


# Whisper model loaded once at import time and shared by all request handlers.
model = whisper.load_model("base")
|
|
|
|
|
def transcribe_with_whisper(fpath):
    """Run the shared module-level Whisper model on the audio file at *fpath*.

    Returns the transcribed text on success. On any failure, returns the
    exception message as a string — callers put either outcome into the
    response's "text" field, so errors are reported, not raised.
    """
    try:
        text = model.transcribe(fpath)["text"]
        print("whisper result:")
        print(text)
    except Exception as exc:
        return str(exc)
    return text
|
|
|
|
|
@app.post("/transcribe-audio")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the shared Whisper model.

    Returns ``{"text": <transcript>}`` on success or ``{"text": <error
    message>}`` on failure (best-effort contract, matching
    transcribe_with_whisper).
    """
    if not file:
        return {"text": "No file sent"}

    # Unique per-request temp name: a fixed "newfile.wav" is a race —
    # concurrent requests would overwrite and delete each other's uploads.
    file_location = f"{uuid.uuid4()}.wav"
    try:
        with open(file_location, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
        result = transcribe_with_whisper(file_location)
        return {"text": result}
    except Exception as e:
        return {"text" : str(e)}
    finally:
        # Clean up the temp file on every path, including failures —
        # previously an exception left the file behind.
        if os.path.exists(file_location):
            os.remove(file_location)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/transcribe-video")
async def transcribe_video(file: UploadFile = File(...)):
    """Extract the audio track from an uploaded video and transcribe it.

    Returns ``{"video": <original filename>, "transcript": <text>}`` on
    success, or a 500 JSON response ``{"error": <message>}`` on failure.
    Temp video/audio files are always removed.
    """
    # basename strips any directory components from the client-supplied
    # filename — using it raw allows path traversal (e.g. "../../x.mp4").
    safe_name = os.path.basename(file.filename)
    temp_video_path = f"{uuid.uuid4()}_{safe_name}"
    # Append a fixed suffix instead of rsplit(".")-swapping: if the upload
    # itself ended in ".wav", the old derivation made the audio path equal
    # the video path, so extraction clobbered its own input.
    temp_audio_path = temp_video_path + ".extracted.wav"

    try:
        # Write the upload inside the try block so a failed write is also
        # reported as a 500 and cleaned up in finally.
        with open(temp_video_path, "wb") as f:
            content = await file.read()
            f.write(content)

        extract_audio_from_video(temp_video_path, temp_audio_path)
        transcript = transcribe_audio_to_text(temp_audio_path)

        return JSONResponse(content={
            "video": file.filename,
            "transcript": transcript
        })

    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})

    finally:
        # Remove both temp artifacts regardless of outcome.
        if os.path.exists(temp_video_path):
            os.remove(temp_video_path)
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
|
|
|
|
|
def extract_audio_from_video(video_path: str, audio_path: str):
    """Write the audio track of *video_path* to *audio_path* as a WAV file.

    NOTE(review): if the video has no audio track, ``clip.audio`` is
    presumably None and this raises AttributeError — confirm and decide
    whether the caller's 500 handler is the desired outcome.
    """
    clip = mp.VideoFileClip(video_path)
    try:
        clip.audio.write_audiofile(audio_path)
    finally:
        # The clip holds an open reader; close it explicitly instead of
        # relying on garbage collection (previously it was never closed).
        clip.close()
|
|
|
|
|
def transcribe_audio_to_text(audio_path: str, model_size: str = "base") -> str:
    """Transcribe *audio_path* with Whisper, one segment per line.

    For the default "base" size, reuses the module-level model loaded at
    import time instead of reloading it on every call (model loading is
    expensive); other sizes are loaded on demand.
    """
    whisper_model = model if model_size == "base" else whisper.load_model(model_size)
    result = whisper_model.transcribe(audio_path)
    # Join per-segment texts so the transcript has one line per segment.
    return "\n".join(seg["text"].strip() for seg in result["segments"])
|
|
|
|
|
|
|
|
|
|
|
# Development entry point: serve the app on all interfaces, port 7860.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|