Spaces:
Sleeping
Sleeping
import os
import tempfile

import whisper
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
from jiwer import wer
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from transformers import pipeline
| app = FastAPI() | |
| # === Core Functions === | |
def extract_audio(video_path, audio_path="/tmp/extracted_audio.wav"):
    """Write the audio track of a video file to ``audio_path``.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path for the extracted audio.

    Returns:
        ``audio_path``, so the call can be chained.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        # A clip without an audio track exposes ``audio`` as None; fail with a
        # clear message instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        video.audio.write_audiofile(audio_path)
    finally:
        # Always release the resources held by the clip, even when the
        # export fails part-way through.
        video.close()
    return audio_path
# Whisper models already loaded by this process, keyed by model size.
# Loading a model is expensive, so each size is loaded at most once.
_WHISPER_MODELS = {}


def transcribe_audio(audio_path, model_size="large"):
    """Transcribe an audio file to text with Whisper.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_size: Name of the Whisper model to use (passed to
            ``whisper.load_model``).

    Returns:
        The ``"text"`` entry of the Whisper transcription result.
    """
    model = _WHISPER_MODELS.get(model_size)
    if model is None:
        # First request for this size: load once and keep it for later calls.
        model = whisper.load_model(model_size)
        _WHISPER_MODELS[model_size] = model
    result = model.transcribe(audio_path)
    return result["text"]
def video_to_text(video_path, model_size="large"):
    """Extract the audio of a video and return its transcription.

    The intermediate audio is written to a uniquely named temporary file so
    that overlapping calls do not overwrite each other's audio, and the file
    is removed whether or not extraction/transcription succeeds.

    Args:
        video_path: Path of the video file to process.
        model_size: Whisper model name forwarded to ``transcribe_audio``.

    Returns:
        The transcribed text.
    """
    # mkstemp creates the file and returns an open descriptor; only the
    # unique path is needed, so the descriptor is closed right away.
    fd, audio_path = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
    os.close(fd)
    try:
        extract_audio(video_path, audio_path)
        return transcribe_audio(audio_path, model_size)
    finally:
        if os.path.exists(audio_path):
            os.remove(audio_path)
def chunk_text(text, max_words=500):
    """Split ``text`` into pieces of at most ``max_words`` words each.

    Words are whitespace-separated tokens; each piece is re-joined with
    single spaces. An empty or whitespace-only text yields an empty list.

    Args:
        text: The text to split.
        max_words: Maximum number of words per piece.

    Returns:
        A list of strings, in original word order.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_words):
        window = tokens[start:start + max_words]
        pieces.append(" ".join(window))
    return pieces
# Shared summarization pipeline, created on first use. Building it is
# expensive, so it is not rebuilt on every call.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared BART summarization pipeline, creating it if needed."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def summarize_text(text):
    """Summarize ``text``, chunk by chunk, into a single summary.

    The text is split with ``chunk_text``; every chunk is summarized on its
    own. When there is more than one chunk, the joined chunk summaries are
    summarized once more to produce the final result.

    Args:
        text: The text to summarize.

    Returns:
        The summary string; an empty string when ``text`` contains no words.
    """
    # Nothing to summarize: skip loading the model entirely.
    if not text.strip():
        return ""

    summarizer = _get_summarizer()
    chunks = chunk_text(text)
    # truncation=True keeps an unusually token-dense chunk from exceeding the
    # model's maximum input length.
    summaries = [
        summarizer(
            chunk, max_length=180, min_length=60, do_sample=False, truncation=True
        )[0]['summary_text']
        for chunk in chunks
    ]
    full_summary = " ".join(summaries)
    if len(chunks) > 1:
        # With many chunks the joined summaries can themselves be longer than
        # the model accepts, so this pass is truncated as well.
        final = summarizer(
            full_summary,
            max_length=120,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        return final[0]['summary_text']
    return full_summary
| # === FastAPI Routes === | |
# NOTE(review): no route registration is visible for this handler, so it is
# registered here. The "/" path is an assumption - adjust it if clients or
# the hosting platform probe a different URL.
@app.get("/")
def health_check():
    """Report that the service is up.

    Returns:
        The dict ``{"status": "running"}``.
    """
    return {"status": "running"}
# NOTE(review): no route registration is visible for this handler, so it is
# registered here. The path is an assumption - adjust it if existing clients
# call a different URL.
@app.post("/process_video")
async def process_video(file: UploadFile = File(...)):
    """Transcribe and summarize an uploaded video.

    The upload is streamed to a temporary file, transcribed with
    ``video_to_text`` and summarized with ``summarize_text``. The temporary
    file is removed whether or not processing succeeds.

    Args:
        file: The uploaded video.

    Returns:
        A dict with ``"transcription"`` and ``"summary"`` on success, or a
        500 ``JSONResponse`` with an ``"error"`` message on failure.
    """
    video_path = None
    try:
        # The client-supplied filename is untrusted: never use it as a path.
        # Only a plain alphanumeric extension is carried over to the
        # temporary file name.
        extension = os.path.splitext(os.path.basename(file.filename or ""))[1]
        if not extension[1:].isalnum():
            extension = ""

        with tempfile.NamedTemporaryFile(
            prefix="temp_", suffix=extension, delete=False
        ) as f:
            video_path = f.name
            while chunk := await file.read(1024 * 1024):  # Read in chunks (1MB)
                f.write(chunk)

        extracted_text = video_to_text(video_path)
        summary = summarize_text(extracted_text)
        return {
            "transcription": extracted_text,
            "summary": summary
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Clean up the upload on both the success and the failure path.
        if video_path is not None and os.path.exists(video_path):
            os.remove(video_path)