# Hugging Face Space — Smart Lecture Notes Generator
# (The "Spaces: Runtime error" lines were copied-in page chrome from the
# Space's status banner, not part of the program.)
| import gradio as gr | |
| import os | |
| import tempfile | |
| import speech_recognition as sr | |
| from moviepy.editor import VideoFileClip | |
| import cv2 | |
| from PIL import Image | |
| import pytesseract | |
| import nltk | |
| from transformers import pipeline | |
# --- One-time NLP setup ---------------------------------------------------
# Fetch the sentence-tokenizer data nltk needs, and build the default
# Hugging Face summarization pipeline (model is downloaded on first run).
nltk.download("punkt")
summarizer = pipeline("summarization")
# Audio Transcription
def transcribe_audio(audio_path):
    """Transcribe a WAV/AIFF/FLAC audio file via Google's web speech API.

    Args:
        audio_path: Path to an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized transcript as a string.

    Raises:
        sr.UnknownValueError: if the speech is unintelligible.
        sr.RequestError: if the recognition service cannot be reached.
    """
    rec = sr.Recognizer()
    with sr.AudioFile(audio_path) as src:
        captured = rec.record(src)  # read the whole file into memory
    return rec.recognize_google(captured)
# Extract audio from video
def extract_audio(video_path):
    """Extract a video's audio track into a unique temporary WAV file.

    Args:
        video_path: Path to a video file readable by moviepy.

    Returns:
        Path to the written WAV file (caller is responsible for cleanup).
    """
    # Unique temp file instead of a fixed "temp_audio.wav" in the CWD, so
    # concurrent requests cannot overwrite each other's audio.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # moviepy opens the path itself; we only need the name
    video = VideoFileClip(video_path)
    try:
        video.audio.write_audiofile(audio_path)
    finally:
        # Release the ffmpeg reader/process even if the write fails.
        video.close()
    return audio_path
# Extract key frames from video
def extract_frames(video_path, interval=90, max_frames=3):
    """Sample frames from a video at a fixed frame interval.

    Args:
        video_path: Path to the video file.
        interval: Keep every ``interval``-th frame (~3 s at 30 fps default).
        max_frames: Stop after this many frames have been saved.

    Returns:
        List of paths to saved JPEG frames (at most ``max_frames``).
    """
    vidcap = cv2.VideoCapture(video_path)
    frames = []
    try:
        count = 0
        success, image = vidcap.read()
        # Stop as soon as we have enough frames — no need to decode the
        # whole video just to throw the rest away.
        while success and len(frames) < max_frames:
            if count % interval == 0:
                # Unique temp path instead of "frame_N.jpg" in the CWD, so
                # concurrent requests cannot clobber each other's frames.
                fd, filename = tempfile.mkstemp(suffix=".jpg")
                os.close(fd)
                cv2.imwrite(filename, image)
                frames.append(filename)
            success, image = vidcap.read()
            count += 1
    finally:
        vidcap.release()  # free the capture handle even on error
    return frames
# OCR on images
def ocr_text_from_frames(frame_paths):
    """Run Tesseract OCR over a list of image files.

    Args:
        frame_paths: Iterable of image file paths.

    Returns:
        All recognized text, one segment per frame, joined by newlines.
    """
    texts = []
    for frame_path in frame_paths:
        # Context manager closes the underlying file handle promptly
        # (the original leaked one open file per frame).
        with Image.open(frame_path) as img:
            texts.append(pytesseract.image_to_string(img))
    return "\n".join(texts)
# Summarize long text
def summarize_text(text, chunk_size=1000):
    """Summarize arbitrarily long text by feeding it to the model in chunks.

    Args:
        text: The text to summarize.
        chunk_size: Character length of each chunk sent to the summarizer
            (the pipeline has a limited input window).

    Returns:
        Newline-joined summaries, one per non-empty chunk.
    """
    chunks = (text[i:i + chunk_size] for i in range(0, len(text), chunk_size))
    summaries = [
        summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
        for chunk in chunks
        if chunk.strip()  # whitespace-only chunks can crash/garble the model
    ]
    return "\n".join(summaries)
# Core function
def process_lecture(file):
    """Gradio handler: transcribe speech, OCR slides, and summarize.

    Args:
        file: Upload from ``gr.File`` — a file-like object with
            ``.name``/``.read()`` on older Gradio versions, or a plain
            filepath string / NamedString on newer ones.

    Returns:
        Tuple of (transcript, slide OCR text, summary notes).
    """
    # Normalize the upload to a concrete path on disk. Newer Gradio passes
    # a path (str-like) rather than an open file object; calling .read()
    # on it raises — the likely cause of the Space's runtime error.
    if isinstance(file, str):
        input_path = file
    elif hasattr(file, "read"):
        suffix = os.path.splitext(file.name)[-1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(file.read())
            input_path = tmp.name
    else:
        # Gradio NamedString / tempfile wrapper: .name holds the path.
        input_path = file.name

    # Case-insensitive extension check so ".MP4" uploads are treated as video.
    suffix = os.path.splitext(input_path)[-1].lower()
    if suffix in (".mp4", ".mkv", ".avi"):
        audio_path = extract_audio(input_path)
        frames = extract_frames(input_path)
        slide_text = ocr_text_from_frames(frames)
    else:
        # Assume a plain audio upload; nothing to OCR.
        audio_path = input_path
        slide_text = ""

    try:
        transcript = transcribe_audio(audio_path)
    except Exception as e:
        # Best-effort: surface the error in the transcript box rather than
        # failing the whole request.
        transcript = f"[Error during transcription: {e}]"

    full_text = transcript + "\n" + slide_text
    summary = summarize_text(full_text) if full_text.strip() else "No content to summarize."
    return transcript, slide_text, summary
# Launch Gradio Interface
# NOTE(review): the original labels contained mojibake ("π€", "πΌ", "π")
# from mis-decoded emoji; restored to the most plausible intended glyphs —
# confirm against the original source if available.
iface = gr.Interface(
    fn=process_lecture,
    inputs=gr.File(label="Upload Lecture Audio or Video"),
    outputs=[
        gr.Textbox(label="🎤 Transcript"),
        gr.Textbox(label="🖼 Slide OCR Text"),
        gr.Textbox(label="📝 Summary Notes"),
    ],
    title="Smart Lecture Notes Generator",
    description="Upload a lecture recording (audio or video). It will transcribe speech, extract slide text via OCR, and generate summarized notes.",
)

iface.launch()