# app.py — Smart Lecture Notes Generator (Gradio app)
# Transcribes lecture audio, OCRs slide frames from video, and summarizes.
import gradio as gr
import os
import tempfile
import speech_recognition as sr
from moviepy.editor import VideoFileClip
import cv2
from PIL import Image
import pytesseract
import nltk
from transformers import pipeline
# Download NLP models
# NOTE(review): "punkt" is fetched here but nltk tokenization is not used in
# the visible code — presumably needed by a downstream dependency; confirm.
nltk.download("punkt")
# Default HuggingFace summarization pipeline (model chosen by transformers);
# loaded once at import time so requests don't pay the model-load cost.
summarizer = pipeline("summarization")
# Audio Transcription
def transcribe_audio(audio_path):
    """Transcribe a speech audio file using Google's free web speech API.

    Args:
        audio_path: Path to an audio file readable by ``sr.AudioFile``
            (WAV/AIFF/FLAC).

    Returns:
        The recognized text as a string.

    Raises:
        sr.UnknownValueError / sr.RequestError on recognition failure
        (callers are expected to handle these).
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        captured = recognizer.record(source)  # read the entire file
    return recognizer.recognize_google(captured)
# Extract audio from video
def extract_audio(video_path):
    """Extract the audio track of a video to ``temp_audio.wav``.

    Args:
        video_path: Path to a video file readable by moviepy.

    Returns:
        Path to the written WAV file (``"temp_audio.wav"`` in the CWD).

    Raises:
        ValueError: If the video has no audio track.
    """
    audio_path = "temp_audio.wav"
    video = VideoFileClip(video_path)
    try:
        # Bug fix: original crashed with an opaque AttributeError on
        # audio-less videos; fail with a clear message instead.
        if video.audio is None:
            raise ValueError(f"Video has no audio track: {video_path}")
        video.audio.write_audiofile(audio_path)
    finally:
        # Bug fix: original leaked the ffmpeg reader — always close the clip.
        video.close()
    return audio_path
# Extract key frames from video
def extract_frames(video_path, interval=90, max_frames=3):  # 3 seconds if ~30fps
    """Sample frames from a video and save them as JPEGs in the CWD.

    Args:
        video_path: Path to a video file readable by OpenCV.
        interval: Keep every ``interval``-th frame (default 90 ≈ one frame
            per 3 s at ~30 fps).
        max_frames: Stop after this many frames (default 3, matching the
            original behavior of returning the first three).

    Returns:
        List of saved frame filenames (``frame_<n>.jpg``), at most
        ``max_frames`` entries.
    """
    vidcap = cv2.VideoCapture(video_path)
    frames = []
    count = 0
    try:
        success, image = vidcap.read()
        # Improvement: original decoded and wrote EVERY sampled frame but
        # returned only the first three, leaving orphan files on disk and
        # wasting decode time — stop as soon as we have enough.
        while success and len(frames) < max_frames:
            if count % interval == 0:
                filename = f"frame_{count}.jpg"
                cv2.imwrite(filename, image)
                frames.append(filename)
            success, image = vidcap.read()
            count += 1
    finally:
        # Bug fix: original never released the capture handle.
        vidcap.release()
    return frames
# OCR on images
def ocr_text_from_frames(frame_paths):
    """Run Tesseract OCR over each frame image.

    Args:
        frame_paths: Iterable of image file paths.

    Returns:
        The OCR text of all frames, joined with newlines.
    """
    return "\n".join(
        pytesseract.image_to_string(Image.open(path)) for path in frame_paths
    )
# Summarize long text
def summarize_text(text):
    """Summarize long text by chunking it into ~1000-character pieces.

    Each chunk is summarized independently by the module-level ``summarizer``
    pipeline; the per-chunk summaries are joined with newlines.

    Args:
        text: The full text to summarize.

    Returns:
        Newline-joined summaries, one per chunk.
    """
    step = 1000
    pieces = []
    for start in range(0, len(text), step):
        chunk = text[start:start + step]
        result = summarizer(chunk, max_length=100, min_length=30, do_sample=False)
        pieces.append(result[0]["summary_text"])
    return "\n".join(pieces)
# Core function
def process_lecture(file):
    """Gradio handler: transcribe an uploaded lecture and summarize it.

    For video uploads, also extracts audio and OCRs a few sampled frames;
    for plain audio uploads, the slide-text channel is empty.

    Args:
        file: Gradio file object with ``.name`` and ``.read()``.

    Returns:
        Tuple of (transcript, slide OCR text, summary).
    """
    suffix = os.path.splitext(file.name)[-1]
    # Persist the upload to a real path so moviepy/cv2/speech_recognition
    # can open it by filename.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(file.read())
        input_path = tmp.name

    # Bug fix: compare the suffix case-insensitively so ".MP4"/".Mkv"
    # uploads are treated as video rather than falling through to audio.
    if suffix.lower() in (".mp4", ".mkv", ".avi"):
        audio_path = extract_audio(input_path)
        frames = extract_frames(input_path)
        slide_text = ocr_text_from_frames(frames)
    else:
        audio_path = input_path
        slide_text = ""

    try:
        transcript = transcribe_audio(audio_path)
    except Exception as e:
        # Best-effort: surface the failure in the transcript box rather
        # than crashing the whole request.
        transcript = f"[Error during transcription: {e}]"

    full_text = transcript + "\n" + slide_text
    summary = summarize_text(full_text) if full_text.strip() else "No content to summarize."
    return transcript, slide_text, summary
# Launch Gradio Interface
# Launch Gradio Interface
iface = gr.Interface(
    fn=process_lecture,
    inputs=gr.File(label="Upload Lecture Audio or Video"),
    outputs=[
        # Bug fix: the original labels contained mojibake (UTF-8 emoji bytes
        # decoded as cp1252: "🎀", "πŸ–Ό", "πŸ“") — restored to the
        # intended emoji characters.
        gr.Textbox(label="🎀 Transcript"),
        gr.Textbox(label="🖼 Slide OCR Text"),
        gr.Textbox(label="📝 Summary Notes"),
    ],
    title="Smart Lecture Notes Generator",
    description="Upload a lecture recording (audio or video). It will transcribe speech, extract slide text via OCR, and generate summarized notes.",
)
iface.launch()