Spaces:

karthikmn
/

smart-notes

Runtime error

App Files Files Community

karthikmn committed on Jun 7, 2025

Commit

16e1a19

verified ·

1 Parent(s): 95573b9

Create app.py

Browse files

Files changed (1) hide show

app.py +98 -0

app.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import gradio as gr
+import os
+import tempfile
+import speech_recognition as sr
+from moviepy.editor import VideoFileClip
+import cv2
+from PIL import Image
+import pytesseract
+import nltk
+from transformers import pipeline
+# Module-level setup: both statements below execute once, when this module is run/imported.
+# Fetch NLTK's "punkt" resource.
+# NOTE(review): no other nltk call appears in the visible code — confirm this download is still needed.
+nltk.download("punkt")
+# Shared summarization pipeline used by summarize_text().
+# No model is named here, so the checkpoint is whatever transformers picks by default — TODO confirm / pin one.
+summarizer = pipeline("summarization")
+# Audio Transcription
+def transcribe_audio(audio_path):
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(audio_path) as source:
+        audio = recognizer.record(source)
+    return recognizer.recognize_google(audio)
+# Extract audio from video
+def extract_audio(video_path):
+    video = VideoFileClip(video_path)
+    audio_path = "temp_audio.wav"
+    video.audio.write_audiofile(audio_path)
+    return audio_path
+# Extract key frames from video
+def extract_frames(video_path, interval=90):  # 3 seconds if ~30fps
+    vidcap = cv2.VideoCapture(video_path)
+    success, image = vidcap.read()
+    count = 0
+    frames = []
+    while success:
+        if count % interval == 0:
+            filename = f"frame_{count}.jpg"
+            cv2.imwrite(filename, image)
+            frames.append(filename)
+        success, image = vidcap.read()
+        count += 1
+    return frames[:3]  # return top 3
+# OCR on images
+def ocr_text_from_frames(frame_paths):
+    texts = []
+    for frame in frame_paths:
+        img = Image.open(frame)
+        text = pytesseract.image_to_string(img)
+        texts.append(text)
+    return "\n".join(texts)
+# Summarize long text
+def summarize_text(text):
+    chunks = [text[i:i+1000] for i in range(0, len(text), 1000)]
+    summaries = [summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]['summary_text'] for chunk in chunks]
+    return "\n".join(summaries)
+# Core function
+def process_lecture(file):
+    suffix = os.path.splitext(file.name)[-1]
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+        tmp.write(file.read())
+        input_path = tmp.name
+    if suffix in [".mp4", ".mkv", ".avi"]:
+        audio_path = extract_audio(input_path)
+        frames = extract_frames(input_path)
+        slide_text = ocr_text_from_frames(frames)
+    else:
+        audio_path = input_path
+        slide_text = ""
+    try:
+        transcript = transcribe_audio(audio_path)
+    except Exception as e:
+        transcript = f"[Error during transcription: {e}]"
+    full_text = transcript + "\n" + slide_text
+    summary = summarize_text(full_text) if full_text.strip() else "No content to summarize."
+    return transcript, slide_text, summary
+# Launch Gradio Interface
+# A single file upload is passed to process_lecture(); the function returns
+# (transcript, slide_text, summary), listed below as three text boxes in the
+# same order.
+iface = gr.Interface(
+    fn=process_lecture,
+    inputs=gr.File(label="Upload Lecture Audio or Video"),
+    outputs=[
+        gr.Textbox(label="🎤 Transcript"),
+        gr.Textbox(label="🖼 Slide OCR Text"),
+        gr.Textbox(label="📝 Summary Notes")
+    ],
+    title="Smart Lecture Notes Generator",
+    description="Upload a lecture recording (audio or video). It will transcribe speech, extract slide text via OCR, and generate summarized notes."
+)
+# Called at module level with no __main__ guard, so the app is launched
+# whenever this module is executed or imported.
+iface.launch()