karthikmn committed on
Commit
16e1a19
·
verified ·
1 Parent(s): 95573b9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import tempfile
4
+ import speech_recognition as sr
5
+ from moviepy.editor import VideoFileClip
6
+ import cv2
7
+ from PIL import Image
8
+ import pytesseract
9
+ import nltk
10
+ from transformers import pipeline
11
+
12
# Download NLP models
nltk.download("punkt")
# Name the checkpoint explicitly instead of relying on the library's implicit
# default for the "summarization" task, so the model in use cannot change
# silently with a transformers upgrade.
# NOTE(review): this is believed to be the checkpoint transformers currently
# falls back to when no model is given - confirm before deploying.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
15
+
16
+ # Audio Transcription
17
def transcribe_audio(audio_path):
    """Return the text recognized from the audio file at ``audio_path``.

    The file is opened through ``sr.AudioFile``, recorded with a fresh
    ``sr.Recognizer`` and the recording is passed to ``recognize_google``.
    Any exception raised while opening or recognizing propagates to the
    caller.
    """
    engine = sr.Recognizer()
    audio_file = sr.AudioFile(audio_path)
    with audio_file as stream:
        captured = engine.record(stream)
        text = engine.recognize_google(captured)
    return text
22
+
23
+ # Extract audio from video
24
def extract_audio(video_path):
    """Write the audio track of ``video_path`` to a WAV file and return its path.

    The WAV goes to a uniquely named temporary file rather than a fixed name
    in the working directory, so successive or overlapping calls cannot
    overwrite each other's output. The caller owns the returned file and is
    responsible for deleting it.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the newly written ``.wav`` file.

    Raises:
        ValueError: If the clip exposes no audio track (``clip.audio`` is
            ``None``).
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")

        # Reserve a unique path; the handle is closed again before the
        # audio writer writes to that path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            audio_path = tmp.name

        try:
            video.audio.write_audiofile(audio_path)
        except Exception:
            # Do not leave a partial/empty file behind when writing fails.
            os.remove(audio_path)
            raise
    finally:
        # Always close the clip so its reader resources are released.
        video.close()
    return audio_path
29
+
30
+ # Extract key frames from video
31
def extract_frames(video_path, interval=90, max_frames=3):
    """Save up to ``max_frames`` sampled frames of a video as JPEG files.

    Every ``interval``-th frame (frame 0, ``interval``, ``2 * interval``, ...)
    is written to a uniquely named temporary ``.jpg`` file. Reading stops as
    soon as ``max_frames`` frames were saved, so the rest of the video is
    not decoded and no unused images are written. The caller owns the
    returned files and is responsible for deleting them.

    Args:
        video_path: Path of the video file to read.
        interval: Number of frames between two samples. The default of 90 is
            about 3 seconds at ~30 fps.
        max_frames: Maximum number of frames to save and return.

    Returns:
        List of paths of the saved JPEG files, in frame order.

    Raises:
        ValueError: If ``interval`` is not positive.
    """
    if interval <= 0:
        raise ValueError("interval must be a positive number of frames")

    frames = []
    vidcap = cv2.VideoCapture(video_path)
    try:
        count = 0
        while len(frames) < max_frames:
            success, image = vidcap.read()
            if not success:
                break
            if count % interval == 0:
                fd, filename = tempfile.mkstemp(
                    prefix=f"frame_{count}_", suffix=".jpg"
                )
                # Only the reserved path is needed; the image is written
                # to it by path below.
                os.close(fd)
                if cv2.imwrite(filename, image):
                    frames.append(filename)
                else:
                    # Writing failed: drop the empty placeholder file.
                    os.remove(filename)
            count += 1
    finally:
        # Release the capture even when reading or writing raises.
        vidcap.release()
    return frames
44
+
45
+ # OCR on images
46
def ocr_text_from_frames(frame_paths):
    """Run OCR on each image file and return the texts joined by newlines.

    Args:
        frame_paths: Iterable of image file paths.

    Returns:
        The ``pytesseract.image_to_string`` result of every image, in input
        order, joined with ``"\\n"``. An empty input yields ``""``.
    """
    texts = []
    for frame in frame_paths:
        # Use the image as a context manager so its file handle is closed
        # as soon as OCR for this frame is done.
        with Image.open(frame) as img:
            texts.append(pytesseract.image_to_string(img))
    return "\n".join(texts)
53
+
54
+ # Summarize long text
55
def summarize_text(text, chunk_size=1000):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    The text is split into chunks of at most ``chunk_size`` characters at
    whitespace boundaries, so words are not cut in half between two chunks
    (only a single word longer than ``chunk_size`` is broken). Runs of
    whitespace, including newlines, are normalized to single spaces by the
    splitting step. Each chunk is summarized independently with the
    module-level ``summarizer``.

    Args:
        text: The text to summarize.
        chunk_size: Maximum number of characters per chunk.

    Returns:
        The chunk summaries joined with ``"\\n"``; ``""`` when ``text`` is
        empty or contains only whitespace.
    """
    import textwrap

    chunks = textwrap.wrap(text, width=chunk_size)
    if not chunks:
        # Nothing but whitespace: do not invoke the model at all.
        return ""

    summaries = [
        # truncation=True guards against a chunk that tokenizes to more
        # tokens than the model accepts (e.g. dense OCR output).
        summarizer(
            chunk,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
    ]
    return "\n".join(summaries)
59
+
60
+ # Core function
61
def _resolve_upload(file):
    """Return ``(path, is_copy)`` for an uploaded file.

    ``file`` may be a path (``str`` / ``os.PathLike``) or an object with a
    ``name`` attribute. If ``name`` points at an existing file it is used
    directly; otherwise the object's content is copied to a temporary file
    via ``file.read()`` and ``is_copy`` is ``True`` so the caller knows it
    has to delete that copy.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file), False
    name = file.name
    if os.path.exists(name):
        return name, False
    suffix = os.path.splitext(name)[-1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(file.read())
    return tmp.name, True


def _remove_quietly(path):
    """Delete ``path``; cleanup is best effort, so OS errors are ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


def process_lecture(file):
    """Transcribe, OCR and summarize an uploaded lecture recording.

    For ``.mp4`` / ``.mkv`` / ``.avi`` uploads (extension compared
    case-insensitively) the audio track is extracted and sample frames are
    OCR'd; any other upload is treated as an audio file. Intermediate files
    created during processing are removed before returning.

    Args:
        file: The upload handed over by the UI: a path string, an object
            whose ``name`` is the path, or ``None`` when nothing was
            uploaded.
            NOTE(review): which of these Gradio passes depends on its
            version and the ``gr.File`` configuration - confirm.

    Returns:
        Tuple ``(transcript, slide_text, summary)``. When transcription
        fails, ``transcript`` holds a bracketed error message and the
        summary is built from the slide text only, so the error message is
        never summarized.
    """
    if file is None:
        return "", "", "No file uploaded."

    input_path, is_copy = _resolve_upload(file)
    suffix = os.path.splitext(input_path)[-1].lower()
    # Files created by this call; the original upload is never deleted.
    scratch = [input_path] if is_copy else []

    try:
        if suffix in (".mp4", ".mkv", ".avi"):
            audio_path = extract_audio(input_path)
            scratch.append(audio_path)
            frames = extract_frames(input_path)
            scratch.extend(frames)
            slide_text = ocr_text_from_frames(frames)
        else:
            audio_path = input_path
            slide_text = ""

        try:
            transcript = transcribe_audio(audio_path)
            spoken_text = transcript
        except Exception as e:
            # Show the failure to the user but keep it out of the summary.
            transcript = f"[Error during transcription: {e}]"
            spoken_text = ""

        full_text = spoken_text + "\n" + slide_text
        if full_text.strip():
            summary = summarize_text(full_text)
        else:
            summary = "No content to summarize."
    finally:
        for path in scratch:
            _remove_quietly(path)

    return transcript, slide_text, summary
84
+
85
+ # Launch Gradio Interface
86
# Single upload widget that feeds process_lecture.
_lecture_upload = gr.File(label="Upload Lecture Audio or Video")

# One textbox per value returned by process_lecture, in the same order:
# transcript, slide OCR text, summary.
_result_boxes = [
    gr.Textbox(label=caption)
    for caption in ("🎤 Transcript", "🖼 Slide OCR Text", "📝 Summary Notes")
]

iface = gr.Interface(
    fn=process_lecture,
    inputs=_lecture_upload,
    outputs=_result_boxes,
    title="Smart Lecture Notes Generator",
    description="Upload a lecture recording (audio or video). It will transcribe speech, extract slide text via OCR, and generate summarized notes.",
)

# Start the web UI.
iface.launch()