# Source: Hugging Face Space by jashu827 — "updated code", commit 3915f46 (verified)
#!pip install gradio speechrecognition transformers torch nltk pydub matplotlib
# Required installations
# pip install gradio speechrecognition transformers torch nltk pydub matplotlib
# Required libraries (installed via requirements.txt, not here)
import matplotlib
matplotlib.use('Agg') # Use backend suitable for headless environments
import gradio as gr
import speech_recognition as sr
from pydub import AudioSegment
import os
from transformers import pipeline
import nltk
from nltk.tokenize import sent_tokenize
import matplotlib.pyplot as plt
import numpy as np
# Download the NLTK sentence tokenizer models needed by sent_tokenize.
# NLTK >= 3.8.2 renamed the resource to 'punkt_tab'; fetch both so the app
# works across NLTK versions (nltk.download prints-and-returns False for an
# unknown resource, so the extra call is harmless on older versions).
nltk.download('punkt')
nltk.download('punkt_tab')
class LectureSummarizer:
    """End-to-end lecture audio -> structured notes pipeline.

    Steps: convert the upload to WAV (pydub/ffmpeg), transcribe it with the
    free Google Web Speech API, summarize chunk-by-chunk with BART, and render
    a word-count comparison chart.

    Error convention: each step returns a string prefixed with
    "<Step> error:" on failure, so callers can reliably tell errors apart
    from real results (the original checked `"error" in value`, which both
    missed exception texts without the word "error" and false-positived on
    transcripts that merely contain it).
    """

    def __init__(self):
        # Abstractive summarizer; model weights download on first use.
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        self.recognizer = sr.Recognizer()

    def convert_to_wav(self, audio_file):
        """Convert *audio_file* to WAV and return the temp file path.

        Returns:
            The WAV path on success, or a string prefixed with
            "Conversion error:" on failure (the original returned the bare
            exception text, which the caller's error check could not detect).
        """
        try:
            audio = AudioSegment.from_file(audio_file)
            wav_path = "temp_lecture.wav"
            audio.export(wav_path, format="wav")
            return wav_path
        except Exception as e:
            return f"Conversion error: {e}"

    def transcribe_audio(self, audio_path):
        """Transcribe a WAV file to text via the Google Web Speech API.

        Returns:
            The transcript, or a string prefixed with "Transcription error:"
            on failure.
        """
        try:
            with sr.AudioFile(audio_path) as source:
                # Calibrate the energy threshold against background noise
                # before recording the whole file.
                self.recognizer.adjust_for_ambient_noise(source)
                audio = self.recognizer.record(source)
            # NOTE(review): recognize_google is meant for short clips; very
            # long lectures may need chunked recognition — confirm typical
            # input lengths.
            return self.recognizer.recognize_google(audio)
        except Exception as e:
            return f"Transcription error: {e}"

    def generate_summary(self, text, max_length=150):
        """Summarize *text* with BART, chunking long input by sentence.

        Args:
            text: transcript to summarize.
            max_length: maximum summary length (tokens) per chunk.

        Returns:
            The concatenated chunk summaries, "" for empty input, or a string
            prefixed with "Summarization error:" on failure.
        """
        try:
            # Empty/whitespace transcript: nothing to summarize.
            if not text.strip():
                return ""
            # BART's input window is limited (~1024 tokens), so build
            # sentence-aligned chunks of at most ~1000 characters.
            chunk_size = 1000
            chunks = []
            current_chunk = ""
            for sentence in sent_tokenize(text):
                if len(current_chunk) + len(sentence) < chunk_size:
                    current_chunk += " " + sentence
                else:
                    # Guard: when the very first sentence exceeds chunk_size,
                    # current_chunk is still "" — the original appended that
                    # empty chunk and the summarizer choked on it.
                    if current_chunk:
                        chunks.append(current_chunk.strip())
                    current_chunk = sentence
            if current_chunk:
                chunks.append(current_chunk.strip())
            # Summarize each chunk independently and stitch the results.
            summaries = []
            for chunk in chunks:
                result = self.summarizer(chunk, max_length=max_length,
                                         min_length=30, do_sample=False)
                summaries.append(result[0]['summary_text'])
            return " ".join(summaries)
        except Exception as e:
            return f"Summarization error: {e}"

    def create_visualization(self, original_text, summary_text):
        """Render a bar chart comparing transcript vs. summary word counts.

        Returns:
            Path of the saved PNG ("length_comparison.png").
        """
        original_len = len(original_text.split())
        summary_len = len(summary_text.split())
        plt.figure(figsize=(8, 4))
        bars = plt.bar(['Original Text', 'Summary'], [original_len, summary_len],
                       color=['#4287f5', '#42f5a7'])
        # Annotate each bar with its exact word count.
        for bar in bars:
            yval = bar.get_height()
            plt.text(bar.get_x() + bar.get_width() / 2, yval + 5,
                     f'{int(yval)} words', ha='center', va='bottom')
        plt.title('Text Length Comparison')
        plt.ylabel('Word Count')
        plt.tight_layout()
        viz_path = "length_comparison.png"
        plt.savefig(viz_path)
        plt.close()
        return viz_path

    def process_lecture(self, audio_file):
        """Run the full pipeline on an uploaded audio file.

        Returns:
            (structured_notes, summary, viz_path) on success, or
            (error_message, "", "") when conversion or transcription fails.
        """
        wav_path = self.convert_to_wav(audio_file)
        if wav_path.startswith("Conversion error:"):
            return wav_path, "", ""
        try:
            transcription = self.transcribe_audio(wav_path)
        finally:
            # Always remove the temp WAV — the original leaked it when
            # transcription failed (early return before cleanup).
            if os.path.exists(wav_path):
                os.remove(wav_path)
        if transcription.startswith("Transcription error:"):
            return transcription, "", ""
        summary = self.generate_summary(transcription)
        viz_path = self.create_visualization(transcription, summary)
        # Format the output as simple Markdown notes.
        structured_notes = f"""
# Lecture Summary Notes
## Full Transcription
{transcription}
## Key Points (Summary)
{summary}
"""
        return structured_notes, summary, viz_path
# Single shared pipeline instance, reused across all Gradio requests.
summarizer = LectureSummarizer()


def gradio_interface(audio_file):
    """Gradio callback: run the lecture pipeline on the uploaded file.

    Returns the (notes, summary, visualization path) triple produced by
    LectureSummarizer.process_lecture.
    """
    return summarizer.process_lecture(audio_file)
# Gradio UI: audio upload + button on the left, notes/summary/chart on the
# right, wired to gradio_interface.
with gr.Blocks(title="AI Lecture Summarizer") as demo:
    gr.Markdown("# AI-Powered Lecture Summarizer")
    gr.Markdown("Upload an audio lecture file to get structured notes and visualization")
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Upload Lecture Audio")
            submit_btn = gr.Button("Summarize")
        with gr.Column():
            notes_output = gr.Textbox(label="Structured Notes", lines=10)
            summary_output = gr.Textbox(label="Quick Summary", lines=5)
            viz_output = gr.Image(label="Text Length Comparison")
    submit_btn.click(
        fn=gradio_interface,
        inputs=audio_input,
        outputs=[notes_output, summary_output, viz_output]
    )

# Launch only when executed as a script — the original launched on import,
# which breaks embedding `demo` from another module or importing for tests.
if __name__ == "__main__":
    demo.launch()