# abhishekjoel's picture
# Update app.py
# bdb27e7 verified
import openai
import gradio as gr
import os
import time
from datetime import datetime
from pydub import AudioSegment
import io
import yt_dlp
import PyPDF2
# OpenAI API key read from the environment (recommended for Hugging Face
# Spaces). If OPENAI_API_KEY is unset this is None and all API calls below
# will fail with an authentication error.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Function to convert audio file to text using OpenAI's Whisper
def transcribe_audio(audio_file):
    """Transcribe an audio file with OpenAI's Whisper API.

    Args:
        audio_file: A file path or file-like object readable by pydub.

    Returns:
        The verbose-JSON transcription response, which includes a
        'segments' list with per-segment 'start'/'end' timestamps and
        'text' (consumed by generate_summary / process_lecture).

    Raises:
        Re-raises any exception after logging it to stdout.
    """
    try:
        # Load the audio file (pydub autodetects the container format).
        audio = AudioSegment.from_file(audio_file)
        # Export as WAV, which Whisper accepts.
        buffer = io.BytesIO()
        audio.export(buffer, format="wav")
        buffer.seek(0)
        # The OpenAI client infers the upload format from the file name,
        # so give the in-memory buffer one.
        buffer.name = "audio.wav"
        # Bug fix: the original passed the model both positionally
        # ("whisper-1") and as model='whisper', which raises a TypeError
        # for a duplicate 'model' argument before any request is made.
        response = openai.Audio.transcribe(
            "whisper-1",
            file=buffer,
            response_format='verbose_json'
        )
        return response
    except Exception as e:
        print(f"Error in transcribe_audio: {str(e)}")
        raise
# Function to download audio from YouTube URL
def download_youtube_audio(url):
    """Download a YouTube video's audio track and convert it to MP3.

    Args:
        url: The YouTube video URL.

    Returns:
        The fixed local path 'downloaded_audio.mp3'.

    Raises:
        Re-raises any yt-dlp failure after logging it to stdout.
    """
    # Grab the best available audio stream and have FFmpeg re-encode it
    # to MP3 at 192 kbps under a fixed output name.
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'downloaded_audio.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }
    try:
        downloader = yt_dlp.YoutubeDL(options)
        with downloader as ydl:
            ydl.download([url])
    except Exception as e:
        print(f"Error in download_youtube_audio: {str(e)}")
        raise
    return 'downloaded_audio.mp3'
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF as one string.

    Args:
        pdf_file: A file path or binary file-like object containing a PDF.

    Returns:
        The concatenated page texts, one newline after each page.

    Raises:
        Re-raises any PyPDF2 parsing error after logging it to stdout.
    """
    try:
        # Bug fix: the original used PdfFileReader/numPages/getPage (the
        # PyPDF2 1.x API, removed in 3.x and raising DeprecationError)
        # mixed with the modern extract_text(). Use the current
        # PdfReader/pages API throughout.
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in pdf_reader.pages:
            # extract_text() may return None for image-only pages.
            text += (page.extract_text() or "") + "\n"
        return text
    except Exception as e:
        print(f"Error in extract_text_from_pdf: {str(e)}")
        raise
# Function to generate summarised lecture notes using GPT-3.5
def generate_summary(transcription):
    """Summarize a transcription with GPT-3.5.

    Args:
        transcription: Either a Whisper verbose-JSON response (a mapping
            with a 'segments' list) or a plain text string (the PDF path
            in process_lecture passes a string).

    Returns:
        The summary text produced by the model.

    Raises:
        Re-raises any OpenAI API error after logging it to stdout.
    """
    try:
        # Bug fix: PDF extraction yields a plain string with no
        # 'segments' key; indexing it crashed. Only format timestamped
        # segments for Whisper's dict-style response.
        if isinstance(transcription, str):
            transcription_text = transcription
        else:
            transcription_text = "\n".join(
                f"{segment['start']:.2f}-{segment['end']:.2f}: {segment['text']}"
                for segment in transcription['segments']
            )
        prompt = f"""
You are an intelligent assistant that will summarize the transcription below.
The transcription text is:
{transcription_text}
Summarize the content into 1000 tokens or less, focusing on the key topics and main points.
"""
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an expert summarizer."},
                {"role": "user", "content": prompt}
            ]
        )
        summary = response['choices'][0]['message']['content']
        return summary
    except Exception as e:
        print(f"Error in generate_summary: {str(e)}")
        raise
# Define the main function to handle transcription and summary generation
def process_lecture(input_type, audio_input, pdf_input, youtube_input, lesson_plan):
    """Dispatch on the selected input type, transcribe/extract text, summarize.

    Args:
        input_type: One of "Audio File", "YouTube URL", "PDF Document".
        audio_input: Uploaded audio file, or None.
        pdf_input: Uploaded PDF file, or None.
        youtube_input: YouTube URL string, or empty.
        lesson_plan: Lesson-plan context text. NOTE(review): currently
            unused by the pipeline; kept for interface compatibility.

    Returns:
        A (transcription_text, summary) tuple of strings; on failure the
        first element carries the error message and the second is
        "No summary available.".
    """
    transcription = ""
    try:
        if input_type == "Audio File" and audio_input is not None:
            transcription = transcribe_audio(audio_input)
        elif input_type == "YouTube URL" and youtube_input:
            audio_path = download_youtube_audio(youtube_input)
            with open(audio_path, "rb") as f:
                transcription = transcribe_audio(f)
        elif input_type == "PDF Document" and pdf_input is not None:
            transcription = extract_text_from_pdf(pdf_input)
    except Exception as e:
        return f"Error during processing: {str(e)}", "No summary available."
    # Guard clause: nothing was produced (no matching input, or an empty
    # PDF extraction).
    if not transcription:
        return "No transcription available.", "No summary available."
    try:
        # Bug fix: the PDF path produces a plain string, which has no
        # 'segments' key; only format timestamped segments for Whisper's
        # dict-style response.
        if isinstance(transcription, str):
            transcription_text = transcription
        else:
            transcription_text = "\n".join(
                f"{segment['start']:.2f}-{segment['end']:.2f}: {segment['text']}"
                for segment in transcription['segments']
            )
        summary = generate_summary(transcription)
        return transcription_text, summary
    except Exception as e:
        return f"Transcription generated, but error during summary generation: {str(e)}", "No summary available."
# Set up Gradio interface: an "Upload" tab that shows exactly one input
# widget depending on the selected input type, and a results tab with the
# transcription and summary.
with gr.Blocks() as demo:
    gr.Markdown("# Lecture Notes Generation Tool\nConvert your lecture recordings, PDFs, or YouTube links into detailed and structured notes easily!")
    with gr.Tab("Upload"):
        # Selector that drives which of the three input widgets is visible.
        input_type = gr.Radio(label="Select Input Type", choices=["Audio File", "PDF Document", "YouTube URL"], type="value")
        # All three start hidden; update_visibility reveals the relevant one.
        audio_input = gr.Audio(label="Upload your lecture audio file (mp3/wav)", visible=False)
        pdf_input = gr.File(label="Upload PDF Document", visible=False)
        youtube_input = gr.Textbox(label="Enter YouTube URL", visible=False)
        # NOTE(review): collected but not consumed by process_lecture yet.
        lesson_plan_input = gr.Textbox(label="Enter the lesson plan for context", lines=5)

        def update_visibility(input_choice):
            # Show exactly one of the three widgets, matching the radio choice.
            return gr.update(visible=input_choice == "Audio File"), gr.update(visible=input_choice == "PDF Document"), gr.update(visible=input_choice == "YouTube URL")

        input_type.change(fn=update_visibility, inputs=[input_type], outputs=[audio_input, pdf_input, youtube_input])
        submit_btn = gr.Button("Generate Notes")
    with gr.Tab("Transcription and Notes"):
        # Read-only outputs populated by the submit handler below.
        transcription_output = gr.Textbox(label="Lecture Transcription with Timestamps", interactive=False)
        summary_output = gr.Textbox(label="Summarized Lecture Notes", interactive=False)
    submit_btn.click(fn=process_lecture, inputs=[input_type, audio_input, pdf_input, youtube_input, lesson_plan_input], outputs=[transcription_output, summary_output])
# Launch the interface when run as a script (share=True requests a public
# Gradio share link in addition to the local server).
if __name__ == "__main__":
    demo.launch(share=True)