import openai
import gradio as gr
import os
import time
from datetime import datetime
from pydub import AudioSegment
import io
import yt_dlp
import PyPDF2

# OpenAI API key comes from the environment (recommended for Hugging Face Spaces).
openai.api_key = os.getenv("OPENAI_API_KEY")


def transcribe_audio(audio_file):
    """Transcribe an audio file with OpenAI's Whisper model.

    Args:
        audio_file: A path or file-like object readable by pydub.

    Returns:
        The verbose_json transcription response (mapping with a
        'segments' list of {'start', 'end', 'text'} entries).

    Raises:
        Re-raises any error from pydub or the OpenAI API after logging it.
    """
    try:
        # Load the audio and re-export as WAV, which Whisper accepts.
        audio = AudioSegment.from_file(audio_file)
        buffer = io.BytesIO()
        audio.export(buffer, format="wav")
        buffer.seek(0)
        # The OpenAI client infers the upload format from the file's name;
        # an in-memory buffer has none, so set one explicitly.
        buffer.name = "audio.wav"
        # BUG FIX: the original passed the model both positionally
        # ("whisper-1") and as model='whisper' — a duplicate-argument
        # TypeError, and 'whisper' is not a valid model id.
        response = openai.Audio.transcribe(
            model="whisper-1",
            file=buffer,
            response_format="verbose_json",
        )
        return response
    except Exception as e:
        print(f"Error in transcribe_audio: {str(e)}")
        raise


def download_youtube_audio(url):
    """Download the audio track of a YouTube video as MP3.

    Args:
        url: The YouTube video URL.

    Returns:
        The path of the downloaded file ('downloaded_audio.mp3').

    Raises:
        Re-raises any yt_dlp error after logging it.
    """
    try:
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': 'downloaded_audio.%(ext)s',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        # The FFmpeg postprocessor always produces an .mp3 extension.
        return 'downloaded_audio.mp3'
    except Exception as e:
        print(f"Error in download_youtube_audio: {str(e)}")
        raise


def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF, one page per line.

    Uses the modern PyPDF2 API: PdfFileReader / numPages / getPage (which
    the original called) were removed in PyPDF2 3.0.

    Args:
        pdf_file: A path or binary file-like object of the PDF.

    Returns:
        The concatenated page text as a single string.

    Raises:
        Re-raises any PyPDF2 error after logging it.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in pdf_reader.pages:
            # extract_text() can return None for image-only pages.
            text += (page.extract_text() or "") + "\n"
        return text
    except Exception as e:
        print(f"Error in extract_text_from_pdf: {str(e)}")
        raise


def _format_segments(transcription):
    """Render a verbose_json transcription as 'start-end: text' lines."""
    return "\n".join(
        f"{segment['start']:.2f}-{segment['end']:.2f}: {segment['text']}"
        for segment in transcription['segments']
    )


def generate_summary(transcription, lesson_plan=""):
    """Summarize a transcription with GPT-3.5.

    Args:
        transcription: Either a Whisper verbose_json response (mapping with
            'segments') or a plain text string (e.g. extracted from a PDF).
        lesson_plan: Optional lesson-plan text to give the model context;
            empty by default, which reproduces the original prompt.

    Returns:
        The model's summary string.

    Raises:
        Re-raises any OpenAI API error after logging it.
    """
    try:
        # BUG FIX: the original assumed a segments dict and crashed on the
        # plain string produced by the PDF path.
        if isinstance(transcription, str):
            transcription_text = transcription
        else:
            transcription_text = _format_segments(transcription)
        context = (
            f"\nUse this lesson plan for context: {lesson_plan}\n"
            if lesson_plan else ""
        )
        prompt = f""" You are an intelligent assistant that will summarize the transcription below. 
The transcription text is: {transcription_text}
{context}Summarize the content into 1000 tokens or less, focusing on the key topics and main points. """
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an expert summarizer."},
                {"role": "user", "content": prompt}
            ]
        )
        summary = response['choices'][0]['message']['content']
        return summary
    except Exception as e:
        print(f"Error in generate_summary: {str(e)}")
        raise


def process_lecture(input_type, audio_input, pdf_input, youtube_input, lesson_plan):
    """Dispatch on the selected input type, transcribe, and summarize.

    Args:
        input_type: One of "Audio File", "YouTube URL", "PDF Document".
        audio_input: Uploaded audio file (or None).
        pdf_input: Uploaded PDF file (or None).
        youtube_input: YouTube URL string (or empty).
        lesson_plan: Free-text lesson plan, forwarded to the summarizer
            (the original collected this input but never used it).

    Returns:
        A (transcription_text, summary) pair of strings; on failure the
        pair carries human-readable error messages instead.
    """
    transcription = None
    try:
        if input_type == "Audio File" and audio_input is not None:
            transcription = transcribe_audio(audio_input)
        elif input_type == "YouTube URL" and youtube_input:
            # pydub accepts a path directly; no need to open the file here.
            audio_path = download_youtube_audio(youtube_input)
            transcription = transcribe_audio(audio_path)
        elif input_type == "PDF Document" and pdf_input is not None:
            transcription = extract_text_from_pdf(pdf_input)
    except Exception as e:
        return f"Error during processing: {str(e)}", "No summary available."

    if not transcription:
        return "No transcription available.", "No summary available."

    try:
        # BUG FIX: PDF input yields a plain string; the original indexed
        # transcription['segments'] unconditionally and crashed on PDFs.
        if isinstance(transcription, str):
            transcription_text = transcription
        else:
            transcription_text = _format_segments(transcription)
        summary = generate_summary(transcription, lesson_plan)
        return transcription_text, summary
    except Exception as e:
        return (
            f"Transcription generated, but error during summary generation: {str(e)}",
            "No summary available.",
        )
# ---------------------------------------------------------------------------
# Gradio interface: an "Upload" tab for choosing the input, and a second tab
# that displays the generated transcription and notes.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        "# Lecture Notes Generation Tool\n"
        "Convert your lecture recordings, PDFs, or YouTube links into detailed and structured notes easily!"
    )

    with gr.Tab("Upload"):
        input_type = gr.Radio(
            label="Select Input Type",
            choices=["Audio File", "PDF Document", "YouTube URL"],
            type="value",
        )
        # All three input widgets start hidden; the radio selection below
        # reveals exactly the one that matches.
        audio_input = gr.Audio(
            label="Upload your lecture audio file (mp3/wav)", visible=False
        )
        pdf_input = gr.File(label="Upload PDF Document", visible=False)
        youtube_input = gr.Textbox(label="Enter YouTube URL", visible=False)
        lesson_plan_input = gr.Textbox(
            label="Enter the lesson plan for context", lines=5
        )

        def update_visibility(choice):
            """Return visibility updates so only the widget for *choice* shows."""
            return (
                gr.update(visible=choice == "Audio File"),
                gr.update(visible=choice == "PDF Document"),
                gr.update(visible=choice == "YouTube URL"),
            )

        input_type.change(
            fn=update_visibility,
            inputs=[input_type],
            outputs=[audio_input, pdf_input, youtube_input],
        )

        submit_btn = gr.Button("Generate Notes")

    with gr.Tab("Transcription and Notes"):
        transcription_output = gr.Textbox(
            label="Lecture Transcription with Timestamps", interactive=False
        )
        summary_output = gr.Textbox(
            label="Summarized Lecture Notes", interactive=False
        )

    submit_btn.click(
        fn=process_lecture,
        inputs=[input_type, audio_input, pdf_input, youtube_input, lesson_plan_input],
        outputs=[transcription_output, summary_output],
    )

# Launch only when executed as a script (share=True exposes a public URL).
if __name__ == "__main__":
    demo.launch(share=True)