"""Streamlit app that turns an audio file, PDF, or YouTube URL into AI-generated notes.

The notes are shown on the page together with five generated sections and can
be downloaded as a PowerPoint presentation.
"""

import io
import os

import openai
import PyPDF2
import streamlit as st
from pptx import Presentation
from pydub import AudioSegment
from youtube_transcript_api import YouTubeTranscriptApi

# Set OpenAI API key
openai.api_key = os.getenv('OPENAI_API_KEY')

# Number of words per chunk when splitting PDF text for note generation.
DEFAULT_CHUNK_WORDS = 1500

# Titles of the generated sections, in the order they are displayed and in the
# order create_presentation() expects them after the summary.
SECTION_TITLES = (
    "Key Concepts",
    "Key Takeaways",
    "Case Studies/Examples",
    "Glossary",
    "FAQs",
)


# Function to transcribe audio using OpenAI Whisper
def transcribe_audio(audio_file):
    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

    The audio is decoded with pydub and re-encoded as WAV in memory before
    being uploaded.

    Args:
        audio_file: Anything accepted by ``AudioSegment.from_file``.

    Returns:
        The ``verbose_json`` response of ``openai.Audio.transcribe``, or
        ``None`` when any step fails (the error is reported with ``st.error``).
    """
    try:
        audio = AudioSegment.from_file(audio_file)

        # NOTE(review): WAV is uncompressed, so the exported buffer is
        # presumably much larger than a compressed upload and may exceed the
        # 25MB limit the sidebar mentions -- confirm against the API limits.
        buffer = io.BytesIO()
        audio.export(buffer, format="wav")
        buffer.seek(0)

        # BytesIO has no file name of its own; presumably the client uses the
        # name to identify the upload format -- verify against the openai docs.
        buffer.name = "audio.wav"

        return openai.Audio.transcribe(
            "whisper-1",
            file=buffer,
            response_format="verbose_json",
        )
    except Exception as e:  # UI boundary: report instead of crashing the app
        st.error(f"Error during transcription: {str(e)}")
        return None


def _split_into_chunks(text, chunk_size):
    """Split *text* on whitespace into strings of at most *chunk_size* words."""
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]


# Function to extract text from PDF and split it into chunks
def extract_text_from_pdf(pdf_file, chunk_size=DEFAULT_CHUNK_WORDS):
    """Extract the text of a PDF and split it into word-bounded chunks.

    Args:
        pdf_file: Binary file-like object; it is read completely.
        chunk_size: Maximum number of words per chunk (default 1500).

    Returns:
        A list of strings, each holding at most ``chunk_size`` words joined
        by single spaces. The list is empty when no text could be extracted.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    reader = PyPDF2.PdfReader(io.BytesIO(pdf_file.read()))
    # ``or ""`` guards against pages for which extract_text() yields None;
    # the original ``None + "\n"`` would raise TypeError on such a page.
    text = "\n".join(page.extract_text() or "" for page in reader.pages)
    return _split_into_chunks(text, chunk_size)


def _extract_video_id(url):
    """Return the video id found in a YouTube *url*, or ``None`` if there is none.

    Recognizes URLs containing ``watch?v=<id>`` or ``youtu.be/<id>``.
    """
    url = url.strip()
    if "watch?v=" in url:
        video_id = url.split("watch?v=")[1].split("&")[0]
    elif "youtu.be/" in url:
        video_id = url.split("youtu.be/")[1].split("?")[0]
    else:
        return None
    # An empty id (e.g. a URL ending in "watch?v=") is as invalid as no id.
    return video_id or None


# Function to get YouTube transcript
def get_youtube_transcript(url):
    """Fetch the English transcript of a YouTube video as plain text.

    Args:
        url: YouTube URL containing ``watch?v=<id>`` or ``youtu.be/<id>``.

    Returns:
        The transcript entries joined with newlines, or ``None`` when the URL
        is not recognized or fetching fails (the error is reported with
        ``st.error``).
    """
    try:
        video_id = _extract_video_id(url)
        if video_id is None:
            st.error("Invalid YouTube URL.")
            return None

        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = transcript_list.find_transcript(['en'])
        transcript_data = transcript.fetch()

        return "\n".join(entry['text'] for entry in transcript_data)
    except Exception as e:  # UI boundary: report instead of crashing the app
        st.error(f"Error fetching YouTube transcript: {str(e)}")
        return None


def _chat_completion(prompt, max_tokens):
    """Send *prompt* as a single user message to gpt-3.5-turbo and return the reply text."""
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[{'role': 'user', 'content': prompt}],
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content.strip()


# Function to generate notes for each chunk
def generate_notes(text):
    """Return bullet-point notes for *text*, generated by the chat model.

    Exceptions raised by the OpenAI client propagate to the caller.
    """
    prompt = f"Create comprehensive notes in bullet points from the following text:\n\n{text}"
    return _chat_completion(prompt, max_tokens=1000)


# Function to generate additional sections
def generate_section(title, text):
    """Return a 3-6 sentence section named *title* based on *text*.

    Exceptions raised by the OpenAI client propagate to the caller.
    """
    prompt = f"Generate a section titled '{title}' with 3-6 sentences based on the following text:\n\n{text}"
    return _chat_completion(prompt, max_tokens=500)


# Function to create PowerPoint presentation
def create_presentation(summary, key_concepts, key_takeaways, case_studies, glossary, faqs):
    """Build a PowerPoint deck with a title slide and one slide per section.

    Args:
        summary, key_concepts, key_takeaways, case_studies, glossary, faqs:
            Text placed in the body placeholder of the corresponding slide.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that holds the saved
        presentation.
    """
    prs = Presentation()

    # Title slide
    title_slide = prs.slides.add_slide(prs.slide_layouts[0])
    title_slide.shapes.title.text = "Lecture Notes"
    title_slide.placeholders[1].text = "Generated by AI Notes Generation Bot"

    # One slide per section
    slide_contents = (
        ("Summary", summary),
        ("Key Concepts", key_concepts),
        ("Key Takeaways", key_takeaways),
        ("Case Studies/Examples", case_studies),
        ("Glossary", glossary),
        ("FAQs", faqs),
    )
    for heading, content in slide_contents:
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        slide.shapes.title.text = heading
        slide.placeholders[1].text = content

    # Save the presentation to a BytesIO object
    ppt_buffer = io.BytesIO()
    prs.save(ppt_buffer)
    ppt_buffer.seek(0)
    return ppt_buffer


def _collect_source_text(input_type, audio_input, pdf_input, youtube_input):
    """Turn the selected input into the text stored as the summary.

    Every failure is reported with ``st.error``.

    Returns:
        The resulting text, or ``""`` when the input is missing or processing
        failed.
    """
    # NOTE(review): only the PDF path runs generate_notes(); for audio and
    # YouTube the raw transcript is returned and shown as the "Summary".
    # Confirm whether that asymmetry is intended.
    if input_type == "Audio File" and audio_input:
        transcription = transcribe_audio(audio_input)
        if not transcription:
            st.error("Transcription failed.")
            return ""
        return "\n".join(seg['text'] for seg in transcription['segments'])

    if input_type == "PDF Document" and pdf_input:
        try:
            chunks = extract_text_from_pdf(pdf_input)
            if not chunks:
                # Without this message an image-only PDF failed silently.
                st.error("No extractable text found in the PDF.")
                return ""
            return "\n".join(generate_notes(chunk) for chunk in chunks)
        except Exception as e:  # same reporting style as the other inputs
            st.error(f"Error processing PDF: {e}")
            return ""

    if input_type == "YouTube URL" and youtube_input:
        transcript_text = get_youtube_transcript(youtube_input)
        if not transcript_text:
            st.error("Failed to retrieve YouTube transcript.")
            return ""
        return transcript_text

    st.error("Please provide valid input.")
    return ""


def _render_body(text):
    """Render one block of generated text as Markdown."""
    # The text comes from transcripts, PDFs and model output, i.e. it is
    # untrusted, and the literal around it contains no markup, so raw-HTML
    # rendering is deliberately NOT enabled here.
    st.markdown(f"\n\n\n{text}\n\n\n")


# Main app
def main():
    """Run the Streamlit page: collect input, generate notes, offer the download."""
    st.set_page_config(layout="wide")

    # Add custom CSS for gradient background
    # NOTE(review): the string below holds no CSS and the title holds no
    # heading markup -- presumably lost at some point; restore if available.
    st.markdown(""" """, unsafe_allow_html=True)
    st.markdown("\n\nAI Notes Generation Bot 🤖\n\n", unsafe_allow_html=True)

    # Left sidebar for upload options
    st.sidebar.header("Upload your file:")
    input_type = st.sidebar.selectbox(
        "Select Input Type",
        ["Audio File", "PDF Document", "YouTube URL"],
    )

    st.sidebar.markdown("### Steps to Use the Tool:")
    st.sidebar.markdown("1. Upload an audio file (25MB max), PDF, or YouTube URL for notes.")
    st.sidebar.markdown("2. Click on 'Generate Notes' to get AI-generated notes.")
    st.sidebar.markdown("3. Use the 'Download Presentation' button to save your notes locally.")

    # File uploader or URL input based on selected type
    audio_input = pdf_input = youtube_input = None
    if input_type == "Audio File":
        audio_input = st.sidebar.file_uploader(
            "Upload audio file",
            type=["mp3", "wav"],
            key="audio",
            help="Supports mp3 and wav formats up to 25MB",
        )
    elif input_type == "PDF Document":
        pdf_input = st.sidebar.file_uploader(
            "Upload PDF document",
            type=["pdf"],
            key="pdf",
            help="",
        )
    elif input_type == "YouTube URL":
        youtube_input = st.sidebar.text_input(
            "Enter YouTube URL (must have subtitles enabled)",
            key="youtube",
        )

    # Place Generate Notes button in the sidebar
    if st.sidebar.button("Generate Notes", key="generate_notes"):
        transcription_text = _collect_source_text(
            input_type, audio_input, pdf_input, youtube_input
        )
        if transcription_text:
            st.session_state['summary'] = transcription_text
            # New notes invalidate the sections generated for the old ones.
            st.session_state['sections'] = {}

    # Display generated notes
    if 'summary' not in st.session_state:
        return

    st.markdown("---")
    st.subheader("Generated Notes")
    summary = st.session_state['summary']

    st.markdown("### Summary:")
    _render_body(summary)

    # Streamlit re-executes this script on every interaction (including a
    # click on the download button). Sections are cached in the session so
    # the five model calls happen once per summary instead of once per rerun.
    if 'sections' not in st.session_state:
        st.session_state['sections'] = {}
    sections = st.session_state['sections']

    for title in SECTION_TITLES:
        st.markdown(f"### {title}:")
        if title not in sections:
            try:
                sections[title] = generate_section(title, summary)
            except Exception as e:  # report and retry on the next rerun
                st.error(f"Error generating section '{title}': {e}")
                return
        _render_body(sections[title])

    # Option to download the PowerPoint presentation
    ppt_buffer = create_presentation(
        summary,
        sections["Key Concepts"],
        sections["Key Takeaways"],
        sections["Case Studies/Examples"],
        sections["Glossary"],
        sections["FAQs"],
    )
    st.download_button(
        label="Download Presentation",
        data=ppt_buffer,
        file_name="Lecture_Notes_Presentation.pptx",
        mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
    )


if __name__ == "__main__":
    main()