Spaces:
Sleeping
Sleeping
| import os | |
| import openai | |
| import streamlit as st | |
| import io | |
| from pydub import AudioSegment | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| import PyPDF2 | |
| from pptx import Presentation | |
# Configure the OpenAI client with the key taken from the process environment.
# The value is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY')
# Function to transcribe audio using OpenAI Whisper
# Upload ceiling advertised in the app's sidebar ("25MB max").
_WHISPER_MAX_UPLOAD_BYTES = 25 * 1024 * 1024
# Target sample rate for the upload; higher rates only make the WAV larger.
_WHISPER_SAMPLE_RATE = 16000


def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file with the OpenAI Whisper API.

    The audio is decoded with pydub, reduced to mono at no more than
    16 kHz, re-encoded as WAV in memory and sent to the ``whisper-1``
    model. Reducing channels and sample rate keeps the uncompressed WAV
    several times smaller than a straight re-encode, so longer
    recordings still fit under the upload limit.

    Args:
        audio_file: A path or binary file-like object accepted by
            ``AudioSegment.from_file``.

    Returns:
        The ``verbose_json`` response from the API, or ``None`` when
        decoding, the size check, or the API call fails (an error is
        shown in the Streamlit UI in that case).
    """
    try:
        # Load audio file
        audio = AudioSegment.from_file(audio_file)

        # Shrink before exporting: WAV is uncompressed, so stereo / high
        # sample-rate input would otherwise balloon the upload.
        if audio.channels > 1:
            audio = audio.set_channels(1)
        if audio.frame_rate > _WHISPER_SAMPLE_RATE:
            audio = audio.set_frame_rate(_WHISPER_SAMPLE_RATE)

        # Convert to WAV
        buffer = io.BytesIO()
        audio.export(buffer, format="wav")
        buffer.seek(0)

        # Fail early with a readable message instead of an opaque API error.
        if buffer.getbuffer().nbytes > _WHISPER_MAX_UPLOAD_BYTES:
            st.error(
                "Error during transcription: the converted audio exceeds "
                "the 25MB upload limit. Please upload a shorter recording."
            )
            return None

        # The API infers the format from the file name, so give the
        # in-memory buffer one.
        buffer.name = "audio.wav"

        # Transcribe audio
        return openai.Audio.transcribe(
            "whisper-1",
            file=buffer,
            response_format="verbose_json",
        )
    except Exception as e:
        # UI boundary: surface any decoding/network/API failure to the user.
        st.error(f"Error during transcription: {str(e)}")
        return None
# Function to extract text from PDF and split it into chunks
def extract_text_from_pdf(pdf_file, chunk_size=1500):
    """Extract the text of a PDF and split it into word-bounded chunks.

    Args:
        pdf_file: A binary file-like object whose ``read()`` returns the
            PDF bytes.
        chunk_size: Maximum number of whitespace-separated words per
            chunk. Defaults to 1500.

    Returns:
        A list of strings, each holding at most ``chunk_size`` words
        joined by single spaces. The list is empty when no text could be
        extracted.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    reader = PyPDF2.PdfReader(io.BytesIO(pdf_file.read()))
    # Defensive: treat a page that yields no text as empty so the join
    # cannot fail on a non-string value.
    page_texts = [page.extract_text() or "" for page in reader.pages]

    # Page boundaries only need to be whitespace; split() normalises them.
    words = "\n".join(page_texts).split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
# Function to get YouTube transcript
def _extract_video_id(url):
    """Return the video ID found in a YouTube URL, or None if there is none."""
    from urllib.parse import parse_qs, urlparse

    candidate = url.strip()
    # urlparse only recognises the host when a scheme is present.
    if "://" not in candidate:
        candidate = "https://" + candidate
    parsed = urlparse(candidate)

    # Standard watch links: the ID is the "v" parameter, wherever it
    # appears in the query string. The fragment is parsed separately, so
    # "#t=30" style suffixes never end up in the ID.
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    # Short links: https://youtu.be/<id>
    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None

    # Path-based links: /embed/<id>, /shorts/<id>, /live/<id>
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live"):
        return segments[1]

    return None


def get_youtube_transcript(url):
    """Fetch the English transcript of a YouTube video as plain text.

    Args:
        url: A YouTube video URL (watch, youtu.be, embed, shorts or live
            form).

    Returns:
        The transcript entries' text joined with newlines, or ``None``
        when the URL holds no video ID or the transcript cannot be
        fetched (an error is shown in the Streamlit UI in that case).
    """
    try:
        # Extract video ID from URL
        video_id = _extract_video_id(url)
        if not video_id:
            st.error("Invalid YouTube URL.")
            return None

        # Fetch transcript
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        # Choose a transcript
        transcript = transcript_list.find_transcript(['en'])
        # Fetch the actual transcript data
        transcript_data = transcript.fetch()
        # Combine transcript texts
        return "\n".join(entry['text'] for entry in transcript_data)
    except Exception as e:
        # UI boundary: report parsing, network and API failures to the user.
        st.error(f"Error fetching YouTube transcript: {str(e)}")
        return None
# Function to generate notes for each chunk
def generate_notes(text):
    """Ask the chat model for bullet-point notes covering ``text``.

    Args:
        text: The source text to turn into notes.

    Returns:
        The content of the first completion choice, stripped of
        surrounding whitespace.
    """
    prompt = f"Create comprehensive notes in bullet points from the following text:\n\n{text}"
    user_message = {'role': 'user', 'content': prompt}
    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[user_message],
        max_tokens=1000,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
# Function to generate additional sections
def generate_section(title, text):
    """Ask the chat model for a short titled section based on ``text``.

    Args:
        title: Heading the generated section should carry.
        text: The source text the section is derived from.

    Returns:
        The content of the first completion choice, stripped of
        surrounding whitespace.
    """
    prompt = f"Generate a section titled '{title}' with 3-6 sentences based on the following text:\n\n{text}"
    user_message = {'role': 'user', 'content': prompt}
    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[user_message],
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
# Function to create PowerPoint presentation
def create_presentation(summary, key_concepts, key_takeaways, case_studies, glossary, faqs):
    """Build a PowerPoint deck with a title slide and one slide per section.

    Args:
        summary: Text for the "Summary" slide.
        key_concepts: Text for the "Key Concepts" slide.
        key_takeaways: Text for the "Key Takeaways" slide.
        case_studies: Text for the "Case Studies/Examples" slide.
        glossary: Text for the "Glossary" slide.
        faqs: Text for the "FAQs" slide.

    Returns:
        An ``io.BytesIO`` holding the saved presentation, positioned at
        the start so it can be read immediately.
    """
    deck = Presentation()

    # Title slide (layout 0): heading plus subtitle placeholder.
    cover = deck.slides.add_slide(deck.slide_layouts[0])
    cover_subtitle = cover.placeholders[1]
    cover.shapes.title.text = "Lecture Notes"
    cover_subtitle.text = "Generated by AI Notes Generation Bot"

    # One title-and-content slide (layout 1) per section, in display order.
    section_slides = (
        ("Summary", summary),
        ("Key Concepts", key_concepts),
        ("Key Takeaways", key_takeaways),
        ("Case Studies/Examples", case_studies),
        ("Glossary", glossary),
        ("FAQs", faqs),
    )
    for heading, body in section_slides:
        section = deck.slides.add_slide(deck.slide_layouts[1])
        section.shapes.title.text = heading
        section.placeholders[1].text = body

    # Serialise into memory and rewind so callers can read from the start.
    output = io.BytesIO()
    deck.save(output)
    output.seek(0)
    return output
# Main app
# Page-wide theme: gradient background, dark text areas, turquoise buttons.
_PAGE_CSS = """
<style>
.stApp {
    background: linear-gradient(180deg,
        rgba(64,224,208,0.7) 0%,
        rgba(32,112,104,0.4) 35%,
        rgba(0,0,0,0) 100%
    );
}
.css-1d391kg {
    background: none;
}
.stMarkdown {
    color: #ffffff;
}
.css-1y4p8pa {
    max-width: 100%;
    padding: 2rem;
}
div[data-testid="stSidebarContent"] {
    background-color: rgba(255,255,255,0.1);
}
.stTextArea textarea {
    background-color: #000000 !important;
    color: #ffffff !important;
}
.stButton button {
    background-color: #40E0D0;
    color: black;
}
.stButton button:hover {
    background-color: #48D1CC;
    color: black;
}
h1, h2, h3, h4, h5, h6 {
    color: white !important;
}
.css-184tjsw p {
    color: white !important;
}
.stTextInput input {
    color: white !important;
    background-color: rgba(0, 0, 0, 0.5) !important;
}
</style>
"""

# Sections generated from the summary, in display and slide order.
_SECTION_TITLES = (
    "Key Concepts",
    "Key Takeaways",
    "Case Studies/Examples",
    "Glossary",
    "FAQs",
)


def _render_box(text):
    """Show ``text`` inside the black rounded box used for every section."""
    from html import escape

    # The text comes from transcripts and model output, so escape it
    # before embedding it in raw HTML.
    st.markdown(
        "<div style='background-color: black; color: white; padding: 10px; border-radius: 5px;'>"
        f"<p>{escape(text, quote=False)}</p>"
        "</div>",
        unsafe_allow_html=True,
    )


def _load_source_text(input_type, audio_input, pdf_input, youtube_input):
    """Turn the selected input into text, reporting failures in the UI.

    Returns the text to store as the summary, or a falsy value when the
    input is missing or could not be processed.
    """
    if input_type == "Audio File" and audio_input:
        transcription = transcribe_audio(audio_input)
        if not transcription:
            st.error("Transcription failed.")
            return ""
        return "\n".join(seg['text'] for seg in transcription['segments'])

    if input_type == "PDF Document" and pdf_input:
        chunks = extract_text_from_pdf(pdf_input)
        if not chunks:
            # e.g. a scanned PDF with no text layer; say so instead of
            # silently doing nothing.
            st.error("No extractable text found in the PDF.")
            return ""
        return "\n".join(generate_notes(chunk) for chunk in chunks)

    if input_type == "YouTube URL" and youtube_input:
        transcript_text = get_youtube_transcript(youtube_input)
        if not transcript_text:
            st.error("Failed to retrieve YouTube transcript.")
        return transcript_text

    st.error("Please provide valid input.")
    return ""


def main():
    """Run the Streamlit notes-generation app.

    The sidebar collects an audio file, PDF or YouTube URL. "Generate
    Notes" converts it to text and stores it in
    ``st.session_state['summary']``. The main area then shows the summary
    and five generated sections and offers them as a PowerPoint download.
    Generated sections are cached in ``st.session_state['sections']`` so
    that script reruns triggered by other widgets (such as the download
    button) do not call the model again.
    """
    st.set_page_config(layout="wide")
    # Add custom CSS for gradient background
    st.markdown(_PAGE_CSS, unsafe_allow_html=True)
    st.markdown("<h1 style='text-align: center;'>AI Notes Generation Bot 🤖</h1>", unsafe_allow_html=True)

    # Left sidebar for upload options
    st.sidebar.header("Upload your file:")
    input_type = st.sidebar.selectbox("Select Input Type", ["Audio File", "PDF Document", "YouTube URL"])
    st.sidebar.markdown("### Steps to Use the Tool:")
    st.sidebar.markdown("1. Upload an audio file (25MB max), PDF, or YouTube URL for notes.")
    st.sidebar.markdown("2. Click on 'Generate Notes' to get AI-generated notes.")
    st.sidebar.markdown("3. Use the 'Download Presentation' button to save your notes locally.")

    # File uploader or URL input based on selected type
    audio_input = pdf_input = youtube_input = None
    if input_type == "Audio File":
        audio_input = st.sidebar.file_uploader("Upload audio file", type=["mp3", "wav"], key="audio", help="Supports mp3 and wav formats up to 25MB")
    elif input_type == "PDF Document":
        pdf_input = st.sidebar.file_uploader("Upload PDF document", type=["pdf"], key="pdf", help="")
    elif input_type == "YouTube URL":
        youtube_input = st.sidebar.text_input("Enter YouTube URL (must have subtitles enabled)", key="youtube")

    # Place Generate Notes button in the sidebar
    if st.sidebar.button("Generate Notes", key="generate_notes"):
        # NOTE(review): for audio and YouTube input the stored "summary"
        # is the raw transcript, while PDFs are condensed with
        # generate_notes first - confirm whether that asymmetry is intended.
        transcription_text = _load_source_text(input_type, audio_input, pdf_input, youtube_input)
        if transcription_text:
            st.session_state['summary'] = transcription_text
            # New source text invalidates previously generated sections.
            st.session_state['sections'] = {}

    # Display generated notes
    if 'summary' in st.session_state:
        st.markdown("---")
        st.subheader("Generated Notes")
        summary = st.session_state['summary']

        st.markdown("### Summary:")
        _render_box(summary)

        if 'sections' not in st.session_state:
            st.session_state['sections'] = {}
        sections = st.session_state['sections']

        for title in _SECTION_TITLES:
            st.markdown(f"### {title}:")
            # Generate each section once per summary; later reruns reuse it.
            if title not in sections:
                sections[title] = generate_section(title, summary)
            _render_box(sections[title])

        # Option to download the PowerPoint presentation
        ppt_buffer = create_presentation(
            summary,
            *(sections[title] for title in _SECTION_TITLES),
        )
        st.download_button(
            label="Download Presentation",
            data=ppt_buffer,
            file_name="Lecture_Notes_Presentation.pptx",
            mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
        )


if __name__ == "__main__":
    main()