AI_notes / app.py
abhishekjoel's picture
Update app.py
be22bcb verified
import os
import openai
import streamlit as st
import io
from pydub import AudioSegment
from youtube_transcript_api import YouTubeTranscriptApi
import PyPDF2
from pptx import Presentation
# Read the OpenAI credential from the environment (None when the variable is unset)
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Function to transcribe audio using OpenAI Whisper
def transcribe_audio(audio_file):
    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

    The audio is decoded with pydub, downmixed to mono 16 kHz, re-encoded as
    WAV in memory and uploaded for transcription.

    Args:
        audio_file: Anything ``AudioSegment.from_file`` can decode; the app
            passes the file object returned by the Streamlit uploader.

    Returns:
        The ``verbose_json`` response from ``openai.Audio.transcribe``, or
        ``None`` when decoding or transcription fails (the error is reported
        through ``st.error``).
    """
    try:
        audio = AudioSegment.from_file(audio_file)

        # Uncompressed WAV at the source rate/channel count is many times
        # larger than a compressed upload, so a file that respects the 25MB
        # limit advertised in the UI could exceed it after conversion.
        # Shrink the payload before exporting.
        # NOTE(review): 16 kHz mono is understood to be the rate Whisper works
        # at internally, so this should not cost accuracy - confirm.
        audio = audio.set_channels(1).set_frame_rate(16000)

        buffer = io.BytesIO()
        audio.export(buffer, format="wav")
        buffer.seek(0)
        # BytesIO has no file name of its own; give the upload one.
        buffer.name = "audio.wav"

        return openai.Audio.transcribe(
            "whisper-1",
            file=buffer,
            response_format="verbose_json",
        )
    except Exception as e:
        # UI boundary: surface any decoding/API failure instead of crashing.
        st.error(f"Error during transcription: {e}")
        return None
# Function to extract text from PDF and split it into chunks
def extract_text_from_pdf(pdf_file, chunk_size=1500):
    """Extract the text of a PDF and split it into word-based chunks.

    Args:
        pdf_file: Binary file-like object exposing ``read()`` that yields the
            PDF bytes.
        chunk_size: Maximum number of whitespace-separated words per chunk.
            Defaults to 1500.

    Returns:
        A list of strings, each holding at most ``chunk_size`` words joined
        by single spaces. The list is empty when no text could be extracted.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    reader = PyPDF2.PdfReader(io.BytesIO(pdf_file.read()))
    # Treat a page that yields no text as empty rather than failing on it.
    page_texts = [page.extract_text() or "" for page in reader.pages]

    # Chunking is word-based, so the exact whitespace between pages is
    # irrelevant; a newline simply keeps adjacent pages from fusing words.
    words = "\n".join(page_texts).split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
# Helper: pull the video ID out of a YouTube URL
def _extract_video_id(url):
    """Return the video ID contained in ``url``, or ``None`` if none is found.

    Recognises a ``v`` query parameter in any position (``watch?v=ID``,
    ``watch?feature=x&v=ID``), ``youtu.be/ID`` short links, and
    ``/embed/ID``, ``/shorts/ID``, ``/live/ID`` and ``/v/ID`` paths.
    URL fragments are ignored.
    """
    from urllib.parse import parse_qs, urlparse

    candidate = url.strip()
    if "://" not in candidate:
        # Without a scheme urlparse would treat the host as part of the path.
        candidate = "https://" + candidate
    parsed = urlparse(candidate)

    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [segment for segment in parsed.path.split("/") if segment]
    host = (parsed.hostname or "").lower()
    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None
    if len(segments) >= 2 and segments[0] in {"embed", "shorts", "live", "v"}:
        return segments[1]
    return None


# Function to get YouTube transcript
def get_youtube_transcript(url):
    """Fetch the English transcript of a YouTube video as plain text.

    Args:
        url: Link to the video (watch, short-link, embed, shorts or live form).

    Returns:
        The transcript entries' text joined with newlines, or ``None`` when
        the URL contains no video ID or the transcript cannot be fetched
        (the reason is reported through ``st.error``).
    """
    try:
        video_id = _extract_video_id(url)
        if video_id is None:
            st.error("Invalid YouTube URL.")
            return None

        # List the available transcripts, pick the English one, download it.
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = transcript_list.find_transcript(['en'])
        transcript_data = transcript.fetch()

        return "\n".join(entry['text'] for entry in transcript_data)
    except Exception as e:
        # UI boundary: malformed URLs and API failures are shown, not raised.
        st.error(f"Error fetching YouTube transcript: {e}")
        return None
# Function to generate notes for each chunk
def generate_notes(text):
    """Ask ``gpt-3.5-turbo`` for bullet-point notes covering ``text``.

    Args:
        text: The passage to turn into notes.

    Returns:
        The content of the first completion choice, stripped of leading and
        trailing whitespace.
    """
    user_message = {
        'role': 'user',
        'content': f"Create comprehensive notes in bullet points from the following text:\n\n{text}",
    }
    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[user_message],
        max_tokens=1000,
    )
    reply = completion.choices[0].message.content
    return reply.strip()
# Function to generate additional sections
def generate_section(title, text):
    """Ask ``gpt-3.5-turbo`` to write a short titled section based on ``text``.

    Args:
        title: Heading the generated section should carry.
        text: Source material the section is derived from.

    Returns:
        The content of the first completion choice, stripped of leading and
        trailing whitespace.
    """
    request = {
        'model': 'gpt-3.5-turbo',
        'messages': [
            {
                'role': 'user',
                'content': f"Generate a section titled '{title}' with 3-6 sentences based on the following text:\n\n{text}",
            },
        ],
        'max_tokens': 500,
    }
    result = openai.ChatCompletion.create(**request)
    first_choice = result.choices[0]
    return first_choice.message.content.strip()
# Function to create PowerPoint presentation
def create_presentation(summary, key_concepts, key_takeaways, case_studies, glossary, faqs):
    """Build a PowerPoint deck: a title slide followed by one slide per section.

    Args:
        summary: Text for the "Summary" slide.
        key_concepts: Text for the "Key Concepts" slide.
        key_takeaways: Text for the "Key Takeaways" slide.
        case_studies: Text for the "Case Studies/Examples" slide.
        glossary: Text for the "Glossary" slide.
        faqs: Text for the "FAQs" slide.

    Returns:
        An ``io.BytesIO`` holding the saved presentation, rewound to the start.
    """
    deck = Presentation()

    # Cover slide uses layout 0: title plus subtitle placeholder.
    cover = deck.slides.add_slide(deck.slide_layouts[0])
    cover.shapes.title.text = "Lecture Notes"
    cover.placeholders[1].text = "Generated by AI Notes Generation Bot"

    # One layout-1 slide per section, in presentation order.
    sections = (
        ("Summary", summary),
        ("Key Concepts", key_concepts),
        ("Key Takeaways", key_takeaways),
        ("Case Studies/Examples", case_studies),
        ("Glossary", glossary),
        ("FAQs", faqs),
    )
    for heading, body in sections:
        section_slide = deck.slides.add_slide(deck.slide_layouts[1])
        section_slide.shapes.title.text = heading
        section_slide.placeholders[1].text = body

    # Serialise in memory so the caller can offer the deck as a download.
    output = io.BytesIO()
    deck.save(output)
    output.seek(0)
    return output
# Headings of the AI-generated sections shown below the summary, in display order
_SECTION_TITLES = (
    "Key Concepts",
    "Key Takeaways",
    "Case Studies/Examples",
    "Glossary",
    "FAQs",
)

# Custom CSS for the gradient background and dark widgets.
# Kept flush-left so the markdown renderer treats it as an HTML block.
_PAGE_CSS = """
<style>
.stApp {
background: linear-gradient(180deg,
rgba(64,224,208,0.7) 0%,
rgba(32,112,104,0.4) 35%,
rgba(0,0,0,0) 100%
);
}
.css-1d391kg {
background: none;
}
.stMarkdown {
color: #ffffff;
}
.css-1y4p8pa {
max-width: 100%;
padding: 2rem;
}
div[data-testid="stSidebarContent"] {
background-color: rgba(255,255,255,0.1);
}
.stTextArea textarea {
background-color: #000000 !important;
color: #ffffff !important;
}
.stButton button {
background-color: #40E0D0;
color: black;
}
.stButton button:hover {
background-color: #48D1CC;
color: black;
}
h1, h2, h3, h4, h5, h6 {
color: white !important;
}
.css-184tjsw p {
color: white !important;
}
.stTextInput input {
color: white !important;
background-color: rgba(0, 0, 0, 0.5) !important;
}
</style>
"""


# Helper: render one block of notes inside the black rounded box
def _render_box(text):
    """Display ``text`` in the styled box used for every notes section."""
    from html import escape

    # The text originates from uploads, transcripts and model output, so it
    # must not be interpreted as markup when embedded in raw HTML. Newlines
    # become <br> so multi-line notes keep their line structure.
    safe_text = escape(text).replace("\n", "<br>")
    st.markdown(
        "<div style='background-color: black; color: white; padding: 10px; border-radius: 5px;'>"
        f"<p>{safe_text}</p>"
        "</div>",
        unsafe_allow_html=True,
    )


# Helper: turn the selected input into the text stored as the summary
def _collect_source_text(input_type, audio_input, pdf_input, youtube_input):
    """Return the notes/transcript text for the chosen input, or "" on failure.

    Every failure path reports its reason through ``st.error``.
    """
    if input_type == "Audio File" and audio_input:
        transcription = transcribe_audio(audio_input)
        if not transcription:
            st.error("Transcription failed.")
            return ""
        return "\n".join(seg['text'] for seg in transcription['segments'])

    if input_type == "PDF Document" and pdf_input:
        chunks = extract_text_from_pdf(pdf_input)
        if not chunks:
            # e.g. a scanned PDF: nothing to summarise, so say so explicitly.
            st.error("No extractable text found in the PDF.")
            return ""
        return "\n".join(generate_notes(chunk) for chunk in chunks)

    if input_type == "YouTube URL" and youtube_input:
        transcript = get_youtube_transcript(youtube_input)
        if not transcript:
            st.error("Failed to retrieve YouTube transcript.")
            return ""
        return transcript

    st.error("Please provide valid input.")
    return ""


# Main app
def main():
    """Run the Streamlit page: collect an input, generate notes, offer a deck.

    The sidebar selects an audio file, a PDF or a YouTube URL. Pressing
    "Generate Notes" stores the resulting text in
    ``st.session_state['summary']``. While a summary is present, the page
    shows it together with the AI-generated sections and a button to download
    everything as a PowerPoint presentation.

    NOTE(review): for audio and YouTube inputs the stored "summary" is the raw
    transcript, whereas PDFs are condensed with ``generate_notes`` first -
    confirm this asymmetry is intended.
    """
    st.set_page_config(layout="wide")
    st.markdown(_PAGE_CSS, unsafe_allow_html=True)
    st.markdown("<h1 style='text-align: center;'>AI Notes Generation Bot 🤖</h1>", unsafe_allow_html=True)

    # Left sidebar for upload options
    st.sidebar.header("Upload your file:")
    input_type = st.sidebar.selectbox("Select Input Type", ["Audio File", "PDF Document", "YouTube URL"])
    st.sidebar.markdown("### Steps to Use the Tool:")
    st.sidebar.markdown("1. Upload an audio file (25MB max), PDF, or YouTube URL for notes.")
    st.sidebar.markdown("2. Click on 'Generate Notes' to get AI-generated notes.")
    st.sidebar.markdown("3. Use the 'Download Presentation' button to save your notes locally.")

    # Only the widget matching the selected input type is shown.
    audio_input = pdf_input = youtube_input = None
    if input_type == "Audio File":
        audio_input = st.sidebar.file_uploader("Upload audio file", type=["mp3", "wav"], key="audio", help="Supports mp3 and wav formats up to 25MB")
    elif input_type == "PDF Document":
        pdf_input = st.sidebar.file_uploader("Upload PDF document", type=["pdf"], key="pdf", help="")
    elif input_type == "YouTube URL":
        youtube_input = st.sidebar.text_input("Enter YouTube URL (must have subtitles enabled)", key="youtube")

    if st.sidebar.button("Generate Notes", key="generate_notes"):
        source_text = _collect_source_text(input_type, audio_input, pdf_input, youtube_input)
        if source_text:
            st.session_state['summary'] = source_text
            # New source text: discard sections generated for the previous one.
            st.session_state['section_cache'] = {}

    # Display generated notes
    if 'summary' in st.session_state:
        st.markdown("---")
        st.subheader("Generated Notes")
        summary = st.session_state['summary']

        # The script re-runs on every widget interaction (including a click
        # on the download button). Caching the sections in session state
        # avoids repeating five model calls on each of those reruns.
        if 'section_cache' not in st.session_state:
            st.session_state['section_cache'] = {}
        sections = st.session_state['section_cache']

        st.markdown("### Summary:")
        _render_box(summary)

        for title in _SECTION_TITLES:
            st.markdown(f"### {title}:")
            if title not in sections:
                sections[title] = generate_section(title, summary)
            _render_box(sections[title])

        # Option to download the PowerPoint presentation
        ppt_buffer = create_presentation(
            summary,
            sections["Key Concepts"],
            sections["Key Takeaways"],
            sections["Case Studies/Examples"],
            sections["Glossary"],
            sections["FAQs"],
        )
        st.download_button(
            label="Download Presentation",
            data=ppt_buffer,
            file_name="Lecture_Notes_Presentation.pptx",
            mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
        )


if __name__ == "__main__":
    main()