import os
import openai
import streamlit as st
import io
from pydub import AudioSegment
from youtube_transcript_api import YouTubeTranscriptApi
import PyPDF2
from pptx import Presentation
# Set OpenAI API key
# The key is read from the OPENAI_API_KEY environment variable at import time.
# os.getenv returns None when the variable is unset, so api_key is None in
# that case.
openai.api_key = os.getenv('OPENAI_API_KEY')
# Function to transcribe audio using OpenAI Whisper
def transcribe_audio(audio_file):
    """Transcribe an audio source with the OpenAI ``whisper-1`` model.

    The source is decoded with ``AudioSegment.from_file``, re-encoded as WAV
    into an in-memory buffer, and passed to ``openai.Audio.transcribe`` with
    the ``verbose_json`` response format.

    Args:
        audio_file: Audio source handed straight to ``AudioSegment.from_file``
            (``main`` passes the Streamlit upload object).

    Returns:
        The response object returned by ``openai.Audio.transcribe``, or
        ``None`` if any step raised. In the failure case the error message is
        shown to the user through ``st.error``.
    """
    try:
        # Decode whatever was supplied and re-encode it as WAV in memory.
        wav_stream = io.BytesIO()
        AudioSegment.from_file(audio_file).export(wav_stream, format="wav")
        wav_stream.seek(0)

        # Give the buffer a file name; presumably the API client uses it to
        # recognise the upload's format -- TODO confirm.
        wav_stream.name = "audio.wav"

        return openai.Audio.transcribe(
            "whisper-1",
            file=wav_stream,
            response_format="verbose_json",
        )
    except Exception as exc:
        st.error(f"Error during transcription: {str(exc)}")
        return None
# Function to extract text from PDF and split it into chunks
def extract_text_from_pdf(pdf_file, chunk_size=1500):
    """Extract the text of a PDF and split it into word-count-limited chunks.

    Args:
        pdf_file: Binary file-like object; its full contents are read with
            ``pdf_file.read()`` and parsed by ``PyPDF2.PdfReader``.
        chunk_size: Maximum number of whitespace-separated words per chunk.
            Defaults to 1500, the value previously hard-coded here.

    Returns:
        A list of strings, each holding at most ``chunk_size`` words joined by
        single spaces. The list is empty when no text could be extracted.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    reader = PyPDF2.PdfReader(io.BytesIO(pdf_file.read()))

    # `or ""` keeps a page that yields no text from breaking the join below.
    page_texts = [(page.extract_text() or "") for page in reader.pages]

    # Splitting on any whitespace makes the page separator irrelevant to the
    # resulting word list; joining once avoids repeated string concatenation.
    words = "\n".join(page_texts).split()

    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]
# Function to get YouTube transcript
def _extract_video_id(url):
    """Return the video ID contained in *url*, or ``None`` if none is found.

    Recognised forms: a ``v`` query parameter in any position
    (``watch?v=ID``, ``watch?feature=share&v=ID``), ``youtu.be/ID`` short
    links, and ``/embed/ID``, ``/shorts/ID``, ``/live/ID`` and ``/v/ID``
    paths. Fragments, extra query parameters and trailing slashes are ignored.
    """
    from urllib.parse import parse_qs, urlparse

    candidate = url.strip()
    # urlparse only populates netloc when a scheme is present, so add one for
    # inputs such as "youtu.be/ID" or "youtube.com/watch?v=ID".
    if "://" not in candidate:
        candidate = f"https://{candidate}"
    parsed = urlparse(candidate)

    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [segment for segment in parsed.path.split("/") if segment]
    if parsed.netloc.lower() in ("youtu.be", "www.youtu.be"):
        return segments[0] if segments else None
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]
    return None


def get_youtube_transcript(url):
    """Fetch the English transcript of a YouTube video as one string.

    Args:
        url: YouTube URL entered by the user.

    Returns:
        The transcript entries' texts joined with newlines, or ``None`` when
        the URL contains no recognisable video ID or when fetching fails. In
        both failure cases a message is shown through ``st.error``.
    """
    try:
        # Extract video ID from URL
        video_id = _extract_video_id(url)
        if not video_id:
            st.error("Invalid YouTube URL.")
            return None

        # Look up the available transcripts and pick the English one.
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = transcript_list.find_transcript(['en'])

        # NOTE(review): assumes fetch() yields mapping-like entries with a
        # 'text' key -- confirm against the installed library version.
        transcript_data = transcript.fetch()
        return "\n".join(entry['text'] for entry in transcript_data)
    except Exception as e:
        st.error(f"Error fetching YouTube transcript: {str(e)}")
        return None
# Function to generate notes for each chunk
def generate_notes(text):
    """Ask ``gpt-3.5-turbo`` for bullet-point notes covering *text*.

    Args:
        text: Source text to summarise; it is embedded verbatim in the prompt.

    Returns:
        The content of the first completion choice with surrounding
        whitespace stripped. The completion is capped at 1000 tokens.
    """
    messages = [
        {
            'role': 'user',
            'content': f"Create comprehensive notes in bullet points from the following text:\n\n{text}",
        }
    ]
    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=messages,
        max_tokens=1000,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
# Function to generate additional sections
def generate_section(title, text):
    """Ask ``gpt-3.5-turbo`` to write a short titled section based on *text*.

    Args:
        title: Section title; it is inserted into the prompt inside single
            quotes.
        text: Source text the section should be based on.

    Returns:
        The content of the first completion choice with surrounding
        whitespace stripped. The completion is capped at 500 tokens.
    """
    instruction = (
        f"Generate a section titled '{title}' with 3-6 sentences "
        f"based on the following text:\n\n{text}"
    )
    completion = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[{'role': 'user', 'content': instruction}],
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
# Function to create PowerPoint presentation
def create_presentation(summary, key_concepts, key_takeaways, case_studies, glossary, faqs):
    """Build a PowerPoint deck from the generated notes.

    The deck starts with a title slide (layout index 0), followed by one slide
    per section (layout index 1) in this order: Summary, Key Concepts,
    Key Takeaways, Case Studies/Examples, Glossary, FAQs.

    Args:
        summary: Text for the "Summary" slide.
        key_concepts: Text for the "Key Concepts" slide.
        key_takeaways: Text for the "Key Takeaways" slide.
        case_studies: Text for the "Case Studies/Examples" slide.
        glossary: Text for the "Glossary" slide.
        faqs: Text for the "FAQs" slide.

    Returns:
        An ``io.BytesIO`` holding the saved presentation, rewound to offset 0.
    """
    deck = Presentation()

    # Title slide
    cover = deck.slides.add_slide(deck.slide_layouts[0])
    cover_title = cover.shapes.title
    cover_subtitle = cover.placeholders[1]
    cover_title.text = "Lecture Notes"
    cover_subtitle.text = "Generated by AI Notes Generation Bot"

    # One slide per section, in display order
    section_slides = (
        ("Summary", summary),
        ("Key Concepts", key_concepts),
        ("Key Takeaways", key_takeaways),
        ("Case Studies/Examples", case_studies),
        ("Glossary", glossary),
        ("FAQs", faqs),
    )
    for heading, body in section_slides:
        page = deck.slides.add_slide(deck.slide_layouts[1])
        page.shapes.title.text = heading
        page.placeholders[1].text = body

    # Serialise into memory so the caller can offer it as a download
    output = io.BytesIO()
    deck.save(output)
    output.seek(0)
    return output
# Main app
# Titles of the AI-generated sections, in display order. The order also matches
# the positional order of create_presentation's section arguments.
_SECTION_TITLES = (
    "Key Concepts",
    "Key Takeaways",
    "Case Studies/Examples",
    "Glossary",
    "FAQs",
)


def _render_box(text):
    """Show *text* inside the black rounded box used for every notes section.

    The text comes from transcripts, PDFs or model output, so it is
    HTML-escaped before being placed into markup rendered with
    ``unsafe_allow_html=True``.
    """
    import html

    st.markdown(
        "<div style='background-color: black; color: white; padding: 10px; border-radius: 5px;'>"
        f"<p>{html.escape(text)}</p>"
        "</div>",
        unsafe_allow_html=True,
    )


def main():
    """Run the Streamlit app: collect input, generate notes, offer a download.

    Flow:
      1. Configure the page and inject the custom CSS.
      2. Let the user choose an input type in the sidebar (audio file, PDF or
         YouTube URL) and provide the matching input.
      3. On "Generate Notes", turn the input into text and store it in
         ``st.session_state['summary']``. For audio and YouTube input this is
         the transcript itself; for PDFs it is the per-chunk notes joined with
         newlines.
      4. Whenever a summary is stored, render it together with the generated
         sections and a button to download them as a presentation.

    Generated sections are cached in ``st.session_state['sections']`` so that
    a rerun of the script (for example after clicking the download button)
    does not repeat the OpenAI calls. The cache is cleared whenever a new
    summary is stored.
    """
    st.set_page_config(layout="wide")

    # Add custom CSS for gradient background
    st.markdown("""
<style>
.stApp {
background: linear-gradient(180deg,
rgba(64,224,208,0.7) 0%,
rgba(32,112,104,0.4) 35%,
rgba(0,0,0,0) 100%
);
}
.css-1d391kg {
background: none;
}
.stMarkdown {
color: #ffffff;
}
.css-1y4p8pa {
max-width: 100%;
padding: 2rem;
}
div[data-testid="stSidebarContent"] {
background-color: rgba(255,255,255,0.1);
}
.stTextArea textarea {
background-color: #000000 !important;
color: #ffffff !important;
}
.stButton button {
background-color: #40E0D0;
color: black;
}
.stButton button:hover {
background-color: #48D1CC;
color: black;
}
h1, h2, h3, h4, h5, h6 {
color: white !important;
}
.css-184tjsw p {
color: white !important;
}
.stTextInput input {
color: white !important;
background-color: rgba(0, 0, 0, 0.5) !important;
}
</style>
""", unsafe_allow_html=True)
    st.markdown("<h1 style='text-align: center;'>AI Notes Generation Bot 🤖</h1>", unsafe_allow_html=True)

    # Left sidebar for upload options
    st.sidebar.header("Upload your file:")
    input_type = st.sidebar.selectbox("Select Input Type", ["Audio File", "PDF Document", "YouTube URL"])
    st.sidebar.markdown("### Steps to Use the Tool:")
    st.sidebar.markdown("1. Upload an audio file (25MB max), PDF, or YouTube URL for notes.")
    st.sidebar.markdown("2. Click on 'Generate Notes' to get AI-generated notes.")
    st.sidebar.markdown("3. Use the 'Download Presentation' button to save your notes locally.")

    # File uploader or URL input based on selected type
    audio_input = pdf_input = youtube_input = None
    if input_type == "Audio File":
        audio_input = st.sidebar.file_uploader("Upload audio file", type=["mp3", "wav"], key="audio", help="Supports mp3 and wav formats up to 25MB")
    elif input_type == "PDF Document":
        pdf_input = st.sidebar.file_uploader("Upload PDF document", type=["pdf"], key="pdf", help="")
    elif input_type == "YouTube URL":
        youtube_input = st.sidebar.text_input("Enter YouTube URL (must have subtitles enabled)", key="youtube")

    # Place Generate Notes button in the sidebar
    if st.sidebar.button("Generate Notes", key="generate_notes"):
        transcription_text = ""
        if input_type == "Audio File" and audio_input:
            transcription = transcribe_audio(audio_input)
            if transcription:
                transcription_text = "\n".join(seg['text'] for seg in transcription['segments'])
            else:
                st.error("Transcription failed.")
        elif input_type == "PDF Document" and pdf_input:
            chunks = extract_text_from_pdf(pdf_input)
            if chunks:
                transcription_text = "\n".join(generate_notes(chunk) for chunk in chunks)
            else:
                # Without this the click would appear to do nothing at all.
                st.error("No extractable text was found in the PDF.")
        elif input_type == "YouTube URL" and youtube_input:
            transcription_text = get_youtube_transcript(youtube_input)
            if not transcription_text:
                st.error("Failed to retrieve YouTube transcript.")
        else:
            st.error("Please provide valid input.")

        if transcription_text:
            st.session_state['summary'] = transcription_text
            # Sections derived from the previous summary are now stale.
            st.session_state['sections'] = {}

    # Display generated notes
    if 'summary' in st.session_state:
        st.markdown("---")
        st.subheader("Generated Notes")
        summary = st.session_state['summary']

        st.markdown("### Summary:")
        _render_box(summary)

        if 'sections' not in st.session_state:
            st.session_state['sections'] = {}
        sections = st.session_state['sections']

        for title in _SECTION_TITLES:
            st.markdown(f"### {title}:")
            # Generate each section once per summary; later reruns reuse it.
            if title not in sections:
                sections[title] = generate_section(title, summary)
            _render_box(sections[title])

        # Option to download the PowerPoint presentation
        ppt_buffer = create_presentation(
            summary,
            sections["Key Concepts"],
            sections["Key Takeaways"],
            sections["Case Studies/Examples"],
            sections["Glossary"],
            sections["FAQs"],
        )
        st.download_button(
            label="Download Presentation",
            data=ppt_buffer,
            file_name="Lecture_Notes_Presentation.pptx",
            mime="application/vnd.openxmlformats-officedocument.presentationml.presentation"
        )
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()