abhishekjoel committed on
Commit
a961ef9
·
verified ·
1 Parent(s): 18e5f93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -240
app.py CHANGED
@@ -1,253 +1,48 @@
1
  import streamlit as st
2
- import openai
3
  import os
4
- from datetime import datetime
5
- import json
6
- from dotenv import load_dotenv
7
- from pydub import AudioSegment
8
- import tempfile
9
- import math
10
- from pathlib import Path
11
- import shutil
12
-
13
- # Load environment variables
14
- load_dotenv()
15
-
16
- # Initialize OpenAI client
17
- client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
18
 
19
- # Constants
20
- MAX_FILE_SIZE = 25 * 1024 * 1024 # 25MB in bytes
21
- CHUNK_LENGTH = 10 * 60 * 1000 # 10 minutes in milliseconds
22
 
23
- @st.cache_data
24
- def save_uploaded_file(uploaded_file):
25
- """Save uploaded file to a temporary directory and return the path"""
26
- try:
27
- # Create a temporary directory that persists
28
- temp_dir = tempfile.mkdtemp()
29
- # Get the file extension
30
- file_extension = Path(uploaded_file.name).suffix
31
- # Create full path with original extension
32
- temp_path = os.path.join(temp_dir, f"input_audio{file_extension}")
33
-
34
- # Save uploaded file
35
- with open(temp_path, "wb") as f:
36
- f.write(uploaded_file.getvalue())
37
-
38
- return temp_path, temp_dir
39
- except Exception as e:
40
- st.error(f"Error saving file: {str(e)}")
41
- return None, None
42
 
43
- def process_audio_file(file_path, temp_dir):
44
- """Process and potentially chunk the audio file"""
45
- try:
46
- # Load audio file
47
- audio = AudioSegment.from_file(file_path)
48
-
49
- # If file is small enough, return it as is
50
- if os.path.getsize(file_path) <= MAX_FILE_SIZE:
51
- return [file_path]
52
-
53
- # Otherwise, chunk the audio
54
- chunks = []
55
- total_length = len(audio)
56
- num_chunks = math.ceil(total_length / CHUNK_LENGTH)
57
-
58
- for i in range(num_chunks):
59
- start_time = i * CHUNK_LENGTH
60
- end_time = min((i + 1) * CHUNK_LENGTH, total_length)
61
-
62
- chunk = audio[start_time:end_time]
63
- chunk_path = os.path.join(temp_dir, f"chunk_{i}.mp3")
64
-
65
- # Export with specific parameters for better compatibility
66
- chunk = chunk.set_channels(1) # Convert to mono
67
- chunk = chunk.set_frame_rate(16000) # Set sample rate to 16kHz
68
- chunk.export(chunk_path, format="mp3", parameters=["-q:a", "0"])
69
-
70
- chunks.append(chunk_path)
71
-
72
- # Verify file exists and has size
73
- if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
74
- raise Exception(f"Failed to create chunk {i}")
75
-
76
- return chunks
77
- except Exception as e:
78
- st.error(f"Error processing audio: {str(e)}")
79
- return None
80
 
81
- def transcribe_audio_chunks(chunks):
82
- """Transcribe audio chunks and combine transcriptions"""
83
- all_segments = []
84
- current_time_offset = 0
85
-
86
- for i, chunk_path in enumerate(chunks):
87
- try:
88
- st.write(f"Processing chunk {i+1} of {len(chunks)}...")
89
-
90
- with open(chunk_path, "rb") as audio:
91
- transcript = client.audio.transcriptions.create(
92
- model="whisper-1",
93
- file=audio,
94
- response_format="verbose_json",
95
- timestamp_granularities=["segment"]
96
- )
97
-
98
- # Adjust timestamps for this chunk
99
- for segment in transcript.segments:
100
- segment.start += current_time_offset
101
- segment.end += current_time_offset
102
- all_segments.extend(transcript.segments)
103
-
104
- # Update time offset for next chunk
105
- current_time_offset += len(AudioSegment.from_file(chunk_path)) / 1000 # Convert to seconds
106
-
107
- except Exception as e:
108
- st.error(f"Error in transcription of chunk {i+1}: {str(e)}")
109
- return None
110
-
111
- # Combine all transcriptions
112
- if transcript and all_segments:
113
- full_transcript = transcript
114
- full_transcript.segments = all_segments
115
- return full_transcript
116
- return None
117
 
118
- def format_timestamp(seconds):
119
- """Convert seconds to HH:MM:SS format"""
120
- hours = int(seconds // 3600)
121
- minutes = int((seconds % 3600) // 60)
122
- seconds = int(seconds % 60)
123
- return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
124
 
125
- def generate_lesson_plan(transcript):
126
- """Generate a structured lesson plan from the transcript"""
127
- try:
128
- system_prompt = """You are an educational content expert. Generate a detailed lesson plan from the lecture transcript.
129
- The lesson plan should include:
130
- 1. Main Topics
131
- 2. Subtopics
132
- 3. Key Learning Objectives
133
- 4. Important Concepts
134
- Format the output in markdown with clear hierarchical structure."""
135
 
136
- response = client.chat.completions.create(
137
- model="gpt-4-turbo-preview",
138
- messages=[
139
- {"role": "system", "content": system_prompt},
140
- {"role": "user", "content": f"Generate a lesson plan from this transcript:\n{transcript}"}
141
- ],
142
- temperature=0.3,
143
- max_tokens=2000
144
- )
145
-
146
- return response.choices[0].message.content
147
- except Exception as e:
148
- st.error(f"Error generating lesson plan: {str(e)}")
149
- return None
150
 
151
- def format_transcript_with_timestamps(transcript_data):
152
- """Format transcript with timestamps in a readable format"""
153
- formatted_text = "# Lecture Transcript with Timestamps\n\n"
154
- for segment in transcript_data.segments:
155
- start_time = format_timestamp(segment.start)
156
- formatted_text += f"**[{start_time}]** {segment.text}\n\n"
157
- return formatted_text
158
 
159
- def cleanup_files(temp_dir):
160
- """Safely clean up temporary files"""
161
- try:
162
- if temp_dir and os.path.exists(temp_dir):
163
- shutil.rmtree(temp_dir)
164
- except Exception as e:
165
- st.warning(f"Warning: Could not clean up temporary files: {str(e)}")
166
 
167
- # Streamlit UI
168
- def main():
169
- st.set_page_config(page_title="Lecture Notes Generator", layout="wide")
170
-
171
- st.title("🎓 Lecture Notes Generator")
172
-
173
- # Create two columns with custom widths
174
- col1, col2 = st.columns([1, 3])
175
-
176
- # Left column for upload (smaller)
177
- with col1:
178
- st.header("Upload Recording")
179
- uploaded_file = st.file_uploader("Choose an audio file", type=['mp3', 'wav', 'm4a'])
180
-
181
- if uploaded_file:
182
- st.audio(uploaded_file)
183
- file_size = uploaded_file.size / (1024 * 1024) # Convert to MB
184
- st.info(f"File size: {file_size:.2f} MB")
185
-
186
- if st.button("Generate Notes", type="primary", use_container_width=True):
187
- # Create tabs in the right column for different outputs
188
- with col2:
189
- tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
190
-
191
- with st.spinner("Processing audio..."):
192
- # Save uploaded file and get temporary paths
193
- temp_path, temp_dir = save_uploaded_file(uploaded_file)
194
-
195
- if temp_path and temp_dir:
196
- try:
197
- # Process and potentially chunk the audio file
198
- chunks = process_audio_file(temp_path, temp_dir)
199
-
200
- if chunks:
201
- # Transcribe chunks
202
- transcript_data = transcribe_audio_chunks(chunks)
203
-
204
- if transcript_data:
205
- # Format transcript with timestamps
206
- formatted_transcript = format_transcript_with_timestamps(transcript_data)
207
-
208
- # Generate lesson plan
209
- lesson_plan = generate_lesson_plan(transcript_data.text)
210
-
211
- # Display transcript in first tab
212
- with tab1:
213
- st.markdown(formatted_transcript)
214
- # Download button for transcript
215
- st.download_button(
216
- label="Download Transcript",
217
- data=formatted_transcript,
218
- file_name=f"transcript_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
219
- mime="text/markdown"
220
- )
221
-
222
- # Display lesson plan in second tab
223
- with tab2:
224
- if lesson_plan:
225
- st.markdown(lesson_plan)
226
- # Download button for lesson plan
227
- st.download_button(
228
- label="Download Lesson Plan",
229
- data=lesson_plan,
230
- file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
231
- mime="text/markdown"
232
- )
233
- finally:
234
- # Clean up temporary files
235
- cleanup_files(temp_dir)
236
-
237
- # Right column instructions when no file is uploaded
238
- if not uploaded_file:
239
- with col2:
240
- st.info("""
241
- 👈 Start by uploading an audio file on the left side.
242
-
243
- The system will automatically:
244
- 1. Transcribe the lecture with timestamps
245
- 2. Generate a structured lesson plan
246
- 3. Provide downloadable versions of both
247
-
248
- Supported formats: MP3, WAV, M4A
249
- Note: Large files will be automatically processed in chunks.
250
- """)
251
 
252
- if __name__ == "__main__":
253
- main()
 
 
 
1
  import streamlit as st
2
+ from utils import split_audio, transcribe_audio, generate_lesson_plan
3
  import os
4
+ import openai
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ # Set up OpenAI API key
7
+ openai.api_key = os.getenv("OPENAI_API_KEY")
 
8
 
9
+ st.title("Lecture Notes Generator")
10
+ st.write("Upload an audio recording of the lecture.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ # Create a two-column layout
13
+ col1, col2 = st.columns([1, 2])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ with col1:
16
+ # File upload for audio
17
+ audio_file = st.file_uploader("Choose an audio file (max 25MB)", type=["mp3", "wav"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ if st.button("Generate Notes"):
20
+ if audio_file is not None:
21
+ # Save the uploaded file
22
+ with open("uploaded_audio.mp3", "wb") as f:
23
+ f.write(audio_file.getbuffer())
 
24
 
25
+ # Split audio into chunks
26
+ chunks = split_audio("uploaded_audio.mp3")
 
 
 
 
 
 
 
 
27
 
28
+ # Transcribe audio
29
+ transcriptions, timestamps = transcribe_audio(chunks)
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # Generate lesson plan from the transcription
32
+ lesson_plan = generate_lesson_plan(transcriptions)
 
 
 
 
 
33
 
34
+ # Display results in the second column
35
+ with col2:
36
+ st.subheader("Transcription with Timestamps")
37
+ for ts, text in zip(timestamps, transcriptions):
38
+ st.write(f"{ts}: {text}")
 
 
39
 
40
+ st.subheader("Generated Lesson Plan")
41
+ st.markdown(lesson_plan)
42
+ else:
43
+ st.error("Please upload an audio file.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ with col2:
46
+ # Initially empty
47
+ st.subheader("Lecture Notes and Lesson Plan")
48
+ st.write("Upload an audio file to generate notes.")