abhishekjoel committed on
Commit
9ccf4ff
·
verified ·
1 Parent(s): 03f8f22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +190 -28
app.py CHANGED
@@ -10,6 +10,16 @@ import math
10
  from pathlib import Path
11
  import shutil
12
 
 
 
 
 
 
 
 
 
 
 
13
  @st.cache_data
14
  def save_uploaded_file(uploaded_file):
15
  """Save uploaded file to a temporary directory and return the path"""
@@ -18,44 +28,23 @@ def save_uploaded_file(uploaded_file):
18
  temp_dir = tempfile.mkdtemp()
19
  # Get the file extension
20
  file_extension = Path(uploaded_file.name).suffix
21
- if not file_extension: # If no extension, default to .wav
22
- file_extension = '.wav'
23
  # Create full path with original extension
24
  temp_path = os.path.join(temp_dir, f"input_audio{file_extension}")
25
 
26
  # Save uploaded file
27
  with open(temp_path, "wb") as f:
28
  f.write(uploaded_file.getvalue())
29
-
30
- # Verify file exists and has content
31
- if not os.path.exists(temp_path):
32
- raise FileNotFoundError(f"Failed to save file at {temp_path}")
33
- if os.path.getsize(temp_path) == 0:
34
- raise ValueError("Saved file is empty")
35
 
36
- st.debug(f"File saved successfully at: {temp_path}")
37
  return temp_path, temp_dir
38
  except Exception as e:
39
  st.error(f"Error saving file: {str(e)}")
40
- if temp_dir and os.path.exists(temp_dir):
41
- shutil.rmtree(temp_dir)
42
  return None, None
43
 
44
  def process_audio_file(file_path, temp_dir):
45
  """Process and potentially chunk the audio file"""
46
  try:
47
- # Verify file exists before processing
48
- if not os.path.exists(file_path):
49
- raise FileNotFoundError(f"Audio file not found at: {file_path}")
50
-
51
- st.debug(f"Processing audio file: {file_path}")
52
-
53
- # Load audio file with explicit format
54
- try:
55
- audio = AudioSegment.from_file(file_path, format=Path(file_path).suffix[1:])
56
- except:
57
- # Fallback to automatic format detection
58
- audio = AudioSegment.from_file(file_path)
59
 
60
  # If file is small enough, return it as is
61
  if os.path.getsize(file_path) <= MAX_FILE_SIZE:
@@ -78,14 +67,187 @@ def process_audio_file(file_path, temp_dir):
78
  chunk = chunk.set_frame_rate(16000) # Set sample rate to 16kHz
79
  chunk.export(chunk_path, format="mp3", parameters=["-q:a", "0"])
80
 
81
- # Verify chunk was created successfully
 
 
82
  if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
83
  raise Exception(f"Failed to create chunk {i}")
84
-
85
- chunks.append(chunk_path)
86
- st.debug(f"Created chunk {i+1}/{num_chunks}")
87
 
88
  return chunks
89
  except Exception as e:
90
  st.error(f"Error processing audio: {str(e)}")
91
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  from pathlib import Path
11
  import shutil
12
 
13
# Load environment variables before the client below reads OPENAI_API_KEY
load_dotenv()

# Initialize the OpenAI client; the API key comes from the OPENAI_API_KEY
# environment variable (os.getenv returns None when it is not set)
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Constants
# Files whose on-disk size is at or below this limit are returned as-is by
# process_audio_file instead of being split.
MAX_FILE_SIZE = 25 * 1024 * 1024 # 25MB in bytes
# NOTE(review): presumably the duration of each chunk when a large file is
# split; the code that uses it is not visible here - confirm.
CHUNK_LENGTH = 10 * 60 * 1000 # 10 minutes in milliseconds
22
+
23
  @st.cache_data
24
  def save_uploaded_file(uploaded_file):
25
  """Save uploaded file to a temporary directory and return the path"""
 
28
  temp_dir = tempfile.mkdtemp()
29
  # Get the file extension
30
  file_extension = Path(uploaded_file.name).suffix
 
 
31
  # Create full path with original extension
32
  temp_path = os.path.join(temp_dir, f"input_audio{file_extension}")
33
 
34
  # Save uploaded file
35
  with open(temp_path, "wb") as f:
36
  f.write(uploaded_file.getvalue())
 
 
 
 
 
 
37
 
 
38
  return temp_path, temp_dir
39
  except Exception as e:
40
  st.error(f"Error saving file: {str(e)}")
 
 
41
  return None, None
42
 
43
  def process_audio_file(file_path, temp_dir):
44
  """Process and potentially chunk the audio file"""
45
  try:
46
+ # Load audio file
47
+ audio = AudioSegment.from_file(file_path)
 
 
 
 
 
 
 
 
 
 
48
 
49
  # If file is small enough, return it as is
50
  if os.path.getsize(file_path) <= MAX_FILE_SIZE:
 
67
  chunk = chunk.set_frame_rate(16000) # Set sample rate to 16kHz
68
  chunk.export(chunk_path, format="mp3", parameters=["-q:a", "0"])
69
 
70
+ chunks.append(chunk_path)
71
+
72
+ # Verify file exists and has size
73
  if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
74
  raise Exception(f"Failed to create chunk {i}")
 
 
 
75
 
76
  return chunks
77
  except Exception as e:
78
  st.error(f"Error processing audio: {str(e)}")
79
+ return None
80
+
81
def transcribe_audio_chunks(chunks):
    """Transcribe audio chunks and combine them into a single transcript.

    Each chunk is sent to the transcription endpoint in order. Segment
    timestamps are shifted by the accumulated duration of the preceding
    chunks so they are relative to the start of the whole recording.

    Args:
        chunks: Iterable of file paths to audio chunks, in playback order.

    Returns:
        The transcription object of the last chunk, with ``segments``
        replaced by the segments of all chunks and ``text`` replaced by the
        text of all chunks joined with spaces. Returns None when ``chunks``
        is empty, when no segments were produced, or when any chunk fails
        (the error is reported through ``st.error``).
    """
    all_segments = []
    all_texts = []
    current_time_offset = 0
    # Bound up front so an empty `chunks` falls through to `return None`
    # instead of raising UnboundLocalError in the final check.
    transcript = None

    for i, chunk_path in enumerate(chunks):
        try:
            st.write(f"Processing chunk {i+1} of {len(chunks)}...")

            with open(chunk_path, "rb") as audio:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio,
                    response_format="verbose_json",
                    timestamp_granularities=["segment"]
                )

            # Adjust timestamps for this chunk
            for segment in transcript.segments:
                segment.start += current_time_offset
                segment.end += current_time_offset
            all_segments.extend(transcript.segments)

            # Keep every chunk's text; otherwise only the last chunk's text
            # would survive on the returned object.
            if transcript.text:
                all_texts.append(transcript.text)

            # Update time offset for next chunk
            # (len() of an AudioSegment is its duration in milliseconds)
            current_time_offset += len(AudioSegment.from_file(chunk_path)) / 1000  # Convert to seconds

        except Exception as e:
            st.error(f"Error in transcription of chunk {i+1}: {str(e)}")
            return None

    # Combine all transcriptions
    if transcript and all_segments:
        full_transcript = transcript
        full_transcript.segments = all_segments
        full_transcript.text = " ".join(all_texts)
        return full_transcript
    return None
117
+
118
def format_timestamp(seconds):
    """Render a duration given in seconds as a zero-padded HH:MM:SS string.

    Fractions of a second are dropped; the hours field is not capped at 24.
    """
    # Hours, minutes within the hour, seconds within the minute.
    fields = (seconds // 3600, (seconds % 3600) // 60, seconds % 60)
    return ":".join(f"{int(field):02d}" for field in fields)
124
+
125
def generate_lesson_plan(transcript):
    """Ask the chat model to turn a lecture transcript into a lesson plan.

    Args:
        transcript: Transcript text; it is interpolated into the user message.

    Returns:
        The message content of the first completion choice, or None if any
        exception is raised (the error is shown with ``st.error``).
    """
    try:
        instructions = """You are an educational content expert. Generate a detailed lesson plan from the lecture transcript.
        The lesson plan should include:
        1. Main Topics
        2. Subtopics
        3. Key Learning Objectives
        4. Important Concepts
        Format the output in markdown with clear hierarchical structure."""

        conversation = [
            {"role": "system", "content": instructions},
            {"role": "user", "content": f"Generate a lesson plan from this transcript:\n{transcript}"},
        ]

        completion = client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=conversation,
            temperature=0.3,
            max_tokens=2000,
        )

        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        st.error(f"Error generating lesson plan: {str(err)}")
        return None
150
+
151
def format_transcript_with_timestamps(transcript_data):
    """Build a markdown document listing every segment with its start time.

    Args:
        transcript_data: Object exposing ``segments``; each segment provides
            ``start`` (seconds) and ``text``.

    Returns:
        A markdown string: a title line followed by one
        ``**[HH:MM:SS]** text`` paragraph per segment.
    """
    title = "# Lecture Transcript with Timestamps\n\n"
    paragraphs = [
        f"**[{format_timestamp(entry.start)}]** {entry.text}\n\n"
        for entry in transcript_data.segments
    ]
    return title + "".join(paragraphs)
158
+
159
def cleanup_files(temp_dir):
    """Remove a temporary directory tree without ever raising.

    A falsy or non-existent ``temp_dir`` is ignored. Any failure during the
    removal is reported as a Streamlit warning instead of propagating.
    """
    try:
        # Nothing to do when no directory was given or it is already gone.
        if not temp_dir or not os.path.exists(temp_dir):
            return
        shutil.rmtree(temp_dir)
    except Exception as err:
        st.warning(f"Warning: Could not clean up temporary files: {str(err)}")
166
+
167
+ # Streamlit UI
168
def main():
    """Render the Streamlit page and run the upload -> transcript -> lesson plan flow.

    The page has a narrow left column (file upload and the "Generate Notes"
    button) and a wide right column (result tabs, or usage instructions while
    no file is uploaded). Temporary files created for a run are removed in a
    ``finally`` block once processing ends.
    """
    st.set_page_config(page_title="Lecture Notes Generator", layout="wide")

    st.title("🎓 Lecture Notes Generator")

    # Create two columns with custom widths
    col1, col2 = st.columns([1, 3])

    # Left column for upload (smaller)
    with col1:
        st.header("Upload Recording")
        uploaded_file = st.file_uploader("Choose an audio file", type=['mp3', 'wav', 'm4a'])

        if uploaded_file:
            st.audio(uploaded_file)
            file_size = uploaded_file.size / (1024 * 1024) # Convert to MB
            st.info(f"File size: {file_size:.2f} MB")

            if st.button("Generate Notes", type="primary", use_container_width=True):
                # Create tabs in the right column for different outputs
                with col2:
                    tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])

                    with st.spinner("Processing audio..."):
                        # Save uploaded file and get temporary paths
                        # (save_uploaded_file returns (None, None) on failure)
                        temp_path, temp_dir = save_uploaded_file(uploaded_file)

                        if temp_path and temp_dir:
                            try:
                                # Process and potentially chunk the audio file
                                # (returns None after reporting an error)
                                chunks = process_audio_file(temp_path, temp_dir)

                                if chunks:
                                    # Transcribe chunks (None when any chunk fails)
                                    transcript_data = transcribe_audio_chunks(chunks)

                                    if transcript_data:
                                        # Format transcript with timestamps
                                        formatted_transcript = format_transcript_with_timestamps(transcript_data)

                                        # Generate lesson plan from the plain transcript text
                                        # (None when generation fails; handled in tab2 below)
                                        lesson_plan = generate_lesson_plan(transcript_data.text)

                                        # Display transcript in first tab
                                        with tab1:
                                            st.markdown(formatted_transcript)
                                            # Download button for transcript
                                            # NOTE(review): assumes `datetime` is imported at the top
                                            # of the file (not visible here) - confirm.
                                            st.download_button(
                                                label="Download Transcript",
                                                data=formatted_transcript,
                                                file_name=f"transcript_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
                                                mime="text/markdown"
                                            )

                                        # Display lesson plan in second tab
                                        with tab2:
                                            if lesson_plan:
                                                st.markdown(lesson_plan)
                                                # Download button for lesson plan
                                                st.download_button(
                                                    label="Download Lesson Plan",
                                                    data=lesson_plan,
                                                    file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
                                                    mime="text/markdown"
                                                )
                            finally:
                                # Clean up temporary files, whether or not processing succeeded
                                # NOTE(review): save_uploaded_file is wrapped in st.cache_data,
                                # while this call deletes the directory it returned; a cached
                                # result on a later run may point at a removed path - confirm.
                                cleanup_files(temp_dir)

    # Right column instructions when no file is uploaded
    if not uploaded_file:
        with col2:
            st.info("""
            👈 Start by uploading an audio file on the left side.

            The system will automatically:
            1. Transcribe the lecture with timestamps
            2. Generate a structured lesson plan
            3. Provide downloadable versions of both

            Supported formats: MP3, WAV, M4A
            Note: Large files will be automatically processed in chunks.
            """)
251
+
252
# Build the UI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()