abhishekjoel committed on
Commit
02ba267
·
verified ·
1 Parent(s): e9b2818

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -23
app.py CHANGED
@@ -4,6 +4,9 @@ import os
4
  from datetime import datetime
5
  import json
6
  from dotenv import load_dotenv
 
 
 
7
 
8
  # Load environment variables
9
  load_dotenv()
@@ -11,20 +14,74 @@ load_dotenv()
11
  # Initialize OpenAI client
12
  client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
13
 
14
- def transcribe_audio(audio_file):
15
- """Transcribe audio using Whisper API with timestamps"""
16
- try:
17
- with open(audio_file, "rb") as audio:
18
- transcript = client.audio.transcriptions.create(
19
- model="whisper-1",
20
- file=audio,
21
- response_format="verbose_json",
22
- timestamp_granularities=["segment"]
23
- )
24
- return transcript
25
- except Exception as e:
26
- st.error(f"Error in transcription: {str(e)}")
27
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def format_timestamp(seconds):
30
  """Convert seconds to HH:MM:SS format"""
@@ -83,6 +140,8 @@ def main():
83
 
84
  if uploaded_file:
85
  st.audio(uploaded_file)
 
 
86
 
87
  if st.button("Generate Notes", type="primary", use_container_width=True):
88
  # Create tabs in the right column for different outputs
@@ -90,13 +149,12 @@ def main():
90
  tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
91
 
92
  with st.spinner("Processing audio..."):
93
- # Save uploaded file temporarily
94
- temp_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
95
- with open(temp_path, "wb") as f:
96
- f.write(uploaded_file.getvalue())
 
97
 
98
- # Transcribe audio
99
- transcript_data = transcribe_audio(temp_path)
100
  if transcript_data:
101
  # Format transcript with timestamps
102
  formatted_transcript = format_transcript_with_timestamps(transcript_data)
@@ -126,9 +184,6 @@ def main():
126
  file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
127
  mime="text/markdown"
128
  )
129
-
130
- # Cleanup
131
- os.remove(temp_path)
132
 
133
  # Right column instructions when no file is uploaded
134
  if not uploaded_file:
@@ -142,6 +197,7 @@ def main():
142
  3. Provide downloadable versions of both
143
 
144
  Supported formats: MP3, WAV, M4A
 
145
  """)
146
 
147
  if __name__ == "__main__":
 
4
  from datetime import datetime
5
  import json
6
  from dotenv import load_dotenv
7
+ from pydub import AudioSegment
8
+ import tempfile
9
+ import math
10
 
11
  # Load environment variables
12
  load_dotenv()
 
14
  # Initialize OpenAI client
15
  client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
16
 
17
# Constants
MAX_FILE_SIZE = 25 * 1024 * 1024  # 25MB in bytes
CHUNK_LENGTH = 10 * 60 * 1000  # 10 minutes in milliseconds


def process_audio_file(uploaded_file):
    """Save the uploaded audio to disk and split it into chunks if it is too large.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            audio bytes and, optionally, a ``name`` attribute holding the
            original filename (presumably a Streamlit upload -- confirm
            against the caller).

    Returns:
        list[str]: Paths of the audio files to transcribe, in playback order.
        A file no larger than ``MAX_FILE_SIZE`` is returned as a single path;
        a larger file is split into mono MP3 chunks of at most
        ``CHUNK_LENGTH`` milliseconds each.

    The files are written to a temporary directory that outlives this call,
    because the caller still has to open the returned paths. The directory is
    removed when the interpreter exits.
    """
    import atexit
    import shutil

    # A ``with tempfile.TemporaryDirectory()`` block would delete the files on
    # return, leaving the caller with dangling paths. Create the directory
    # explicitly and defer its removal to interpreter shutdown instead.
    temp_dir = tempfile.mkdtemp(prefix="audio_chunks_")
    atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)

    # Keep the original extension so that consumers which infer the audio
    # format from the filename can still recognise the file.
    extension = os.path.splitext(getattr(uploaded_file, "name", "") or "")[1]
    temp_input_path = os.path.join(temp_dir, f"input_audio{extension}")
    with open(temp_input_path, "wb") as f:
        f.write(uploaded_file.getvalue())

    # If the file is small enough, return it as is (no decoding needed).
    if os.path.getsize(temp_input_path) <= MAX_FILE_SIZE:
        return [temp_input_path]

    # Otherwise decode the audio and cut it into fixed-length chunks.
    audio = AudioSegment.from_file(temp_input_path)
    total_length = len(audio)  # duration in milliseconds

    chunks = []
    for index, start_time in enumerate(range(0, total_length, CHUNK_LENGTH)):
        end_time = min(start_time + CHUNK_LENGTH, total_length)
        chunk_path = os.path.join(temp_dir, f"chunk_{index}.mp3")
        # "-ac 1" converts to mono, which keeps each chunk small.
        audio[start_time:end_time].export(
            chunk_path, format="mp3", parameters=["-ac", "1"]
        )
        chunks.append(chunk_path)

    return chunks
52
+
53
def transcribe_audio_chunks(chunks):
    """Transcribe audio chunks in order and merge them into one transcript.

    Args:
        chunks: Sequence of audio file paths, in playback order.

    Returns:
        The transcription response of the last chunk, with ``segments``
        replaced by the segments of every chunk (timestamps shifted so they
        are relative to the start of the first chunk) and, when there is more
        than one chunk, ``text`` replaced by the concatenated text of all
        chunks. Returns ``None`` if ``chunks`` is empty or if any chunk fails;
        a failure is also reported through ``st.error``.
    """
    # Nothing to transcribe: bail out so the merge step below always has a
    # transcript object to work with.
    if not chunks:
        return None

    all_segments = []
    all_text = []
    current_time_offset = 0  # seconds of audio preceding the current chunk
    transcript = None
    last_index = len(chunks) - 1

    for index, chunk_path in enumerate(chunks):
        try:
            with open(chunk_path, "rb") as audio:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio,
                    response_format="verbose_json",
                    timestamp_granularities=["segment"]
                )

            # Adjust timestamps for this chunk; tolerate a response that
            # carries no segments.
            for segment in transcript.segments or []:
                segment.start += current_time_offset
                segment.end += current_time_offset
                all_segments.append(segment)
            all_text.append((getattr(transcript, "text", "") or "").strip())

            # Update the offset for the next chunk. The last chunk is skipped
            # because its offset is never used and decoding it is wasted work.
            if index < last_index:
                chunk_ms = len(AudioSegment.from_file(chunk_path))
                current_time_offset += chunk_ms / 1000  # Convert to seconds

        except Exception as e:
            st.error(f"Error in transcription: {str(e)}")
            return None

    # Combine all transcriptions, reusing the last response as the container.
    full_transcript = transcript
    full_transcript.segments = all_segments
    if len(chunks) > 1:
        # Without this, ``text`` would only cover the final chunk.
        full_transcript.text = " ".join(part for part in all_text if part)
    return full_transcript
85
 
86
  def format_timestamp(seconds):
87
  """Convert seconds to HH:MM:SS format"""
 
140
 
141
  if uploaded_file:
142
  st.audio(uploaded_file)
143
+ file_size = uploaded_file.size / (1024 * 1024) # Convert to MB
144
+ st.info(f"File size: {file_size:.2f} MB")
145
 
146
  if st.button("Generate Notes", type="primary", use_container_width=True):
147
  # Create tabs in the right column for different outputs
 
149
  tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
150
 
151
  with st.spinner("Processing audio..."):
152
+ # Process and potentially chunk the audio file
153
+ chunks = process_audio_file(uploaded_file)
154
+
155
+ # Transcribe chunks
156
+ transcript_data = transcribe_audio_chunks(chunks)
157
 
 
 
158
  if transcript_data:
159
  # Format transcript with timestamps
160
  formatted_transcript = format_transcript_with_timestamps(transcript_data)
 
184
  file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
185
  mime="text/markdown"
186
  )
 
 
 
187
 
188
  # Right column instructions when no file is uploaded
189
  if not uploaded_file:
 
197
  3. Provide downloadable versions of both
198
 
199
  Supported formats: MP3, WAV, M4A
200
+ Note: Large files will be automatically processed in chunks.
201
  """)
202
 
203
  if __name__ == "__main__":