Spaces:

abhishekjoel
/

Project_lecture_notes

Build error

App Files Files Community

abhishekjoel committed on Oct 24, 2024

Commit

02ba267

verified ·

1 Parent(s): e9b2818

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -23

app.py CHANGED Viewed

@@ -4,6 +4,9 @@ import os
 from datetime import datetime
 import json
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
@@ -11,20 +14,74 @@ load_dotenv()
 # Initialize OpenAI client
 client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-def transcribe_audio(audio_file):
-    """Transcribe audio using Whisper API with timestamps"""
-    try:
-        with open(audio_file, "rb") as audio:
-            transcript = client.audio.transcriptions.create(
-                model="whisper-1",
-                file=audio,
-                response_format="verbose_json",
-                timestamp_granularities=["segment"]
-            )
-        return transcript
-    except Exception as e:
-        st.error(f"Error in transcription: {str(e)}")
-        return None
 def format_timestamp(seconds):
     """Convert seconds to HH:MM:SS format"""
@@ -83,6 +140,8 @@ def main():
         if uploaded_file:
             st.audio(uploaded_file)
             if st.button("Generate Notes", type="primary", use_container_width=True):
                 # Create tabs in the right column for different outputs
@@ -90,13 +149,12 @@ def main():
                     tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
                     with st.spinner("Processing audio..."):
-                        # Save uploaded file temporarily
-                        temp_path = f"temp_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
-                        with open(temp_path, "wb") as f:
-                            f.write(uploaded_file.getvalue())
-                        # Transcribe audio
-                        transcript_data = transcribe_audio(temp_path)
                         if transcript_data:
                             # Format transcript with timestamps
                             formatted_transcript = format_transcript_with_timestamps(transcript_data)
@@ -126,9 +184,6 @@ def main():
                                         file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
                                         mime="text/markdown"
                                     )
-                        # Cleanup
-                        os.remove(temp_path)
     # Right column instructions when no file is uploaded
     if not uploaded_file:
@@ -142,6 +197,7 @@ def main():
             3. Provide downloadable versions of both
             Supported formats: MP3, WAV, M4A
             """)
 if __name__ == "__main__":

 from datetime import datetime
 import json
 from dotenv import load_dotenv
+from pydub import AudioSegment
+import tempfile
+import math
 # Load environment variables
 load_dotenv()
 # Initialize OpenAI client
 client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+# Constants
+# Uploads whose on-disk size is at or below this threshold are handed on as a
+# single file; anything larger is split into chunks (see process_audio_file).
+# NOTE(review): presumably mirrors the transcription API upload limit - confirm.
+MAX_FILE_SIZE = 25 * 1024 * 1024  # 25MB in bytes
+# Maximum duration of one exported chunk; compared against pydub lengths,
+# which process_audio_file treats as milliseconds.
+CHUNK_LENGTH = 10 * 60 * 1000  # 10 minutes in milliseconds
+def process_audio_file(uploaded_file):
+    """Save the uploaded audio to disk and split it into chunks if it is large.
+
+    Args:
+        uploaded_file: Uploaded file object exposing ``getvalue()``. Its
+            ``name`` attribute, when present, supplies the file extension.
+
+    Returns:
+        list[str]: Paths of the audio files to transcribe, in playback order.
+        A file no larger than ``MAX_FILE_SIZE`` is returned unchanged as a
+        single path; otherwise the audio is exported as mono MP3 chunks of at
+        most ``CHUNK_LENGTH`` milliseconds each.
+
+    Note:
+        The files live in a fresh temporary directory that outlives this
+        call, because the caller opens the returned paths afterwards. The
+        directory is removed at interpreter exit; a caller that finishes
+        earlier may delete it sooner.
+    """
+    import atexit
+    import shutil
+
+    # A ``with tempfile.TemporaryDirectory()`` block would delete the
+    # directory on return, handing the caller paths that no longer exist.
+    temp_dir = tempfile.mkdtemp(prefix="lecture_audio_")
+    atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)
+
+    # Keep the upload's extension so consumers that infer the audio format
+    # from the file name can still recognise it.
+    upload_name = getattr(uploaded_file, "name", "") or ""
+    extension = os.path.splitext(upload_name)[1]
+    temp_input_path = os.path.join(temp_dir, f"input_audio{extension}")
+    with open(temp_input_path, "wb") as f:
+        f.write(uploaded_file.getvalue())
+
+    # If file is small enough, return it as is (no need to decode it first)
+    if os.path.getsize(temp_input_path) <= MAX_FILE_SIZE:
+        return [temp_input_path]
+
+    # Otherwise, decode and chunk the audio
+    audio = AudioSegment.from_file(temp_input_path)
+    total_length = len(audio)
+    chunks = []
+    for i, start_time in enumerate(range(0, total_length, CHUNK_LENGTH)):
+        end_time = min(start_time + CHUNK_LENGTH, total_length)
+        chunk_path = os.path.join(temp_dir, f"chunk_{i}.mp3")
+        # "-ac 1" downmixes to mono
+        audio[start_time:end_time].export(
+            chunk_path, format="mp3", parameters=["-ac", "1"]
+        )
+        chunks.append(chunk_path)
+    return chunks
+def transcribe_audio_chunks(chunks):
+    """Transcribe audio chunks in order and merge them into one transcript.
+
+    Args:
+        chunks: Paths of the audio files to transcribe, in playback order.
+
+    Returns:
+        The response object of the last chunk with ``segments`` replaced by
+        the segments of every chunk, their ``start``/``end`` shifted by the
+        total duration of the preceding chunks. When more than one chunk was
+        transcribed, ``text`` is replaced by the chunk texts joined with
+        spaces. Returns ``None`` when ``chunks`` is empty, or when any chunk
+        fails (the error is reported through ``st.error``).
+    """
+    if not chunks:
+        # Nothing to transcribe; the merge step below needs at least one
+        # response object to build on.
+        return None
+
+    all_segments = []
+    all_texts = []
+    current_time_offset = 0
+    transcript = None
+
+    for chunk_path in chunks:
+        try:
+            with open(chunk_path, "rb") as audio:
+                transcript = client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio,
+                    response_format="verbose_json",
+                    timestamp_granularities=["segment"]
+                )
+            # A response may carry no segments; treat that as an empty list.
+            segments = getattr(transcript, "segments", None) or []
+            # Adjust timestamps for this chunk
+            for segment in segments:
+                segment.start += current_time_offset
+                segment.end += current_time_offset
+            all_segments.extend(segments)
+            chunk_text = getattr(transcript, "text", None)
+            if chunk_text:
+                all_texts.append(chunk_text.strip())
+            # Update time offset for next chunk
+            current_time_offset += len(AudioSegment.from_file(chunk_path)) / 1000  # Convert to seconds
+        except Exception as e:
+            st.error(f"Error in transcription: {str(e)}")
+            return None
+
+    # Combine all transcriptions, reusing the last response as the container
+    full_transcript = transcript
+    full_transcript.segments = all_segments
+    if len(chunks) > 1:
+        # Otherwise ``text`` would hold only the final chunk's words.
+        full_transcript.text = " ".join(all_texts)
+    return full_transcript
 def format_timestamp(seconds):
     """Convert seconds to HH:MM:SS format"""
         if uploaded_file:
             st.audio(uploaded_file)
+            file_size = uploaded_file.size / (1024 * 1024)  # Convert to MB
+            st.info(f"File size: {file_size:.2f} MB")
             if st.button("Generate Notes", type="primary", use_container_width=True):
                 # Create tabs in the right column for different outputs
                     tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
                     with st.spinner("Processing audio..."):
+                        # Process and potentially chunk the audio file
+                        chunks = process_audio_file(uploaded_file)
+                        # Transcribe chunks
+                        transcript_data = transcribe_audio_chunks(chunks)
                         if transcript_data:
                             # Format transcript with timestamps
                             formatted_transcript = format_transcript_with_timestamps(transcript_data)
                                         file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
                                         mime="text/markdown"
                                     )
     # Right column instructions when no file is uploaded
     if not uploaded_file:
             3. Provide downloadable versions of both
             Supported formats: MP3, WAV, M4A
+            Note: Large files will be automatically processed in chunks.
             """)
 if __name__ == "__main__":