Spaces:

abhishekjoel
/

Project_lecture_notes

Build error

App Files Community

abhishekjoel committed on Oct 24, 2024

Commit

f224e73

verified ·

1 Parent(s): 8a4a13a

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -50

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ from dotenv import load_dotenv
 from pydub import AudioSegment
 import tempfile
 import math
 # Load environment variables
 load_dotenv()
@@ -18,21 +20,35 @@ client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 MAX_FILE_SIZE = 25 * 1024 * 1024  # 25MB in bytes
 CHUNK_LENGTH = 10 * 60 * 1000  # 10 minutes in milliseconds
-def process_audio_file(uploaded_file):
-    """Process and potentially chunk the audio file"""
-    # Create a temporary directory
-    with tempfile.TemporaryDirectory() as temp_dir:
         # Save uploaded file
-        temp_input_path = os.path.join(temp_dir, "input_audio")
-        with open(temp_input_path, "wb") as f:
             f.write(uploaded_file.getvalue())
         # Load audio file
-        audio = AudioSegment.from_file(temp_input_path)
         # If file is small enough, return it as is
-        if os.path.getsize(temp_input_path) <= MAX_FILE_SIZE:
-            return [temp_input_path]
         # Otherwise, chunk the audio
         chunks = []
@@ -45,18 +61,32 @@ def process_audio_file(uploaded_file):
             chunk = audio[start_time:end_time]
             chunk_path = os.path.join(temp_dir, f"chunk_{i}.mp3")
-            chunk.export(chunk_path, format="mp3", parameters=["-ac", "1"])  # Convert to mono
             chunks.append(chunk_path)
         return chunks
 def transcribe_audio_chunks(chunks):
     """Transcribe audio chunks and combine transcriptions"""
     all_segments = []
     current_time_offset = 0
-    for chunk_path in chunks:
         try:
             with open(chunk_path, "rb") as audio:
                 transcript = client.audio.transcriptions.create(
                     model="whisper-1",
@@ -75,13 +105,15 @@ def transcribe_audio_chunks(chunks):
                 current_time_offset += len(AudioSegment.from_file(chunk_path)) / 1000  # Convert to seconds
         except Exception as e:
-            st.error(f"Error in transcription: {str(e)}")
             return None
     # Combine all transcriptions
-    full_transcript = transcript
-    full_transcript.segments = all_segments
-    return full_transcript
 def format_timestamp(seconds):
     """Convert seconds to HH:MM:SS format"""
@@ -124,6 +156,14 @@ def format_transcript_with_timestamps(transcript_data):
         formatted_text += f"**[{start_time}]** {segment.text}\n\n"
     return formatted_text
 # Streamlit UI
 def main():
     st.set_page_config(page_title="Lecture Notes Generator", layout="wide")
@@ -149,41 +189,50 @@ def main():
                     tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
                     with st.spinner("Processing audio..."):
-                        # Process and potentially chunk the audio file
-                        chunks = process_audio_file(uploaded_file)
-                        # Transcribe chunks
-                        transcript_data = transcribe_audio_chunks(chunks)
-                        if transcript_data:
-                            # Format transcript with timestamps
-                            formatted_transcript = format_transcript_with_timestamps(transcript_data)
-                            # Generate lesson plan
-                            lesson_plan = generate_lesson_plan(transcript_data.text)
-                            # Display transcript in first tab
-                            with tab1:
-                                st.markdown(formatted_transcript)
-                                # Download button for transcript
-                                st.download_button(
-                                    label="Download Transcript",
-                                    data=formatted_transcript,
-                                    file_name=f"transcript_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
-                                    mime="text/markdown"
-                                )
-                            # Display lesson plan in second tab
-                            with tab2:
-                                if lesson_plan:
-                                    st.markdown(lesson_plan)
-                                    # Download button for lesson plan
-                                    st.download_button(
-                                        label="Download Lesson Plan",
-                                        data=lesson_plan,
-                                        file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
-                                        mime="text/markdown"
-                                    )
     # Right column instructions when no file is uploaded
     if not uploaded_file:

 from pydub import AudioSegment
 import tempfile
 import math
+from pathlib import Path
+import shutil
 # Load environment variables
 load_dotenv()
 MAX_FILE_SIZE = 25 * 1024 * 1024  # 25MB in bytes
 CHUNK_LENGTH = 10 * 60 * 1000  # 10 minutes in milliseconds
+@st.cache_data
+def save_uploaded_file(uploaded_file):
+    """Save uploaded file to a temporary directory and return the path"""
+    try:
+        # Create a temporary directory that persists
+        temp_dir = tempfile.mkdtemp()
+        # Get the file extension
+        file_extension = Path(uploaded_file.name).suffix
+        # Create full path with original extension
+        temp_path = os.path.join(temp_dir, f"input_audio{file_extension}")
         # Save uploaded file
+        with open(temp_path, "wb") as f:
             f.write(uploaded_file.getvalue())
+        return temp_path, temp_dir
+    except Exception as e:
+        st.error(f"Error saving file: {str(e)}")
+        return None, None
+def process_audio_file(file_path, temp_dir):
+    """Process and potentially chunk the audio file"""
+    try:
         # Load audio file
+        audio = AudioSegment.from_file(file_path)
         # If file is small enough, return it as is
+        if os.path.getsize(file_path) <= MAX_FILE_SIZE:
+            return [file_path]
         # Otherwise, chunk the audio
         chunks = []
             chunk = audio[start_time:end_time]
             chunk_path = os.path.join(temp_dir, f"chunk_{i}.mp3")
+            # Export with specific parameters for better compatibility
+            chunk = chunk.set_channels(1)  # Convert to mono
+            chunk = chunk.set_frame_rate(16000)  # Set sample rate to 16kHz
+            chunk.export(chunk_path, format="mp3", parameters=["-q:a", "0"])
             chunks.append(chunk_path)
+            # Verify file exists and has size
+            if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
+                raise Exception(f"Failed to create chunk {i}")
         return chunks
+    except Exception as e:
+        st.error(f"Error processing audio: {str(e)}")
+        return None
 def transcribe_audio_chunks(chunks):
     """Transcribe audio chunks and combine transcriptions"""
     all_segments = []
     current_time_offset = 0
+    for i, chunk_path in enumerate(chunks):
         try:
+            st.write(f"Processing chunk {i+1} of {len(chunks)}...")
             with open(chunk_path, "rb") as audio:
                 transcript = client.audio.transcriptions.create(
                     model="whisper-1",
                 current_time_offset += len(AudioSegment.from_file(chunk_path)) / 1000  # Convert to seconds
         except Exception as e:
+            st.error(f"Error in transcription of chunk {i+1}: {str(e)}")
             return None
     # Combine all transcriptions
+    if transcript and all_segments:
+        full_transcript = transcript
+        full_transcript.segments = all_segments
+        return full_transcript
+    return None
 def format_timestamp(seconds):
     """Convert seconds to HH:MM:SS format"""
         formatted_text += f"**[{start_time}]** {segment.text}\n\n"
     return formatted_text
+def cleanup_files(temp_dir):
+    """Safely clean up temporary files"""
+    try:
+        if temp_dir and os.path.exists(temp_dir):
+            shutil.rmtree(temp_dir)
+    except Exception as e:
+        st.warning(f"Warning: Could not clean up temporary files: {str(e)}")
 # Streamlit UI
 def main():
     st.set_page_config(page_title="Lecture Notes Generator", layout="wide")
                     tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
                     with st.spinner("Processing audio..."):
+                        # Save uploaded file and get temporary paths
+                        temp_path, temp_dir = save_uploaded_file(uploaded_file)
+                        if temp_path and temp_dir:
+                            try:
+                                # Process and potentially chunk the audio file
+                                chunks = process_audio_file(temp_path, temp_dir)
+                                if chunks:
+                                    # Transcribe chunks
+                                    transcript_data = transcribe_audio_chunks(chunks)
+                                    if transcript_data:
+                                        # Format transcript with timestamps
+                                        formatted_transcript = format_transcript_with_timestamps(transcript_data)
+                                        # Generate lesson plan
+                                        lesson_plan = generate_lesson_plan(transcript_data.text)
+                                        # Display transcript in first tab
+                                        with tab1:
+                                            st.markdown(formatted_transcript)
+                                            # Download button for transcript
+                                            st.download_button(
+                                                label="Download Transcript",
+                                                data=formatted_transcript,
+                                                file_name=f"transcript_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
+                                                mime="text/markdown"
+                                            )
+                                        # Display lesson plan in second tab
+                                        with tab2:
+                                            if lesson_plan:
+                                                st.markdown(lesson_plan)
+                                                # Download button for lesson plan
+                                                st.download_button(
+                                                    label="Download Lesson Plan",
+                                                    data=lesson_plan,
+                                                    file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
+                                                    mime="text/markdown"
+                                                )
+                            finally:
+                                # Clean up temporary files
+                                cleanup_files(temp_dir)
     # Right column instructions when no file is uploaded
     if not uploaded_file: