Spaces:

abhishekjoel
/

Project_lecture_notes

Build error

App Files Files Community

abhishekjoel committed on Oct 24, 2024

Commit

9ccf4ff

verified ·

1 Parent(s): 03f8f22

Update app.py

Browse files

Files changed (1) hide show

app.py +190 -28

app.py CHANGED Viewed

@@ -10,6 +10,16 @@ import math
 from pathlib import Path
 import shutil
 @st.cache_data
 def save_uploaded_file(uploaded_file):
     """Save uploaded file to a temporary directory and return the path"""
@@ -18,44 +28,23 @@ def save_uploaded_file(uploaded_file):
         temp_dir = tempfile.mkdtemp()
         # Get the file extension
         file_extension = Path(uploaded_file.name).suffix
-        if not file_extension:  # If no extension, default to .wav
-            file_extension = '.wav'
         # Create full path with original extension
         temp_path = os.path.join(temp_dir, f"input_audio{file_extension}")
         # Save uploaded file
         with open(temp_path, "wb") as f:
             f.write(uploaded_file.getvalue())
-        # Verify file exists and has content
-        if not os.path.exists(temp_path):
-            raise FileNotFoundError(f"Failed to save file at {temp_path}")
-        if os.path.getsize(temp_path) == 0:
-            raise ValueError("Saved file is empty")
-        st.debug(f"File saved successfully at: {temp_path}")
         return temp_path, temp_dir
     except Exception as e:
         st.error(f"Error saving file: {str(e)}")
-        if temp_dir and os.path.exists(temp_dir):
-            shutil.rmtree(temp_dir)
         return None, None
 def process_audio_file(file_path, temp_dir):
     """Process and potentially chunk the audio file"""
     try:
-        # Verify file exists before processing
-        if not os.path.exists(file_path):
-            raise FileNotFoundError(f"Audio file not found at: {file_path}")
-        st.debug(f"Processing audio file: {file_path}")
-        # Load audio file with explicit format
-        try:
-            audio = AudioSegment.from_file(file_path, format=Path(file_path).suffix[1:])
-        except:
-            # Fallback to automatic format detection
-            audio = AudioSegment.from_file(file_path)
         # If file is small enough, return it as is
         if os.path.getsize(file_path) <= MAX_FILE_SIZE:
@@ -78,14 +67,187 @@ def process_audio_file(file_path, temp_dir):
             chunk = chunk.set_frame_rate(16000)  # Set sample rate to 16kHz
             chunk.export(chunk_path, format="mp3", parameters=["-q:a", "0"])
-            # Verify chunk was created successfully
             if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
                 raise Exception(f"Failed to create chunk {i}")
-            chunks.append(chunk_path)
-            st.debug(f"Created chunk {i+1}/{num_chunks}")
         return chunks
     except Exception as e:
         st.error(f"Error processing audio: {str(e)}")
-        return None

 from pathlib import Path
 import shutil
+# Load environment variables before the client below reads OPENAI_API_KEY
+load_dotenv()
+# Initialize OpenAI client (os.getenv yields None when OPENAI_API_KEY is unset)
+client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+# Constants
+MAX_FILE_SIZE = 25 * 1024 * 1024  # 25MB in bytes; process_audio_file compares the upload's size on disk against this
+CHUNK_LENGTH = 10 * 60 * 1000  # 10 minutes in milliseconds
 @st.cache_data
 def save_uploaded_file(uploaded_file):
     """Save uploaded file to a temporary directory and return the path"""
         temp_dir = tempfile.mkdtemp()
         # Get the file extension
         file_extension = Path(uploaded_file.name).suffix
         # Create full path with original extension
         temp_path = os.path.join(temp_dir, f"input_audio{file_extension}")
         # Save uploaded file
         with open(temp_path, "wb") as f:
             f.write(uploaded_file.getvalue())
         return temp_path, temp_dir
     except Exception as e:
         st.error(f"Error saving file: {str(e)}")
         return None, None
 def process_audio_file(file_path, temp_dir):
     """Process and potentially chunk the audio file"""
     try:
+        # Load audio file
+        audio = AudioSegment.from_file(file_path)
         # If file is small enough, return it as is
         if os.path.getsize(file_path) <= MAX_FILE_SIZE:
             chunk = chunk.set_frame_rate(16000)  # Set sample rate to 16kHz
             chunk.export(chunk_path, format="mp3", parameters=["-q:a", "0"])
+            chunks.append(chunk_path)
+            # Verify file exists and has size
             if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
                 raise Exception(f"Failed to create chunk {i}")
         return chunks
     except Exception as e:
         st.error(f"Error processing audio: {str(e)}")
+        return None
+def transcribe_audio_chunks(chunks):
+    """Transcribe audio chunks in order and merge them into one transcript.
+
+    Each chunk is sent to the ``whisper-1`` model with segment-level
+    timestamps. Segment start/end times are shifted by the combined duration
+    of the preceding chunks so they refer to positions in the whole recording.
+
+    Args:
+        chunks: Paths of the audio chunk files, in playback order.
+
+    Returns:
+        The response object of the last chunk, with ``segments`` replaced by
+        the segments of every chunk and ``text`` replaced by the text of every
+        chunk joined with single spaces. ``None`` when ``chunks`` is empty,
+        when any chunk fails (the error is shown via ``st.error``), or when no
+        segments were produced at all.
+    """
+    all_segments = []
+    all_texts = []
+    current_time_offset = 0
+    # Pre-bind so an empty ``chunks`` falls through to ``return None`` instead
+    # of raising UnboundLocalError at the final check.
+    transcript = None
+    for i, chunk_path in enumerate(chunks):
+        try:
+            st.write(f"Processing chunk {i+1} of {len(chunks)}...")
+            with open(chunk_path, "rb") as audio:
+                transcript = client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio,
+                    response_format="verbose_json",
+                    timestamp_granularities=["segment"]
+                )
+            # A chunk with no recognised speech may carry no segments; treat
+            # that as an empty list rather than aborting the whole run.
+            segments = transcript.segments or []
+            # Adjust timestamps for this chunk
+            for segment in segments:
+                segment.start += current_time_offset
+                segment.end += current_time_offset
+            all_segments.extend(segments)
+            # Keep every chunk's text: the object returned below is the LAST
+            # response, whose own ``text`` covers only the final chunk.
+            all_texts.append((transcript.text or "").strip())
+            # Update time offset for next chunk (pydub lengths are in ms)
+            current_time_offset += len(AudioSegment.from_file(chunk_path)) / 1000
+        except Exception as e:
+            st.error(f"Error in transcription of chunk {i+1}: {str(e)}")
+            return None
+    # Combine all transcriptions
+    if transcript and all_segments:
+        full_transcript = transcript
+        full_transcript.segments = all_segments
+        full_transcript.text = " ".join(text for text in all_texts if text)
+        return full_transcript
+    return None
+def format_timestamp(seconds):
+    """Render a duration given in seconds as a zero-padded ``HH:MM:SS`` string.
+
+    Fractional seconds are truncated. The hour field is padded to at least
+    two digits and simply grows wider for durations of 100 hours or more.
+    """
+    # Hours, minutes-within-the-hour, seconds-within-the-minute.
+    fields = (seconds // 3600, (seconds % 3600) // 60, seconds % 60)
+    return ":".join(f"{int(field):02d}" for field in fields)
+def generate_lesson_plan(transcript):
+    """Ask the chat model for a markdown lesson plan based on a transcript.
+
+    Args:
+        transcript: Lecture transcript text; it is embedded verbatim in the
+            user message sent to the model.
+
+    Returns:
+        The message content of the first completion choice, or ``None`` when
+        the request raises (the error is reported through ``st.error``).
+    """
+    try:
+        instructions = """You are an educational content expert. Generate a detailed lesson plan from the lecture transcript.
+        The lesson plan should include:
+        1. Main Topics
+        2. Subtopics
+        3. Key Learning Objectives
+        4. Important Concepts
+        Format the output in markdown with clear hierarchical structure."""
+        conversation = [
+            {"role": "system", "content": instructions},
+            {"role": "user", "content": f"Generate a lesson plan from this transcript:\n{transcript}"},
+        ]
+        # Low temperature keeps the plan close to the transcript's content.
+        completion = client.chat.completions.create(
+            model="gpt-4-turbo-preview",
+            messages=conversation,
+            temperature=0.3,
+            max_tokens=2000,
+        )
+        first_choice = completion.choices[0]
+        return first_choice.message.content
+    except Exception as error:
+        st.error(f"Error generating lesson plan: {error!s}")
+        return None
+def format_transcript_with_timestamps(transcript_data):
+    """Render transcript segments as markdown, one timestamped paragraph each.
+
+    Args:
+        transcript_data: Object whose ``segments`` attribute is an iterable of
+            items exposing ``start`` (seconds, passed to ``format_timestamp``)
+            and ``text``.
+
+    Returns:
+        A markdown string: a level-1 heading followed by one
+        ``**[HH:MM:SS]** text`` paragraph per segment, each ended by a blank
+        line.
+    """
+    header = "# Lecture Transcript with Timestamps\n\n"
+    # Build every paragraph first and join once, rather than growing a string
+    # with ``+=`` on each segment (long lectures produce many segments).
+    paragraphs = [
+        f"**[{format_timestamp(segment.start)}]** {segment.text}\n\n"
+        for segment in transcript_data.segments
+    ]
+    return header + "".join(paragraphs)
+def cleanup_files(temp_dir):
+    """Remove a temporary directory tree, reporting failures as a warning.
+
+    A falsy ``temp_dir`` or a path that does not exist is silently ignored.
+    Any exception raised during removal is shown with ``st.warning`` instead
+    of propagating.
+    """
+    try:
+        if not temp_dir:
+            return
+        if os.path.exists(temp_dir):
+            shutil.rmtree(temp_dir)
+    except Exception as error:
+        st.warning(f"Warning: Could not clean up temporary files: {error!s}")
+# Streamlit UI
+def main():
+    """Render the Lecture Notes Generator page.
+
+    Lays out a narrow upload column and a wide results column. When a file is
+    uploaded and "Generate Notes" is pressed, the upload is saved to a
+    temporary directory, split into chunks, transcribed, and turned into a
+    lesson plan; the transcript and the plan are shown in separate tabs, each
+    with a download button. The temporary directory is removed in a
+    ``finally`` block. With no upload, usage instructions are shown instead.
+    """
+    st.set_page_config(page_title="Lecture Notes Generator", layout="wide")
+    st.title("🎓 Lecture Notes Generator")
+    # Create two columns with custom widths
+    col1, col2 = st.columns([1, 3])
+    # Left column for upload (smaller)
+    with col1:
+        st.header("Upload Recording")
+        uploaded_file = st.file_uploader("Choose an audio file", type=['mp3', 'wav', 'm4a'])
+        if uploaded_file:
+            st.audio(uploaded_file)
+            file_size = uploaded_file.size / (1024 * 1024)  # Convert to MB
+            st.info(f"File size: {file_size:.2f} MB")
+            if st.button("Generate Notes", type="primary", use_container_width=True):
+                # Create tabs in the right column for different outputs
+                with col2:
+                    tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
+                    with st.spinner("Processing audio..."):
+                        # Save uploaded file and get temporary paths
+                        # NOTE(review): save_uploaded_file is decorated with
+                        # st.cache_data while temp_dir is deleted in the
+                        # finally below; a cached result for the same upload
+                        # could point at a removed directory — confirm.
+                        temp_path, temp_dir = save_uploaded_file(uploaded_file)
+                        if temp_path and temp_dir:
+                            try:
+                                # Process and potentially chunk the audio file
+                                chunks = process_audio_file(temp_path, temp_dir)
+                                if chunks:
+                                    # Transcribe chunks
+                                    transcript_data = transcribe_audio_chunks(chunks)
+                                    if transcript_data:
+                                        # Format transcript with timestamps
+                                        formatted_transcript = format_transcript_with_timestamps(transcript_data)
+                                        # Generate lesson plan
+                                        # NOTE(review): relies on transcript_data.text
+                                        # covering every chunk; verify that
+                                        # transcribe_audio_chunks provides that
+                                        # for multi-chunk recordings.
+                                        lesson_plan = generate_lesson_plan(transcript_data.text)
+                                        # Display transcript in first tab
+                                        with tab1:
+                                            st.markdown(formatted_transcript)
+                                            # Download button for transcript
+                                            # NOTE(review): needs `datetime` bound to the
+                                            # datetime class; the import is not visible
+                                            # in this view — confirm.
+                                            st.download_button(
+                                                label="Download Transcript",
+                                                data=formatted_transcript,
+                                                file_name=f"transcript_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
+                                                mime="text/markdown"
+                                            )
+                                        # Display lesson plan in second tab
+                                        with tab2:
+                                            # generate_lesson_plan returns None on failure,
+                                            # in which case the tab is left empty.
+                                            if lesson_plan:
+                                                st.markdown(lesson_plan)
+                                                # Download button for lesson plan
+                                                st.download_button(
+                                                    label="Download Lesson Plan",
+                                                    data=lesson_plan,
+                                                    file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
+                                                    mime="text/markdown"
+                                                )
+                            finally:
+                                # Clean up temporary files
+                                cleanup_files(temp_dir)
+    # Right column instructions when no file is uploaded
+    if not uploaded_file:
+        with col2:
+            st.info("""
+            👈 Start by uploading an audio file on the left side.
+            The system will automatically:
+            1. Transcribe the lecture with timestamps
+            2. Generate a structured lesson plan
+            3. Provide downloadable versions of both
+            Supported formats: MP3, WAV, M4A
+            Note: Large files will be automatically processed in chunks.
+            """)
+# Run the app only when this file is executed as a script
+if __name__ == "__main__":
+    main()