Spaces:

abhishekjoel
/

Project_lecture_notes

Build error

App Files Community

abhishekjoel committed on Oct 24, 2024

Commit

a961ef9

verified ·

1 Parent(s): 18e5f93

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -240

app.py CHANGED Viewed

@@ -1,253 +1,48 @@
 import streamlit as st
-import openai
 import os
-from datetime import datetime
-import json
-from dotenv import load_dotenv
-from pydub import AudioSegment
-import tempfile
-import math
-from pathlib import Path
-import shutil
-# Load environment variables
-load_dotenv()
-# Initialize OpenAI client
-client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-# Constants
-MAX_FILE_SIZE = 25 * 1024 * 1024  # 25MB in bytes
-CHUNK_LENGTH = 10 * 60 * 1000  # 10 minutes in milliseconds
-@st.cache_data
-def save_uploaded_file(uploaded_file):
-    """Save uploaded file to a temporary directory and return the path"""
-    try:
-        # Create a temporary directory that persists
-        temp_dir = tempfile.mkdtemp()
-        # Get the file extension
-        file_extension = Path(uploaded_file.name).suffix
-        # Create full path with original extension
-        temp_path = os.path.join(temp_dir, f"input_audio{file_extension}")
-        # Save uploaded file
-        with open(temp_path, "wb") as f:
-            f.write(uploaded_file.getvalue())
-        return temp_path, temp_dir
-    except Exception as e:
-        st.error(f"Error saving file: {str(e)}")
-        return None, None
-def process_audio_file(file_path, temp_dir):
-    """Process and potentially chunk the audio file"""
-    try:
-        # Load audio file
-        audio = AudioSegment.from_file(file_path)
-        # If file is small enough, return it as is
-        if os.path.getsize(file_path) <= MAX_FILE_SIZE:
-            return [file_path]
-        # Otherwise, chunk the audio
-        chunks = []
-        total_length = len(audio)
-        num_chunks = math.ceil(total_length / CHUNK_LENGTH)
-        for i in range(num_chunks):
-            start_time = i * CHUNK_LENGTH
-            end_time = min((i + 1) * CHUNK_LENGTH, total_length)
-            chunk = audio[start_time:end_time]
-            chunk_path = os.path.join(temp_dir, f"chunk_{i}.mp3")
-            # Export with specific parameters for better compatibility
-            chunk = chunk.set_channels(1)  # Convert to mono
-            chunk = chunk.set_frame_rate(16000)  # Set sample rate to 16kHz
-            chunk.export(chunk_path, format="mp3", parameters=["-q:a", "0"])
-            chunks.append(chunk_path)
-            # Verify file exists and has size
-            if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
-                raise Exception(f"Failed to create chunk {i}")
-        return chunks
-    except Exception as e:
-        st.error(f"Error processing audio: {str(e)}")
-        return None
-def transcribe_audio_chunks(chunks):
-    """Transcribe audio chunks and combine transcriptions"""
-    all_segments = []
-    current_time_offset = 0
-    for i, chunk_path in enumerate(chunks):
-        try:
-            st.write(f"Processing chunk {i+1} of {len(chunks)}...")
-            with open(chunk_path, "rb") as audio:
-                transcript = client.audio.transcriptions.create(
-                    model="whisper-1",
-                    file=audio,
-                    response_format="verbose_json",
-                    timestamp_granularities=["segment"]
-                )
-                # Adjust timestamps for this chunk
-                for segment in transcript.segments:
-                    segment.start += current_time_offset
-                    segment.end += current_time_offset
-                all_segments.extend(transcript.segments)
-                # Update time offset for next chunk
-                current_time_offset += len(AudioSegment.from_file(chunk_path)) / 1000  # Convert to seconds
-        except Exception as e:
-            st.error(f"Error in transcription of chunk {i+1}: {str(e)}")
-            return None
-    # Combine all transcriptions
-    if transcript and all_segments:
-        full_transcript = transcript
-        full_transcript.segments = all_segments
-        return full_transcript
-    return None
-def format_timestamp(seconds):
-    """Convert seconds to HH:MM:SS format"""
-    hours = int(seconds // 3600)
-    minutes = int((seconds % 3600) // 60)
-    seconds = int(seconds % 60)
-    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
-def generate_lesson_plan(transcript):
-    """Generate a structured lesson plan from the transcript"""
-    try:
-        system_prompt = """You are an educational content expert. Generate a detailed lesson plan from the lecture transcript.
-        The lesson plan should include:
-        1. Main Topics
-        2. Subtopics
-        3. Key Learning Objectives
-        4. Important Concepts
-        Format the output in markdown with clear hierarchical structure."""
-        response = client.chat.completions.create(
-            model="gpt-4-turbo-preview",
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": f"Generate a lesson plan from this transcript:\n{transcript}"}
-            ],
-            temperature=0.3,
-            max_tokens=2000
-        )
-        return response.choices[0].message.content
-    except Exception as e:
-        st.error(f"Error generating lesson plan: {str(e)}")
-        return None
-def format_transcript_with_timestamps(transcript_data):
-    """Format transcript with timestamps in a readable format"""
-    formatted_text = "# Lecture Transcript with Timestamps\n\n"
-    for segment in transcript_data.segments:
-        start_time = format_timestamp(segment.start)
-        formatted_text += f"**[{start_time}]** {segment.text}\n\n"
-    return formatted_text
-def cleanup_files(temp_dir):
-    """Safely clean up temporary files"""
-    try:
-        if temp_dir and os.path.exists(temp_dir):
-            shutil.rmtree(temp_dir)
-    except Exception as e:
-        st.warning(f"Warning: Could not clean up temporary files: {str(e)}")
-# Streamlit UI
-def main():
-    st.set_page_config(page_title="Lecture Notes Generator", layout="wide")
-    st.title("🎓 Lecture Notes Generator")
-    # Create two columns with custom widths
-    col1, col2 = st.columns([1, 3])
-    # Left column for upload (smaller)
-    with col1:
-        st.header("Upload Recording")
-        uploaded_file = st.file_uploader("Choose an audio file", type=['mp3', 'wav', 'm4a'])
-        if uploaded_file:
-            st.audio(uploaded_file)
-            file_size = uploaded_file.size / (1024 * 1024)  # Convert to MB
-            st.info(f"File size: {file_size:.2f} MB")
-            if st.button("Generate Notes", type="primary", use_container_width=True):
-                # Create tabs in the right column for different outputs
-                with col2:
-                    tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
-                    with st.spinner("Processing audio..."):
-                        # Save uploaded file and get temporary paths
-                        temp_path, temp_dir = save_uploaded_file(uploaded_file)
-                        if temp_path and temp_dir:
-                            try:
-                                # Process and potentially chunk the audio file
-                                chunks = process_audio_file(temp_path, temp_dir)
-                                if chunks:
-                                    # Transcribe chunks
-                                    transcript_data = transcribe_audio_chunks(chunks)
-                                    if transcript_data:
-                                        # Format transcript with timestamps
-                                        formatted_transcript = format_transcript_with_timestamps(transcript_data)
-                                        # Generate lesson plan
-                                        lesson_plan = generate_lesson_plan(transcript_data.text)
-                                        # Display transcript in first tab
-                                        with tab1:
-                                            st.markdown(formatted_transcript)
-                                            # Download button for transcript
-                                            st.download_button(
-                                                label="Download Transcript",
-                                                data=formatted_transcript,
-                                                file_name=f"transcript_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
-                                                mime="text/markdown"
-                                            )
-                                        # Display lesson plan in second tab
-                                        with tab2:
-                                            if lesson_plan:
-                                                st.markdown(lesson_plan)
-                                                # Download button for lesson plan
-                                                st.download_button(
-                                                    label="Download Lesson Plan",
-                                                    data=lesson_plan,
-                                                    file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
-                                                    mime="text/markdown"
-                                                )
-                            finally:
-                                # Clean up temporary files
-                                cleanup_files(temp_dir)
-    # Right column instructions when no file is uploaded
-    if not uploaded_file:
-        with col2:
-            st.info("""
-            👈 Start by uploading an audio file on the left side.
-            The system will automatically:
-            1. Transcribe the lecture with timestamps
-            2. Generate a structured lesson plan
-            3. Provide downloadable versions of both
-            Supported formats: MP3, WAV, M4A
-            Note: Large files will be automatically processed in chunks.
-            """)
-if __name__ == "__main__":
-    main()

 import streamlit as st
+from utils import split_audio, transcribe_audio, generate_lesson_plan
 import os
+import openai
+# Set up OpenAI API key
+openai.api_key = os.getenv("OPENAI_API_KEY")
+st.title("Lecture Notes Generator")
+st.write("Upload an audio recording of the lecture.")
+# Create a two-column layout
+col1, col2 = st.columns([1, 2])
+with col1:
+    # File upload for audio
+    audio_file = st.file_uploader("Choose an audio file (max 25MB)", type=["mp3", "wav"])
+    if st.button("Generate Notes"):
+        if audio_file is not None:
+            # Save the uploaded file
+            with open("uploaded_audio.mp3", "wb") as f:
+                f.write(audio_file.getbuffer())
+            # Split audio into chunks
+            chunks = split_audio("uploaded_audio.mp3")
+            # Transcribe audio
+            transcriptions, timestamps = transcribe_audio(chunks)
+            # Generate lesson plan from the transcription
+            lesson_plan = generate_lesson_plan(transcriptions)
+            # Display results in the second column
+            with col2:
+                st.subheader("Transcription with Timestamps")
+                for ts, text in zip(timestamps, transcriptions):
+                    st.write(f"{ts}: {text}")
+                st.subheader("Generated Lesson Plan")
+                st.markdown(lesson_plan)
+        else:
+            st.error("Please upload an audio file.")
+with col2:
+    # Initially empty
+    st.subheader("Lecture Notes and Lesson Plan")
+    st.write("Upload an audio file to generate notes.")