Spaces:

abhishekjoel
/

Project_lecture_notes

Build error

App Files Community

abhishekjoel committed on Oct 24, 2024

Commit

b986f20

verified ·

1 Parent(s): f224e73

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -190

app.py CHANGED Viewed

@@ -10,16 +10,6 @@ import math
 from pathlib import Path
 import shutil
-# Load environment variables
-load_dotenv()
-# Initialize OpenAI client
-client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-# Constants
-MAX_FILE_SIZE = 25 * 1024 * 1024  # 25MB in bytes
-CHUNK_LENGTH = 10 * 60 * 1000  # 10 minutes in milliseconds
 @st.cache_data
 def save_uploaded_file(uploaded_file):
     """Save uploaded file to a temporary directory and return the path"""
@@ -28,23 +18,44 @@ def save_uploaded_file(uploaded_file):
         temp_dir = tempfile.mkdtemp()
         # Get the file extension
         file_extension = Path(uploaded_file.name).suffix
         # Create full path with original extension
         temp_path = os.path.join(temp_dir, f"input_audio{file_extension}")
         # Save uploaded file
         with open(temp_path, "wb") as f:
             f.write(uploaded_file.getvalue())
         return temp_path, temp_dir
     except Exception as e:
         st.error(f"Error saving file: {str(e)}")
         return None, None
 def process_audio_file(file_path, temp_dir):
     """Process and potentially chunk the audio file"""
     try:
-        # Load audio file
-        audio = AudioSegment.from_file(file_path)
         # If file is small enough, return it as is
         if os.path.getsize(file_path) <= MAX_FILE_SIZE:
@@ -67,187 +78,14 @@ def process_audio_file(file_path, temp_dir):
             chunk = chunk.set_frame_rate(16000)  # Set sample rate to 16kHz
             chunk.export(chunk_path, format="mp3", parameters=["-q:a", "0"])
-            chunks.append(chunk_path)
-            # Verify file exists and has size
             if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
                 raise Exception(f"Failed to create chunk {i}")
         return chunks
     except Exception as e:
         st.error(f"Error processing audio: {str(e)}")
-        return None
-def transcribe_audio_chunks(chunks):
-    """Transcribe audio chunks and combine transcriptions"""
-    all_segments = []
-    current_time_offset = 0
-    for i, chunk_path in enumerate(chunks):
-        try:
-            st.write(f"Processing chunk {i+1} of {len(chunks)}...")
-            with open(chunk_path, "rb") as audio:
-                transcript = client.audio.transcriptions.create(
-                    model="whisper-1",
-                    file=audio,
-                    response_format="verbose_json",
-                    timestamp_granularities=["segment"]
-                )
-                # Adjust timestamps for this chunk
-                for segment in transcript.segments:
-                    segment.start += current_time_offset
-                    segment.end += current_time_offset
-                all_segments.extend(transcript.segments)
-                # Update time offset for next chunk
-                current_time_offset += len(AudioSegment.from_file(chunk_path)) / 1000  # Convert to seconds
-        except Exception as e:
-            st.error(f"Error in transcription of chunk {i+1}: {str(e)}")
-            return None
-    # Combine all transcriptions
-    if transcript and all_segments:
-        full_transcript = transcript
-        full_transcript.segments = all_segments
-        return full_transcript
-    return None
-def format_timestamp(seconds):
-    """Convert seconds to HH:MM:SS format"""
-    hours = int(seconds // 3600)
-    minutes = int((seconds % 3600) // 60)
-    seconds = int(seconds % 60)
-    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
-def generate_lesson_plan(transcript):
-    """Generate a structured lesson plan from the transcript"""
-    try:
-        system_prompt = """You are an educational content expert. Generate a detailed lesson plan from the lecture transcript.
-        The lesson plan should include:
-        1. Main Topics
-        2. Subtopics
-        3. Key Learning Objectives
-        4. Important Concepts
-        Format the output in markdown with clear hierarchical structure."""
-        response = client.chat.completions.create(
-            model="gpt-4-turbo-preview",
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": f"Generate a lesson plan from this transcript:\n{transcript}"}
-            ],
-            temperature=0.3,
-            max_tokens=2000
-        )
-        return response.choices[0].message.content
-    except Exception as e:
-        st.error(f"Error generating lesson plan: {str(e)}")
-        return None
-def format_transcript_with_timestamps(transcript_data):
-    """Format transcript with timestamps in a readable format"""
-    formatted_text = "# Lecture Transcript with Timestamps\n\n"
-    for segment in transcript_data.segments:
-        start_time = format_timestamp(segment.start)
-        formatted_text += f"**[{start_time}]** {segment.text}\n\n"
-    return formatted_text
-def cleanup_files(temp_dir):
-    """Safely clean up temporary files"""
-    try:
-        if temp_dir and os.path.exists(temp_dir):
-            shutil.rmtree(temp_dir)
-    except Exception as e:
-        st.warning(f"Warning: Could not clean up temporary files: {str(e)}")
-# Streamlit UI
-def main():
-    st.set_page_config(page_title="Lecture Notes Generator", layout="wide")
-    st.title("🎓 Lecture Notes Generator")
-    # Create two columns with custom widths
-    col1, col2 = st.columns([1, 3])
-    # Left column for upload (smaller)
-    with col1:
-        st.header("Upload Recording")
-        uploaded_file = st.file_uploader("Choose an audio file", type=['mp3', 'wav', 'm4a'])
-        if uploaded_file:
-            st.audio(uploaded_file)
-            file_size = uploaded_file.size / (1024 * 1024)  # Convert to MB
-            st.info(f"File size: {file_size:.2f} MB")
-            if st.button("Generate Notes", type="primary", use_container_width=True):
-                # Create tabs in the right column for different outputs
-                with col2:
-                    tab1, tab2 = st.tabs(["📝 Transcript", "📋 Lesson Plan"])
-                    with st.spinner("Processing audio..."):
-                        # Save uploaded file and get temporary paths
-                        temp_path, temp_dir = save_uploaded_file(uploaded_file)
-                        if temp_path and temp_dir:
-                            try:
-                                # Process and potentially chunk the audio file
-                                chunks = process_audio_file(temp_path, temp_dir)
-                                if chunks:
-                                    # Transcribe chunks
-                                    transcript_data = transcribe_audio_chunks(chunks)
-                                    if transcript_data:
-                                        # Format transcript with timestamps
-                                        formatted_transcript = format_transcript_with_timestamps(transcript_data)
-                                        # Generate lesson plan
-                                        lesson_plan = generate_lesson_plan(transcript_data.text)
-                                        # Display transcript in first tab
-                                        with tab1:
-                                            st.markdown(formatted_transcript)
-                                            # Download button for transcript
-                                            st.download_button(
-                                                label="Download Transcript",
-                                                data=formatted_transcript,
-                                                file_name=f"transcript_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
-                                                mime="text/markdown"
-                                            )
-                                        # Display lesson plan in second tab
-                                        with tab2:
-                                            if lesson_plan:
-                                                st.markdown(lesson_plan)
-                                                # Download button for lesson plan
-                                                st.download_button(
-                                                    label="Download Lesson Plan",
-                                                    data=lesson_plan,
-                                                    file_name=f"lesson_plan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md",
-                                                    mime="text/markdown"
-                                                )
-                            finally:
-                                # Clean up temporary files
-                                cleanup_files(temp_dir)
-    # Right column instructions when no file is uploaded
-    if not uploaded_file:
-        with col2:
-            st.info("""
-            👈 Start by uploading an audio file on the left side.
-            The system will automatically:
-            1. Transcribe the lecture with timestamps
-            2. Generate a structured lesson plan
-            3. Provide downloadable versions of both
-            Supported formats: MP3, WAV, M4A
-            Note: Large files will be automatically processed in chunks.
-            """)
-if __name__ == "__main__":
-    main()

 from pathlib import Path
 import shutil
 @st.cache_data
 def save_uploaded_file(uploaded_file):
     """Save uploaded file to a temporary directory and return the path"""
         temp_dir = tempfile.mkdtemp()
         # Get the file extension
         file_extension = Path(uploaded_file.name).suffix
+        if not file_extension:  # If no extension, default to .wav
+            file_extension = '.wav'
         # Create full path with original extension
         temp_path = os.path.join(temp_dir, f"input_audio{file_extension}")
         # Save uploaded file
         with open(temp_path, "wb") as f:
             f.write(uploaded_file.getvalue())
+        # Verify file exists and has content
+        if not os.path.exists(temp_path):
+            raise FileNotFoundError(f"Failed to save file at {temp_path}")
+        if os.path.getsize(temp_path) == 0:
+            raise ValueError("Saved file is empty")
+        st.debug(f"File saved successfully at: {temp_path}")
         return temp_path, temp_dir
     except Exception as e:
         st.error(f"Error saving file: {str(e)}")
+        if temp_dir and os.path.exists(temp_dir):
+            shutil.rmtree(temp_dir)
         return None, None
 def process_audio_file(file_path, temp_dir):
     """Process and potentially chunk the audio file"""
     try:
+        # Verify file exists before processing
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"Audio file not found at: {file_path}")
+        st.debug(f"Processing audio file: {file_path}")
+        # Load audio file with explicit format
+        try:
+            audio = AudioSegment.from_file(file_path, format=Path(file_path).suffix[1:])
+        except:
+            # Fallback to automatic format detection
+            audio = AudioSegment.from_file(file_path)
         # If file is small enough, return it as is
         if os.path.getsize(file_path) <= MAX_FILE_SIZE:
             chunk = chunk.set_frame_rate(16000)  # Set sample rate to 16kHz
             chunk.export(chunk_path, format="mp3", parameters=["-q:a", "0"])
+            # Verify chunk was created successfully
             if not os.path.exists(chunk_path) or os.path.getsize(chunk_path) == 0:
                 raise Exception(f"Failed to create chunk {i}")
+            chunks.append(chunk_path)
+            st.debug(f"Created chunk {i+1}/{num_chunks}")
         return chunks
     except Exception as e:
         st.error(f"Error processing audio: {str(e)}")
+        return None