Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ app = FastAPI()
|
|
| 15 |
|
| 16 |
app.add_middleware(
|
| 17 |
CORSMiddleware,
|
| 18 |
-
allow_origins=["https://studyscribe.framer.ai/"], # Replace "*" with your
|
| 19 |
allow_credentials=True,
|
| 20 |
allow_methods=["*"],
|
| 21 |
allow_headers=["*"],
|
|
@@ -51,20 +51,45 @@ async def log_requests(request: Request, call_next):
|
|
| 51 |
def transcribe_audio(audio_file_path):
|
| 52 |
try:
|
| 53 |
with open(audio_file_path, "rb") as audio_file:
|
| 54 |
-
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
| 55 |
-
return transcript
|
| 56 |
except Exception as e:
|
| 57 |
logger.error(f"Error in transcribe_audio: {e}")
|
| 58 |
raise HTTPException(status_code=500, detail="Error during audio transcription.")
|
| 59 |
|
| 60 |
-
def split_audio_file(audio_file_path,
|
| 61 |
audio = AudioSegment.from_file(audio_file_path)
|
| 62 |
duration_ms = len(audio)
|
| 63 |
chunks = []
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
chunks.append(chunk)
|
|
|
|
|
|
|
| 68 |
return chunks
|
| 69 |
|
| 70 |
def summarize_text(text, lesson_plan):
|
|
@@ -100,13 +125,13 @@ def summarize_text(text, lesson_plan):
|
|
| 100 |
|
| 101 |
def generate_lecture_notes(summaries, lesson_plan):
|
| 102 |
try:
|
| 103 |
-
summaries_text = "\n".join([f"
|
| 104 |
|
| 105 |
system_prompt = "You are an assistant that generates detailed lecture notes based on summaries and a lesson plan."
|
| 106 |
user_prompt = f"""
|
| 107 |
-
Using the summarized text
|
| 108 |
|
| 109 |
-
Summarized
|
| 110 |
{summaries_text}
|
| 111 |
|
| 112 |
Lesson Plan:
|
|
@@ -168,19 +193,38 @@ async def process_files(
|
|
| 168 |
else:
|
| 169 |
raise HTTPException(status_code=400, detail="No valid audio input provided.")
|
| 170 |
|
| 171 |
-
|
| 172 |
-
audio_chunks = split_audio_file(tmp_file_path,
|
| 173 |
|
| 174 |
summarized_texts = []
|
|
|
|
|
|
|
| 175 |
for index, chunk in enumerate(audio_chunks):
|
| 176 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as chunk_file:
|
| 177 |
chunk.export(chunk_file.name, format="wav")
|
| 178 |
chunk_file_path = chunk_file.name
|
| 179 |
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
-
|
| 183 |
-
|
|
|
|
| 184 |
|
| 185 |
os.unlink(chunk_file_path)
|
| 186 |
|
|
|
|
| 15 |
|
| 16 |
app.add_middleware(
|
| 17 |
CORSMiddleware,
|
| 18 |
+
allow_origins=["https://studyscribe.framer.ai/"], # Replace "*" with your frontend URL in production
|
| 19 |
allow_credentials=True,
|
| 20 |
allow_methods=["*"],
|
| 21 |
allow_headers=["*"],
|
|
|
|
| 51 |
def transcribe_audio(audio_file_path):
    """Transcribe a local audio file with OpenAI Whisper.

    Args:
        audio_file_path: Path to an audio file readable in binary mode.

    Returns:
        The Whisper transcription result in ``verbose_json`` form; the
        downstream code reads ``transcript['segments']`` for per-segment
        timestamps, which is why verbose_json is requested here.

    Raises:
        HTTPException: 500, wrapping any failure during transcription.
    """
    try:
        with open(audio_file_path, "rb") as audio_file:
            # verbose_json is required so callers can read segment timestamps.
            transcript = openai.Audio.transcribe(
                "whisper-1", audio_file, response_format="verbose_json"
            )
            return transcript
    except Exception as e:
        logger.error(f"Error in transcribe_audio: {e}")
        # Chain the original exception so the real cause survives in tracebacks.
        raise HTTPException(status_code=500, detail="Error during audio transcription.") from e
|
| 59 |
|
| 60 |
+
def _exported_wav_size_mb(chunk):
    """Export *chunk* to a throwaway WAV file and return its size in MiB.

    The NamedTemporaryFile is closed BEFORE pydub exports to its path:
    exporting while the handle is still open fails on Windows, and the
    original close()/unlink() inside the ``with`` body double-closed the
    handle. Cleanup runs in ``finally`` so the temp file never leaks.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_chunk_file:
        temp_path = temp_chunk_file.name
    try:
        chunk.export(temp_path, format="wav")
        return os.path.getsize(temp_path) / (1024 * 1024)
    finally:
        os.unlink(temp_path)


def split_audio_file(audio_file_path, max_chunk_size_mb=24):
    """Split an audio file into chunks whose exported WAV fits a size cap.

    Starts each chunk at 5 minutes and shrinks it in 1-minute steps until
    the exported WAV is at most *max_chunk_size_mb* MiB (the 24 MiB default
    stays under Whisper's 25 MB upload limit).

    Args:
        audio_file_path: Path to any format pydub/ffmpeg can read.
        max_chunk_size_mb: Per-chunk size ceiling in MiB.

    Returns:
        List of pydub ``AudioSegment`` chunks covering the whole file in order.

    Raises:
        Exception: If even a 1-minute chunk exceeds the size cap.
    """
    audio = AudioSegment.from_file(audio_file_path)
    duration_ms = len(audio)
    chunks = []
    start_ms = 0
    min_chunk_ms = 60 * 1000           # never shrink below 1 minute
    initial_chunk_ms = 5 * 60 * 1000   # first attempt: 5 minutes

    while start_ms < duration_ms:
        chunk_duration_ms = min(initial_chunk_ms, duration_ms - start_ms)
        chunk = audio[start_ms:start_ms + chunk_duration_ms]

        # Shrink in 1-minute steps until the exported WAV fits the cap.
        while _exported_wav_size_mb(chunk) > max_chunk_size_mb:
            if chunk_duration_ms <= min_chunk_ms:
                # Minimum duration reached; cannot reduce further.
                raise Exception("Cannot split audio into chunks small enough to meet the size limit.")
            chunk_duration_ms -= min_chunk_ms
            chunk = audio[start_ms:start_ms + chunk_duration_ms]

        chunks.append(chunk)
        # Advance by the (possibly reduced) duration actually used.
        start_ms += chunk_duration_ms

    return chunks
|
| 94 |
|
| 95 |
def summarize_text(text, lesson_plan):
|
|
|
|
| 125 |
|
| 126 |
def generate_lecture_notes(summaries, lesson_plan):
|
| 127 |
try:
|
| 128 |
+
summaries_text = "\n".join([f"At {item['timestamp']}: {item['summary']}" for item in summaries])
|
| 129 |
|
| 130 |
system_prompt = "You are an assistant that generates detailed lecture notes based on summaries and a lesson plan."
|
| 131 |
user_prompt = f"""
|
| 132 |
+
Using the summarized text segments below and the lesson plan, create detailed lecture notes.
|
| 133 |
|
| 134 |
+
Summarized Segments:
|
| 135 |
{summaries_text}
|
| 136 |
|
| 137 |
Lesson Plan:
|
|
|
|
| 193 |
else:
|
| 194 |
raise HTTPException(status_code=400, detail="No valid audio input provided.")
|
| 195 |
|
| 196 |
+
# Use the updated split_audio_file function
|
| 197 |
+
audio_chunks = split_audio_file(tmp_file_path, max_chunk_size_mb=24)
|
| 198 |
|
| 199 |
summarized_texts = []
|
| 200 |
+
current_chunk_start_time = 0
|
| 201 |
+
|
| 202 |
for index, chunk in enumerate(audio_chunks):
|
| 203 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as chunk_file:
|
| 204 |
chunk.export(chunk_file.name, format="wav")
|
| 205 |
chunk_file_path = chunk_file.name
|
| 206 |
|
| 207 |
+
# Transcribe chunk
|
| 208 |
+
transcript = transcribe_audio(chunk_file_path)
|
| 209 |
+
segments = transcript.get('segments', [])
|
| 210 |
+
|
| 211 |
+
for segment in segments:
|
| 212 |
+
# Adjust the segment timestamps to account for the chunk's position in the full audio
|
| 213 |
+
segment_start = segment['start'] + current_chunk_start_time
|
| 214 |
+
segment_end = segment['end'] + current_chunk_start_time
|
| 215 |
+
segment_text = segment['text']
|
| 216 |
+
|
| 217 |
+
# Summarize the segment
|
| 218 |
+
summary = summarize_text(segment_text, lesson_plan)
|
| 219 |
+
|
| 220 |
+
summarized_texts.append({
|
| 221 |
+
'timestamp': f"{segment_start:.2f} - {segment_end:.2f}",
|
| 222 |
+
'summary': summary
|
| 223 |
+
})
|
| 224 |
|
| 225 |
+
# Update the chunk start time
|
| 226 |
+
chunk_duration = len(chunk) / 1000.0 # duration in seconds
|
| 227 |
+
current_chunk_start_time += chunk_duration
|
| 228 |
|
| 229 |
os.unlink(chunk_file_path)
|
| 230 |
|