abhishekjoel committed on
Commit
3251b20
·
verified ·
1 Parent(s): 5d16f74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -99
app.py CHANGED
@@ -1,106 +1,122 @@
1
- from fastapi import FastAPI, File, UploadFile, Form
2
- from fastapi.responses import JSONResponse
3
- from fastapi.middleware.cors import CORSMiddleware
4
- from deepgram import Deepgram
5
  import openai
6
- import asyncio
7
  import os
8
-
9
# Application wiring: FastAPI instance, CORS policy, and API clients.
app = FastAPI()

# Enable CORS so a browser front-end on another origin can call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # For production, specify your Framer website URL
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Credentials come from the environment — never hard-code them.
DEEPGRAM_API_KEY = os.getenv('DEEPGRAM_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Fail fast at import time if either service cannot be reached later.
if not DEEPGRAM_API_KEY or not OPENAI_API_KEY:
    raise Exception("API keys for Deepgram and OpenAI must be set as environment variables.")

deepgram_client = Deepgram(DEEPGRAM_API_KEY)
openai.api_key = OPENAI_API_KEY
29
-
30
async def transcribe_audio(audio_bytes):
    """Transcribe raw WAV bytes with Deepgram and return the timed utterances.

    The response's ``results.utterances`` list carries start/end timestamps
    plus the transcript text for each utterance.
    """
    payload = {'buffer': audio_bytes, 'mimetype': 'audio/wav'}
    settings = {'punctuate': True, 'utterances': True}
    result = await deepgram_client.transcription.prerecorded(payload, settings)
    return result['results']['utterances']
35
-
36
def summarize_segment(segment, lesson_plan):
    """Summarize one timed lecture segment against the lesson plan.

    ``segment`` is a Deepgram utterance dict with 'start', 'end' and
    'transcript' keys; returns the model's summary as stripped text.
    """
    prompt = f"""
    Lecture Segment (Timestamp: {segment['start']} - {segment['end']} seconds):

    "{segment['transcript']}"

    Based on the lesson plan below, summarize the key points discussed in this segment:

    Lesson Plan:
    {lesson_plan}

    Provide a concise summary with key takeaways.
    """

    completion = openai.Completion.create(
        engine='text-davinci-003',
        prompt=prompt,
        max_tokens=150,
        temperature=0.5,
    )
    return completion.choices[0].text.strip()
58
-
59
def generate_lecture_notes(summaries, lesson_plan):
    """Merge per-segment summaries into comprehensive lecture notes.

    ``summaries`` is a list of dicts with 'timestamp' and 'summary' keys,
    as produced by the /process endpoint.
    """
    summaries_text = "\n".join(
        f"At {item['timestamp']}: {item['summary']}" for item in summaries
    )

    prompt = f"""
    Using the summarized lecture segments below and the lesson plan, create detailed lecture notes.

    Summarized Segments:
    {summaries_text}

    Lesson Plan:
    {lesson_plan}

    Provide comprehensive lecture notes in a structured format.
    """

    completion = openai.Completion.create(
        engine='text-davinci-003',
        prompt=prompt,
        max_tokens=1000,
        temperature=0.5,
    )
    return completion.choices[0].text.strip()
82
-
83
@app.post("/process")
async def process_files(audio_file: UploadFile = File(...), lesson_plan: str = Form(...)):
    """Transcribe an uploaded lecture, summarize each utterance, and build notes.

    Returns a JSON body with 'summarized_texts' (per-segment summaries with
    timestamps) and 'lecture_notes' (the combined notes).
    """
    audio_bytes = await audio_file.read()

    # Timed utterances from Deepgram.
    utterances = await transcribe_audio(audio_bytes)

    # One summary per segment, keyed by its time range.
    summarized_texts = [
        {
            'timestamp': f"{segment['start']} - {segment['end']}",
            'summary': summarize_segment(segment, lesson_plan),
        }
        for segment in utterances
    ]

    # Fold the segment summaries into full lecture notes.
    lecture_notes = generate_lecture_notes(summarized_texts, lesson_plan)

    return JSONResponse(content={
        'summarized_texts': summarized_texts,
        'lecture_notes': lecture_notes
    })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import openai
2
+ import gradio as gr
3
  import os
4
+ import time
5
+ from datetime import datetime
6
+ from pydub import AudioSegment
7
+ import io
8
+ import yt_dlp
9
+ import PyPDF2
10
# Configure the OpenAI client. Read the key from the environment instead of
# hard-coding it: a literal key in source leaks through version control and
# cannot be rotated without a code change. The placeholder default keeps the
# original "edit me" behavior if the variable is unset.
openai.api_key = os.getenv("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
13
+
14
# Function to convert an audio file to text using OpenAI's Whisper

def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file with OpenAI Whisper.

    Parameters
    ----------
    audio_file : file-like object with a ``.name`` attribute
        The lecture recording, in any container pydub/ffmpeg can decode.

    Returns
    -------
    The verbose-JSON transcription response, which includes timed
    ``segments`` alongside the full text.
    """
    # Decode whatever the user uploaded and re-encode as WAV, a format
    # Whisper always accepts.
    audio = AudioSegment.from_file(audio_file.name)
    buffer = io.BytesIO()
    audio.export(buffer, format="wav")
    buffer.seek(0)
    # The API infers the audio format from the file name, so give the
    # in-memory buffer one.
    buffer.name = "audio.wav"

    # BUG FIX: the original passed the model twice — "whisper-1"
    # positionally AND model='whisper' as a keyword — which raises a
    # TypeError before any request is made.
    response = openai.Audio.transcribe(
        "whisper-1",
        file=buffer,
        response_format='verbose_json'
    )
    return response
31
+
32
# Function to download audio from a YouTube URL

def download_youtube_audio(url):
    """Download the best audio stream of *url* as ``downloaded_audio.mp3``.

    Returns the path of the extracted MP3 file.
    """
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'downloaded_audio.%(ext)s',
        # Post-process with ffmpeg so the result is always an MP3,
        # regardless of the source container.
        'postprocessors': [
            {
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            },
        ],
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])
    return 'downloaded_audio.mp3'
47
+
48
# Function to extract text from a PDF

def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Parameters
    ----------
    pdf_file : path or binary file object readable by PyPDF2.

    Returns
    -------
    str : all page texts joined with newlines.
    """
    # BUG FIX: the original mixed PyPDF2's removed 1.x API
    # (PdfFileReader / numPages / getPage) with the modern extract_text()
    # method; PdfFileReader raises DeprecationError in current PyPDF2.
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for pages without a text layer.
        text += (page.extract_text() or "") + "\n"
    return text
56
+
57
# Function to generate summarised lecture notes using GPT-3.5

def generate_summary(transcription):
    """Summarize a lecture transcription with gpt-3.5-turbo.

    Parameters
    ----------
    transcription : dict or str
        Either a Whisper verbose-JSON response (with a timed ``segments``
        list) or already-plain text (e.g. extracted from a PDF).

    Returns
    -------
    str : the model's summary.
    """
    # BUG FIX: the PDF path hands this function a plain string, which the
    # original unconditionally indexed with ['segments'] and crashed on.
    if isinstance(transcription, str):
        transcription_text = transcription
    else:
        transcription_text = "\n".join(
            f"{segment['start']:.2f}-{segment['end']:.2f}: {segment['text']}"
            for segment in transcription['segments']
        )
    prompt = f"""
    You are an intelligent assistant that will summarize the transcription below.
    The transcription text is:
    {transcription_text}

    Summarize the content into 1000 tokens or less, focusing on the key topics and main points.
    """
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are an expert summarizer."},
            {"role": "user", "content": prompt}
        ]
    )
    summary = response['choices'][0]['message']['content']
    return summary
77
+
78
# Define the main function to handle transcription and summary generation

def process_lecture(input_data, lesson_plan):
    """Transcribe or extract the lecture source, then summarize it.

    Parameters
    ----------
    input_data : tuple[str, Any]
        ("audio", path), ("youtube", url) or ("pdf", file) describing the
        source material.
    lesson_plan : str
        Free-text lesson plan supplied by the UI (kept for interface
        compatibility; not consulted here).

    Returns
    -------
    tuple[str, str] : (timestamped transcription text, summary).
    """
    transcription = ""
    if isinstance(input_data, tuple):
        input_type, input_value = input_data
        if input_type == "audio":
            # BUG FIX: the original used ``gr.File(...)`` as a context
            # manager; gr.File is a UI component, not a file opener.
            with open(input_value, "rb") as f:
                transcription = transcribe_audio(f)
        elif input_type == "youtube":
            audio_path = download_youtube_audio(input_value)
            with open(audio_path, "rb") as f:
                transcription = transcribe_audio(f)
        elif input_type == "pdf":
            transcription = extract_text_from_pdf(input_value)

    # BUG FIX: a PDF source yields plain text, which has no 'segments'
    # key — the original crashed indexing it.
    if isinstance(transcription, str):
        transcription_text = transcription
    else:
        transcription_text = "\n".join(
            f"{segment['start']:.2f}-{segment['end']:.2f}: {segment['text']}"
            for segment in transcription['segments']
        )
    summary = generate_summary(transcription)
    return transcription_text, summary
97
+
98
# Set up Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Lecture Notes Generation Tool\nConvert your lecture recordings, PDFs, or YouTube links into detailed and structured notes easily!")

    with gr.Tab("Upload"):
        input_type = gr.Radio(label="Select Input Type", choices=["Audio File", "PDF Document", "YouTube URL"], type="value")
        audio_input = gr.Audio(label="Upload your lecture audio file (mp3/wav)", visible=False)
        pdf_input = gr.File(label="Upload PDF Document", visible=False)
        youtube_input = gr.Textbox(label="Enter YouTube URL", visible=False)
        lesson_plan_input = gr.Textbox(label="Enter the lesson plan for context", lines=5)

        def update_visibility(input_choice):
            # Show only the widget matching the selected input type.
            return (
                gr.update(visible=input_choice == "Audio File"),
                gr.update(visible=input_choice == "PDF Document"),
                gr.update(visible=input_choice == "YouTube URL"),
            )

        input_type.change(fn=update_visibility, inputs=[input_type], outputs=[audio_input, pdf_input, youtube_input])

        submit_btn = gr.Button("Generate Notes")

    with gr.Tab("Transcription and Notes"):
        transcription_output = gr.Textbox(label="Lecture Transcription with Timestamps", interactive=False)
        summary_output = gr.Textbox(label="Summarized Lecture Notes", interactive=False)

    # BUG FIX: Gradio's ``inputs`` must be a flat list of components — the
    # original passed a (component, list) tuple, which Gradio rejects, and
    # the radio labels never matched process_lecture's "audio"/"pdf"/
    # "youtube" keys. This adapter maps flat component values onto
    # process_lecture's (type, value) tuple interface.
    def _dispatch(choice, audio_path, pdf_file, youtube_url, lesson_plan):
        if choice == "Audio File":
            return process_lecture(("audio", audio_path), lesson_plan)
        if choice == "PDF Document":
            return process_lecture(("pdf", pdf_file), lesson_plan)
        return process_lecture(("youtube", youtube_url), lesson_plan)

    submit_btn.click(
        fn=_dispatch,
        inputs=[input_type, audio_input, pdf_input, youtube_input, lesson_plan_input],
        outputs=[transcription_output, summary_output],
    )

# BUG FIX: the launch call was fused into a comment in the original, so the
# app never actually started.
demo.launch(share=True)