File size: 5,788 Bytes
a961ef9
3251b20
b4c4e5e
3251b20
 
 
 
 
 
 
bdb27e7
 
3251b20
 
 
bdb27e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3251b20
 
 
bdb27e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3251b20
 
 
bdb27e7
 
 
 
 
 
 
 
 
3251b20
 
 
bdb27e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3251b20
 
a983102
3251b20
68822d5
bdb27e7
 
 
 
 
 
 
 
68822d5
 
3251b20
a983102
68822d5
 
 
 
 
bdb27e7
a983102
 
3251b20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a983102
3251b20
a983102
bdb27e7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import openai
import gradio as gr
import os
import time
from datetime import datetime
from pydub import AudioSegment
import io
import yt_dlp
import PyPDF2

# Define your OpenAI API key using environment variable (recommended for Hugging Face Spaces)
openai.api_key = os.getenv("OPENAI_API_KEY")

# Function to convert audio file to text using OpenAI's Whisper
def transcribe_audio(audio_file):
    """Transcribe an audio file with OpenAI Whisper.

    Parameters
    ----------
    audio_file : str | file-like
        Anything accepted by ``pydub.AudioSegment.from_file`` (path or
        open binary file object).

    Returns
    -------
    dict
        Verbose-JSON transcription; includes a ``segments`` list with
        ``start``/``end``/``text`` per segment.

    Raises
    ------
    Exception
        Re-raised after logging on any decode/export/API failure.
    """
    try:
        # Normalize whatever format we got to WAV in memory; Whisper
        # accepts WAV directly.
        audio = AudioSegment.from_file(audio_file)
        buffer = io.BytesIO()
        audio.export(buffer, format="wav")
        buffer.seek(0)
        # The OpenAI client sniffs the upload format from the file name;
        # a bare BytesIO has none, so attach one.
        buffer.name = "audio.wav"

        # BUG FIX: the original passed "whisper-1" positionally (which IS
        # the `model` parameter in the 0.x SDK) AND model='whisper' as a
        # keyword, raising "got multiple values for argument 'model'".
        response = openai.Audio.transcribe(
            model="whisper-1",
            file=buffer,
            response_format='verbose_json'
        )
        return response
    except Exception as e:
        print(f"Error in transcribe_audio: {str(e)}")
        raise

# Function to download audio from YouTube URL
def download_youtube_audio(url):
    """Fetch the best audio track of a YouTube video and save it as mp3.

    Parameters
    ----------
    url : str
        The YouTube URL to download.

    Returns
    -------
    str
        The fixed local path 'downloaded_audio.mp3'.

    Raises
    ------
    Exception
        Re-raised after logging if the download or extraction fails.
    """
    # yt-dlp configuration: grab the best available audio stream and let
    # ffmpeg transcode it to a 192 kbps mp3 at a fixed output name.
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'downloaded_audio.%(ext)s',
        'postprocessors': [
            {
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }
        ],
    }
    try:
        with yt_dlp.YoutubeDL(options) as downloader:
            downloader.download([url])
        return 'downloaded_audio.mp3'
    except Exception as exc:
        print(f"Error in download_youtube_audio: {str(exc)}")
        raise

# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Parameters
    ----------
    pdf_file : str | file-like
        Path or binary file object accepted by ``PyPDF2.PdfReader``.

    Returns
    -------
    str
        Concatenated page text, one newline appended per page.

    Raises
    ------
    Exception
        Re-raised after logging on any parse failure.
    """
    try:
        # BUG FIX: PdfFileReader / numPages / getPage were removed in
        # PyPDF2 3.x (they raise DeprecationError). PdfReader + .pages is
        # the supported API and also exists in 2.x.
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        pages = []
        for page in pdf_reader.pages:
            # extract_text() may return None for image-only pages; the
            # original would have crashed concatenating None + "\n".
            pages.append((page.extract_text() or "") + "\n")
        return "".join(pages)
    except Exception as e:
        print(f"Error in extract_text_from_pdf: {str(e)}")
        raise

# Function to generate summarised lecture notes using GPT-3.5
def generate_summary(transcription):
    """Summarize a lecture transcription with GPT-3.5.

    Parameters
    ----------
    transcription : dict | str
        Either a Whisper verbose-JSON dict (with a ``segments`` list) or a
        plain string (e.g. text extracted from a PDF).

    Returns
    -------
    str
        The model's summary text.

    Raises
    ------
    Exception
        Re-raised after logging on any API failure.
    """
    try:
        # BUG FIX: the original unconditionally indexed
        # transcription['segments'], which crashes for the PDF path where
        # process_lecture passes a plain string.
        if isinstance(transcription, str):
            transcription_text = transcription
        else:
            transcription_text = "\n".join(
                f"{segment['start']:.2f}-{segment['end']:.2f}: {segment['text']}"
                for segment in transcription['segments']
            )
        prompt = f"""
        You are an intelligent assistant that will summarize the transcription below.
        The transcription text is:
        {transcription_text}

        Summarize the content into 1000 tokens or less, focusing on the key topics and main points.
        """
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an expert summarizer."},
                {"role": "user", "content": prompt}
            ]
        )
        summary = response['choices'][0]['message']['content']
        return summary
    except Exception as e:
        print(f"Error in generate_summary: {str(e)}")
        raise

# Define the main function to handle transcription and summary generation
def _format_transcription(transcription):
    """Render a transcription for display.

    A Whisper verbose-JSON dict becomes timestamped "start-end: text" lines;
    a plain string (PDF text) is returned unchanged.
    """
    if isinstance(transcription, str):
        return transcription
    return "\n".join(
        f"{segment['start']:.2f}-{segment['end']:.2f}: {segment['text']}"
        for segment in transcription['segments']
    )

def process_lecture(input_type, audio_input, pdf_input, youtube_input, lesson_plan):
    """Dispatch on the selected input type, transcribe, and summarize.

    Parameters
    ----------
    input_type : str
        One of "Audio File", "YouTube URL", "PDF Document".
    audio_input, pdf_input, youtube_input
        The corresponding Gradio component values; only the one matching
        ``input_type`` is used.
    lesson_plan : str
        Currently unused context text (kept for interface compatibility
        with the Gradio wiring).

    Returns
    -------
    tuple[str, str]
        (transcription text, summary) — or human-readable error strings in
        either slot on failure, never an exception.
    """
    transcription = ""
    try:
        if input_type == "Audio File" and audio_input is not None:
            transcription = transcribe_audio(audio_input)
        elif input_type == "YouTube URL" and youtube_input:
            audio_path = download_youtube_audio(youtube_input)
            with open(audio_path, "rb") as f:
                transcription = transcribe_audio(f)
        elif input_type == "PDF Document" and pdf_input is not None:
            transcription = extract_text_from_pdf(pdf_input)
    except Exception as e:
        return f"Error during processing: {str(e)}", "No summary available."

    # Guard clause: nothing was produced (unknown input type or empty result).
    if not transcription:
        return "No transcription available.", "No summary available."

    try:
        # BUG FIX: the original always indexed transcription['segments']
        # here, crashing for PDF input where transcription is a plain str.
        transcription_text = _format_transcription(transcription)
        summary = generate_summary(transcription)
        return transcription_text, summary
    except Exception as e:
        return f"Transcription generated, but error during summary generation: {str(e)}", "No summary available."

# Set up Gradio interface
with gr.Blocks() as demo:
    # Top-level UI: title banner plus an "Upload" tab holding all inputs.
    gr.Markdown("# Lecture Notes Generation Tool\nConvert your lecture recordings, PDFs, or YouTube links into detailed and structured notes easily!")

    with gr.Tab("Upload"):
        # Radio button selects which of the three input widgets is active;
        # the others start hidden and are toggled by update_visibility below.
        input_type = gr.Radio(label="Select Input Type", choices=["Audio File", "PDF Document", "YouTube URL"], type="value")
        audio_input = gr.Audio(label="Upload your lecture audio file (mp3/wav)", visible=False)
        pdf_input = gr.File(label="Upload PDF Document", visible=False)
        youtube_input = gr.Textbox(label="Enter YouTube URL", visible=False)
        # NOTE(review): lesson_plan_input is passed to process_lecture but
        # not otherwise used there — presumably intended as summary context.
        lesson_plan_input = gr.Textbox(label="Enter the lesson plan for context", lines=5)

        def update_visibility(input_choice):
            # Show exactly the widget matching the selected input type.
            return gr.update(visible=input_choice == "Audio File"), gr.update(visible=input_choice == "PDF Document"), gr.update(visible=input_choice == "YouTube URL")

        input_type.change(fn=update_visibility, inputs=[input_type], outputs=[audio_input, pdf_input, youtube_input])

        submit_btn = gr.Button("Generate Notes")

        # NOTE(review): this results tab is nested INSIDE the "Upload" tab;
        # it may have been meant as a sibling tab — confirm intended layout.
        with gr.Tab("Transcription and Notes"):
            transcription_output = gr.Textbox(label="Lecture Transcription with Timestamps", interactive=False)
            summary_output = gr.Textbox(label="Summarized Lecture Notes", interactive=False)

    submit_btn.click(fn=process_lecture, inputs=[input_type, audio_input, pdf_input, youtube_input, lesson_plan_input], outputs=[transcription_output, summary_output])

# Launch the interface
if __name__ == "__main__":
    # share=True asks Gradio to create a public tunnel URL in addition to
    # the local server — intended for temporary demo sharing.
    demo.launch(share=True)