abhishekjoel committed on
Commit
af29e2b
·
verified ·
1 Parent(s): 2e97d93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -7
app.py CHANGED
@@ -13,11 +13,16 @@ openai.api_key = os.getenv('OPENAI_API_KEY')
13
  # Function to transcribe audio using OpenAI Whisper
14
  def transcribe_audio(audio_file):
15
  try:
16
- audio = AudioSegment.from_file(audio_file)
 
 
 
 
17
  buffer = io.BytesIO()
18
  audio.export(buffer, format="wav")
19
  buffer.seek(0)
20
  buffer.name = "audio.wav"
 
21
  response = openai.Audio.transcribe(
22
  "whisper-1",
23
  file=buffer,
@@ -30,15 +35,23 @@ def transcribe_audio(audio_file):
30
 
31
  # Function to extract text from PDF
32
  def extract_text_from_pdf(pdf_file):
33
- reader = PyPDF2.PdfReader(pdf_file)
34
- text = ""
35
- for page in reader.pages:
36
- text += page.extract_text() + "\n"
37
- return text
 
 
 
 
 
 
 
38
 
39
  # Function to get YouTube transcript
40
  def get_youtube_transcript(url):
41
  try:
 
42
  if "watch?v=" in url:
43
  video_id = url.split("watch?v=")[1].split("&")[0]
44
  elif "youtu.be/" in url:
@@ -46,9 +59,11 @@ def get_youtube_transcript(url):
46
  else:
47
  st.error("Invalid YouTube URL.")
48
  return None
 
49
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
50
  transcript = transcript_list.find_transcript(['en'])
51
  transcript_data = transcript.fetch()
 
52
  transcription_text = " ".join([entry['text'] for entry in transcript_data])
53
  return transcription_text
54
  except Exception as e:
@@ -77,7 +92,8 @@ def create_pdf(notes):
77
 
78
  # Main app
79
  def main():
80
- st.title("AI Notes Generation bot")
 
81
 
82
  st.markdown("---")
83
  st.subheader("Upload your file:")
@@ -101,6 +117,9 @@ def main():
101
  return
102
  elif input_type == "PDF Document" and pdf_input:
103
  transcription_text = extract_text_from_pdf(pdf_input)
 
 
 
104
  elif input_type == "YouTube URL" and youtube_input:
105
  transcription_text = get_youtube_transcript(youtube_input)
106
  if not transcription_text:
 
13
  # Function to transcribe audio using OpenAI Whisper
14
  def transcribe_audio(audio_file):
15
  try:
16
+ # Read the bytes from the uploaded audio file
17
+ audio_bytes = audio_file.read()
18
+ # Use io.BytesIO to create a file-like object
19
+ audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
20
+ # Convert to WAV and prepare for transcription
21
  buffer = io.BytesIO()
22
  audio.export(buffer, format="wav")
23
  buffer.seek(0)
24
  buffer.name = "audio.wav"
25
+ # Transcribe audio using OpenAI Whisper
26
  response = openai.Audio.transcribe(
27
  "whisper-1",
28
  file=buffer,
 
35
 
36
  # Function to extract text from PDF
37
  def extract_text_from_pdf(pdf_file):
38
+ try:
39
+ # Read the bytes from the uploaded PDF file
40
+ pdf_bytes = pdf_file.read()
41
+ # Use io.BytesIO to create a file-like object
42
+ reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
43
+ text = ""
44
+ for page in reader.pages:
45
+ text += page.extract_text() + "\n"
46
+ return text
47
+ except Exception as e:
48
+ st.error(f"Error processing PDF: {str(e)}")
49
+ return None
50
 
51
  # Function to get YouTube transcript
52
  def get_youtube_transcript(url):
53
  try:
54
+ # Extract video ID from URL
55
  if "watch?v=" in url:
56
  video_id = url.split("watch?v=")[1].split("&")[0]
57
  elif "youtu.be/" in url:
 
59
  else:
60
  st.error("Invalid YouTube URL.")
61
  return None
62
+ # Fetch transcript
63
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
64
  transcript = transcript_list.find_transcript(['en'])
65
  transcript_data = transcript.fetch()
66
+ # Combine transcript texts
67
  transcription_text = " ".join([entry['text'] for entry in transcript_data])
68
  return transcription_text
69
  except Exception as e:
 
92
 
93
  # Main app
94
  def main():
95
+ st.set_page_config(layout="wide")
96
+ st.markdown("<h1 style='text-align: center;'>AI Notes Generation Bot 🤖</h1>", unsafe_allow_html=True)
97
 
98
  st.markdown("---")
99
  st.subheader("Upload your file:")
 
117
  return
118
  elif input_type == "PDF Document" and pdf_input:
119
  transcription_text = extract_text_from_pdf(pdf_input)
120
+ if not transcription_text:
121
+ st.error("Failed to extract text from PDF.")
122
+ return
123
  elif input_type == "YouTube URL" and youtube_input:
124
  transcription_text = get_youtube_transcript(youtube_input)
125
  if not transcription_text: