ABAO77 committed on
Commit
bac94fa
·
verified ·
1 Parent(s): 172064c

Update src/utils/helper.py

Browse files
Files changed (1) hide show
  1. src/utils/helper.py +39 -9
src/utils/helper.py CHANGED
@@ -107,16 +107,46 @@ async def preprocess_messages(query: str, attachs: list[UploadFile]):
107
  return messages
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def extract_transcript(video_link: str):
111
- ytt_api = YouTubeTranscriptApi()
112
- # extract video id from video link
113
- video_id = video_link.split("v=")[1]
114
- transcript = ytt_api.fetch(video_id)
115
- transcript_str = ""
116
- for trans in transcript:
117
- transcript_str += trans.text + " "
118
- logger.info(f"Transcript: {transcript_str}")
119
- return transcript_str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
 
122
  def extract_comment(video_link: str):
 
107
  return messages
108
 
109
 
110
+ # def extract_transcript(video_link: str):
111
+ # ytt_api = YouTubeTranscriptApi()
112
+ # # extract video id from video link
113
+ # video_id = video_link.split("v=")[1]
114
+ # transcript = ytt_api.fetch(video_id)
115
+ # transcript_str = ""
116
+ # for trans in transcript:
117
+ # transcript_str += trans.text + " "
118
+ # logger.info(f"Transcript: {transcript_str}")
119
+ # return transcript_str
120
+
121
+ import os
122
  def extract_transcript(video_link: str):
123
+ try:
124
+ # extract video id from video link
125
+ video_id = video_link.split("v=")[1]
126
+
127
+ # Call Supadata API
128
+ url = f"https://api.supadata.ai/v1/youtube/transcript"
129
+ headers = {
130
+ "x-api-key": os.getenv("SUPADATA_API_KEY")
131
+ }
132
+ params = {
133
+ "videoId": video_id
134
+ }
135
+
136
+ response = requests.get(url, headers=headers, params=params)
137
+ response.raise_for_status() # Raise exception for non-200 status codes
138
+
139
+ data = response.json()
140
+ text = ""
141
+ for item in data["content"]:
142
+ if "text" in item:
143
+ text += item["text"] + " "
144
+
145
+ logger.info(f"Transcript: {text}")
146
+ return text
147
+ except Exception as e:
148
+ logger.error(f"Failed to extract transcript: {str(e)}")
149
+ raise Exception(f"Failed to extract transcript: {str(e)}")
150
 
151
 
152
  def extract_comment(video_link: str):