Spaces:
Sleeping
Sleeping
Update src/utils/helper.py
Browse files- src/utils/helper.py +39 -9
src/utils/helper.py
CHANGED
|
@@ -107,16 +107,46 @@ async def preprocess_messages(query: str, attachs: list[UploadFile]):
|
|
| 107 |
return messages
|
| 108 |
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def extract_transcript(video_link: str):
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
|
| 122 |
def extract_comment(video_link: str):
|
|
|
|
| 107 |
return messages
|
| 108 |
|
| 109 |
|
| 110 |
+
# def extract_transcript(video_link: str):
|
| 111 |
+
# ytt_api = YouTubeTranscriptApi()
|
| 112 |
+
# # extract video id from video link
|
| 113 |
+
# video_id = video_link.split("v=")[1]
|
| 114 |
+
# transcript = ytt_api.fetch(video_id)
|
| 115 |
+
# transcript_str = ""
|
| 116 |
+
# for trans in transcript:
|
| 117 |
+
# transcript_str += trans.text + " "
|
| 118 |
+
# logger.info(f"Transcript: {transcript_str}")
|
| 119 |
+
# return transcript_str
|
| 120 |
+
|
| 121 |
+
import os
|
| 122 |
def extract_transcript(video_link: str):
    """Fetch the transcript of a YouTube video through the Supadata API.

    The video id is read from the ``v=`` query parameter of ``video_link``
    and sent as ``videoId`` to the Supadata transcript endpoint. The request
    is authenticated with the ``SUPADATA_API_KEY`` environment variable.

    Args:
        video_link: YouTube URL containing a ``v=<video id>`` parameter.

    Returns:
        The ``text`` of every item in the response's ``content`` list,
        each followed by a single space.

    Raises:
        Exception: If the link carries no video id, the API key is not set,
            the HTTP request fails or returns a non-success status, or the
            response does not have the expected shape. The original error
            is chained as ``__cause__``.
    """
    try:
        # Keep only the id itself: drop any query parameters ("&t=10s",
        # "&list=...") or fragment that follow the "v=" value.
        video_id = video_link.split("v=", 1)[1].split("&", 1)[0].split("#", 1)[0]
        if not video_id:
            raise ValueError(f"No video id found in link: {video_link}")

        # Fail early with a clear message instead of sending an
        # unauthenticated request that only comes back as an HTTP error.
        api_key = os.getenv("SUPADATA_API_KEY")
        if not api_key:
            raise ValueError("SUPADATA_API_KEY environment variable is not set")

        # Call Supadata API
        url = "https://api.supadata.ai/v1/youtube/transcript"
        headers = {"x-api-key": api_key}
        params = {"videoId": video_id}

        # requests never times out by default; bound the wait so a stalled
        # connection cannot block the caller forever.
        response = requests.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()  # Raise exception for non-200 status codes

        data = response.json()
        # Each segment is followed by a space (including the last one),
        # matching the format callers already receive.
        text = "".join(
            item["text"] + " " for item in data["content"] if "text" in item
        )

        logger.info(f"Transcript: {text}")
        return text
    except Exception as e:
        logger.error(f"Failed to extract transcript: {str(e)}")
        raise Exception(f"Failed to extract transcript: {str(e)}") from e
|
| 150 |
|
| 151 |
|
| 152 |
def extract_comment(video_link: str):
|