Spaces:

ABAO77
/

AutoGenScript

Sleeping

ABAO77 committed on Jul 3, 2025

Commit

bac94fa

verified ·

1 Parent(s): 172064c

Update src/utils/helper.py

Files changed (1) hide show

src/utils/helper.py CHANGED Viewed

@@ -107,16 +107,46 @@ async def preprocess_messages(query: str, attachs: list[UploadFile]):
     return messages
 def extract_transcript(video_link: str):
-    ytt_api = YouTubeTranscriptApi()
-    # extract video id from video link
-    video_id = video_link.split("v=")[1]
-    transcript = ytt_api.fetch(video_id)
-    transcript_str = ""
-    for trans in transcript:
-        transcript_str += trans.text + " "
-    logger.info(f"Transcript: {transcript_str}")
-    return transcript_str
 def extract_comment(video_link: str):

     return messages
+# def extract_transcript(video_link: str):
+#     ytt_api = YouTubeTranscriptApi()
+#     # extract video id from video link
+#     video_id = video_link.split("v=")[1]
+#     transcript = ytt_api.fetch(video_id)
+#     transcript_str = ""
+#     for trans in transcript:
+#         transcript_str += trans.text + " "
+#     logger.info(f"Transcript: {transcript_str}")
+#     return transcript_str
+import os
 def extract_transcript(video_link: str):
+    try:
+        # extract video id from video link
+        video_id = video_link.split("v=")[1]
+        # Call Supadata API
+        url = f"https://api.supadata.ai/v1/youtube/transcript"
+        headers = {
+            "x-api-key": os.getenv("SUPADATA_API_KEY")
+        }
+        params = {
+            "videoId": video_id
+        }
+        response = requests.get(url, headers=headers, params=params)
+        response.raise_for_status()  # Raise exception for non-200 status codes
+        data = response.json()
+        text = ""
+        for item in data["content"]:
+            if "text" in item:
+                text += item["text"] + " "
+        logger.info(f"Transcript: {text}")
+        return text
+    except Exception as e:
+        logger.error(f"Failed to extract transcript: {str(e)}")
+        raise Exception(f"Failed to extract transcript: {str(e)}")
 def extract_comment(video_link: str):