MHMisinfo

Sleeping

App Files Files Community

rocky250 committed on Apr 20

Commit

6ecb9bd

verified ·

1 Parent(s): 82e6add

Update fetcher.py

Browse files

Files changed (1) hide show

fetcher.py +18 -18

fetcher.py CHANGED Viewed

@@ -7,9 +7,9 @@ import requests
 import pandas as pd
-# ─────────────────────────────────────────────────────────────────────────────
 #  Video ID extraction
-# ─────────────────────────────────────────────────────────────────────────────
 def extract_video_id(url_or_id: str) -> str | None:
     """Return an 11-char YouTube video ID, or None if not found."""
@@ -24,9 +24,9 @@ def extract_video_id(url_or_id: str) -> str | None:
     return None
-# ─────────────────────────────────────────────────────────────────────────────
 #  Duration parser
-# ─────────────────────────────────────────────────────────────────────────────
 def _parse_duration(iso: str) -> str:
     m = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", iso or "PT0S")
@@ -36,9 +36,9 @@ def _parse_duration(iso: str) -> str:
     return f"{h}:{mn:02d}:{s:02d}" if h else f"{mn}:{s:02d}"
-# ─────────────────────────────────────────────────────────────────────────────
 #  Metadata
-# ─────────────────────────────────────────────────────────────────────────────
 def fetch_video_metadata(video_id: str, api_key: str) -> tuple[dict | None, str | None]:
     """Return (meta_dict, error_string).  One will be None."""
@@ -88,9 +88,9 @@ def fetch_video_metadata(video_id: str, api_key: str) -> tuple[dict | None, str
         return None, str(exc)
-# ─────────────────────────────────────────────────────────────────────────────
 #  Transcript
-# ─────────────────────────────────────────────────────────────────────────────
 def fetch_transcript(video_id: str) -> tuple[str, str]:
     """Return (text, status_message)."""
@@ -98,15 +98,15 @@ def fetch_transcript(video_id: str) -> tuple[str, str]:
         from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
         segments = YouTubeTranscriptApi.get_transcript(video_id)
         text = " ".join(s["text"] for s in segments)
-        return text, f"✅ Transcript: {len(text.split())} words"
     except Exception as exc:
         short = str(exc)[:80]
-        return "", f"⚠️ Transcript unavailable: {short}"
-# ─────────────────────────────────────────────────────────────────────────────
 #  Comments
-# ─────────────────────────────────────────────────────────────────────────────
 def fetch_comments(
     video_id: str,
@@ -155,20 +155,20 @@ def fetch_comments(
                 break
         if not rows:
-            return pd.DataFrame(), "⚠️ No comments fetched (comments may be disabled)"
         df = pd.DataFrame(rows)
-        return df, f"✅ Comments: {len(df)} fetched"
     except requests.exceptions.Timeout:
-        return pd.DataFrame(), "❌ Comments request timed out"
     except Exception as exc:
-        return pd.DataFrame(), f"❌ Comments error: {str(exc)[:80]}"
-# ─────────────────────────────────────────────────────────────────────────────
 #  Search by keyword
-# ─────────────────────────────────────────────────────────────────────────────
 def search_videos_by_title(
     keyword: str,

 import pandas as pd
 #  Video ID extraction
 def extract_video_id(url_or_id: str) -> str | None:
     """Return an 11-char YouTube video ID, or None if not found."""
     return None
 #  Duration parser
 def _parse_duration(iso: str) -> str:
     m = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", iso or "PT0S")
     return f"{h}:{mn:02d}:{s:02d}" if h else f"{mn}:{s:02d}"
 #  Metadata
 def fetch_video_metadata(video_id: str, api_key: str) -> tuple[dict | None, str | None]:
     """Return (meta_dict, error_string).  One will be None."""
         return None, str(exc)
 #  Transcript
 def fetch_transcript(video_id: str) -> tuple[str, str]:
     """Return (text, status_message)."""
         from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
         segments = YouTubeTranscriptApi.get_transcript(video_id)
         text = " ".join(s["text"] for s in segments)
+        return text, f" Transcript: {len(text.split())} words"
     except Exception as exc:
         short = str(exc)[:80]
+        return "", f" Transcript unavailable: {short}"
 #  Comments
 def fetch_comments(
     video_id: str,
                 break
         if not rows:
+            return pd.DataFrame(), " No comments fetched (comments may be disabled)"
         df = pd.DataFrame(rows)
+        return df, f" Comments: {len(df)} fetched"
     except requests.exceptions.Timeout:
+        return pd.DataFrame(), " Comments request timed out"
     except Exception as exc:
+        return pd.DataFrame(), f" Comments error: {str(exc)[:80]}"
 #  Search by keyword
 def search_videos_by_title(
     keyword: str,