rocky250 committed on
Commit
6ecb9bd
·
verified ·
1 Parent(s): 82e6add

Update fetcher.py

Browse files
Files changed (1) hide show
  1. fetcher.py +18 -18
fetcher.py CHANGED
@@ -7,9 +7,9 @@ import requests
7
  import pandas as pd
8
 
9
 
10
- # ─────────────────────────────────────────────────────────────────────────────
11
  # Video ID extraction
12
- # ─────────────────────────────────────────────────────────────────────────────
13
 
14
  def extract_video_id(url_or_id: str) -> str | None:
15
  """Return an 11-char YouTube video ID, or None if not found."""
@@ -24,9 +24,9 @@ def extract_video_id(url_or_id: str) -> str | None:
24
  return None
25
 
26
 
27
- # ─────────────────────────────────────────────────────────────────────────────
28
  # Duration parser
29
- # ─────────────────────────────────────────────────────────────────────────────
30
 
31
  def _parse_duration(iso: str) -> str:
32
  m = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", iso or "PT0S")
@@ -36,9 +36,9 @@ def _parse_duration(iso: str) -> str:
36
  return f"{h}:{mn:02d}:{s:02d}" if h else f"{mn}:{s:02d}"
37
 
38
 
39
- # ─────────────────────────────────────────────────────────────────────────────
40
  # Metadata
41
- # ─────────────────────────────────────────────────────────────────────────────
42
 
43
  def fetch_video_metadata(video_id: str, api_key: str) -> tuple[dict | None, str | None]:
44
  """Return (meta_dict, error_string). One will be None."""
@@ -88,9 +88,9 @@ def fetch_video_metadata(video_id: str, api_key: str) -> tuple[dict | None, str
88
  return None, str(exc)
89
 
90
 
91
- # ─────────────────────────────────────────────────────────────────────────────
92
  # Transcript
93
- # ─────────────────────────────────────────────────────────────────────────────
94
 
95
  def fetch_transcript(video_id: str) -> tuple[str, str]:
96
  """Return (text, status_message)."""
@@ -98,15 +98,15 @@ def fetch_transcript(video_id: str) -> tuple[str, str]:
98
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
99
  segments = YouTubeTranscriptApi.get_transcript(video_id)
100
  text = " ".join(s["text"] for s in segments)
101
- return text, f"✅ Transcript: {len(text.split())} words"
102
  except Exception as exc:
103
  short = str(exc)[:80]
104
- return "", f"⚠️ Transcript unavailable: {short}"
 
105
 
106
 
107
- # ─────────────────────────────────────────────────────────────────────────────
108
  # Comments
109
- # ─────────────────────────────────────────────────────────────────────────────
110
 
111
  def fetch_comments(
112
  video_id: str,
@@ -155,20 +155,20 @@ def fetch_comments(
155
  break
156
 
157
  if not rows:
158
- return pd.DataFrame(), "⚠️ No comments fetched (comments may be disabled)"
159
 
160
  df = pd.DataFrame(rows)
161
- return df, f"✅ Comments: {len(df)} fetched"
162
 
163
  except requests.exceptions.Timeout:
164
- return pd.DataFrame(), "❌ Comments request timed out"
165
  except Exception as exc:
166
- return pd.DataFrame(), f"❌ Comments error: {str(exc)[:80]}"
 
167
 
168
 
169
- # ─────────────────────────────────────────────────────────────────────────────
170
  # Search by keyword
171
- # ─────────────────────────────────────────────────────────────────────────────
172
 
173
  def search_videos_by_title(
174
  keyword: str,
 
7
  import pandas as pd
8
 
9
 
10
+
11
  # Video ID extraction
12
+
13
 
14
  def extract_video_id(url_or_id: str) -> str | None:
15
  """Return an 11-char YouTube video ID, or None if not found."""
 
24
  return None
25
 
26
 
27
+
28
  # Duration parser
29
+
30
 
31
  def _parse_duration(iso: str) -> str:
32
  m = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", iso or "PT0S")
 
36
  return f"{h}:{mn:02d}:{s:02d}" if h else f"{mn}:{s:02d}"
37
 
38
 
39
+
40
  # Metadata
41
+
42
 
43
  def fetch_video_metadata(video_id: str, api_key: str) -> tuple[dict | None, str | None]:
44
  """Return (meta_dict, error_string). One will be None."""
 
88
  return None, str(exc)
89
 
90
 
91
+
92
  # Transcript
93
+
94
 
95
  def fetch_transcript(video_id: str) -> tuple[str, str]:
96
  """Return (text, status_message)."""
 
98
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
99
  segments = YouTubeTranscriptApi.get_transcript(video_id)
100
  text = " ".join(s["text"] for s in segments)
101
+ return text, f" Transcript: {len(text.split())} words"
102
  except Exception as exc:
103
  short = str(exc)[:80]
104
+ return "", f" Transcript unavailable: {short}"
105
+
106
 
107
 
 
108
  # Comments
109
+
110
 
111
  def fetch_comments(
112
  video_id: str,
 
155
  break
156
 
157
  if not rows:
158
+ return pd.DataFrame(), " No comments fetched (comments may be disabled)"
159
 
160
  df = pd.DataFrame(rows)
161
+ return df, f" Comments: {len(df)} fetched"
162
 
163
  except requests.exceptions.Timeout:
164
+ return pd.DataFrame(), " Comments request timed out"
165
  except Exception as exc:
166
+ return pd.DataFrame(), f" Comments error: {str(exc)[:80]}"
167
+
168
 
169
 
 
170
  # Search by keyword
171
+
172
 
173
  def search_videos_by_title(
174
  keyword: str,