GenAIDevTOProd committed on
Commit
7afaf6d
·
verified ·
1 Parent(s): b661309

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -15,15 +15,15 @@ from huggingface_hub import HfApi
15
  # Load token from Hugging Face Secrets
16
  HF_TOKEN = os.environ.get("RedditSemanticSearch")
17
 
18
- # Define target subreddits
19
- target_subreddits = ["askscience", "gaming", "technology", "todayilearned", "programming"]
20
-
21
  # Function to stream JSONL Reddit files from HF Hub
22
  from datasets import load_dataset
23
 
24
  # Load full Reddit dataset (assumes it's pre-split by subreddit or has a field)
25
  dataset = load_dataset("HuggingFaceGECLM/REDDIT_comments", split="train")
26
 
 
 
 
27
  # Filter only relevant subreddits
28
  dataset = dataset.filter(lambda x: x["subreddit"] in target_subreddits)
29
 
 
15
  # Load token from Hugging Face Secrets
16
  HF_TOKEN = os.environ.get("RedditSemanticSearch")
17
 
 
 
 
18
  # Function to stream JSONL Reddit files from HF Hub
19
  from datasets import load_dataset
20
 
21
  # Load full Reddit dataset (assumes it's pre-split by subreddit or has a field)
22
  dataset = load_dataset("HuggingFaceGECLM/REDDIT_comments", split="train")
23
 
24
+ # Define target subreddits
25
+ target_subreddits = ["askscience", "gaming", "technology", "todayilearned", "programming"]
26
+
27
  # Filter only relevant subreddits
28
  dataset = dataset.filter(lambda x: x["subreddit"] in target_subreddits)
29