Spaces:
Running
Running
Update alisto_project/backend/ingest_reddit.py
Browse files
alisto_project/backend/ingest_reddit.py
CHANGED
|
@@ -385,46 +385,41 @@ def is_credible_user(post):
|
|
| 385 |
# allows posts to pass if Reddit API fails to get user info
|
| 386 |
return True
|
| 387 |
|
| 388 |
-
|
| 389 |
-
# 4. Main Scraper Loop
|
| 390 |
# orchestrates the entire scraping process (historical scan + real-time stream)
|
|
|
|
| 391 |
async def scrape_reddit():
|
| 392 |
print("Connecting to Reddit API...")
|
| 393 |
-
|
| 394 |
-
#
|
| 395 |
client_id = os.getenv("REDDIT_CLIENT_ID")
|
| 396 |
client_secret = os.getenv("REDDIT_CLIENT_SECRET")
|
| 397 |
-
# --------------------------------
|
| 398 |
|
| 399 |
if not client_id or not client_secret:
|
| 400 |
print("❌ Error: Client ID or Secret missing in .env")
|
| 401 |
return
|
| 402 |
|
| 403 |
reddit = asyncpraw.Reddit(
|
| 404 |
-
client_id=
|
| 405 |
-
client_secret=
|
| 406 |
user_agent=os.getenv("REDDIT_USER_AGENT"),
|
| 407 |
username=os.getenv("REDDIT_USERNAME"),
|
| 408 |
password=os.getenv("REDDIT_PASSWORD")
|
| 409 |
-
)
|
| 410 |
|
| 411 |
try:
|
| 412 |
subreddit = await reddit.subreddit(SUBREDDITS)
|
| 413 |
-
print(f"👁️
|
| 414 |
-
|
| 415 |
-
# --- PHASE 1: FETCH LATEST EXISTING POSTS (e.g., last 500) ---
|
| 416 |
-
print("🔍 Scanning last 5 posts for missed alerts...")
|
| 417 |
-
# iterates over the last 5 posts asynchronously
|
| 418 |
-
async for post in subreddit.new(limit=5):
|
| 419 |
-
await process_post(post)
|
| 420 |
|
| 421 |
-
|
|
|
|
|
|
|
| 422 |
|
| 423 |
-
# --- PHASE 2: START REAL-TIME STREAM
|
| 424 |
print("📡 Starting real-time stream for new submissions...")
|
| 425 |
|
| 426 |
-
#
|
| 427 |
-
async for post in subreddit.stream.submissions(skip_existing=
|
|
|
|
| 428 |
await process_post(post)
|
| 429 |
|
| 430 |
except Exception as e:
|
|
|
|
| 385 |
# allows posts to pass if Reddit API fails to get user info
|
| 386 |
return True
|
| 387 |
|
|
|
|
|
|
|
| 388 |
# orchestrates the entire scraping process (historical scan + real-time stream)
|
| 389 |
+
# 4. Main Scraper Loop (LIVE MODE ONLY)
|
| 390 |
async def scrape_reddit():
|
| 391 |
print("Connecting to Reddit API...")
|
| 392 |
+
|
| 393 |
+
# Load credentials
|
| 394 |
client_id = os.getenv("REDDIT_CLIENT_ID")
|
| 395 |
client_secret = os.getenv("REDDIT_CLIENT_SECRET")
|
|
|
|
| 396 |
|
| 397 |
if not client_id or not client_secret:
|
| 398 |
print("❌ Error: Client ID or Secret missing in .env")
|
| 399 |
return
|
| 400 |
|
| 401 |
reddit = asyncpraw.Reddit(
|
| 402 |
+
client_id=client_id,
|
| 403 |
+
client_secret=client_secret,
|
| 404 |
user_agent=os.getenv("REDDIT_USER_AGENT"),
|
| 405 |
username=os.getenv("REDDIT_USERNAME"),
|
| 406 |
password=os.getenv("REDDIT_PASSWORD")
|
| 407 |
+
)
|
| 408 |
|
| 409 |
try:
|
| 410 |
subreddit = await reddit.subreddit(SUBREDDITS)
|
| 411 |
+
print(f"👁️ ALISTO ACTIVE: Monitoring r/{SUBREDDITS}...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
|
| 413 |
+
# --- DELETED PHASE 1 (History Scan) ---
|
| 414 |
+
# We removed the code that caused the 403 error.
|
| 415 |
+
print("⚠️ Skipped history scan to avoid Cloud Ban.")
|
| 416 |
|
| 417 |
+
# --- PHASE 2: START REAL-TIME STREAM ---
|
| 418 |
print("📡 Starting real-time stream for new submissions...")
|
| 419 |
|
| 420 |
+
# skip_existing=True is VITAL. It ignores old data and only waits for NEW posts.
|
| 421 |
+
async for post in subreddit.stream.submissions(skip_existing=True):
|
| 422 |
+
print(f"🔥 New Post Detected: {post.title}")
|
| 423 |
await process_post(post)
|
| 424 |
|
| 425 |
except Exception as e:
|