datamk committed on
Commit
11e364e
·
verified ·
1 Parent(s): 2416e87

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +11 -29
  2. app.py +34 -54
  3. requirements.txt +1 -2
Dockerfile CHANGED
@@ -1,44 +1,26 @@
1
- # Use a robust, official Python image
2
- FROM python:3.10-slim
3
 
4
- # Set environment variables for non-interactive installs
5
- ENV DEBIAN_FRONTEND=noninteractive
6
- ENV PYTHONUNBUFFERED=1
7
-
8
- # Install essential system dependencies for Chrome/Selenium
9
- RUN apt-get update && apt-get install -y \
10
- wget \
11
- gnupg \
12
- unzip \
13
- curl \
14
- --no-install-recommends \
15
- && rm -rf /var/lib/apt/lists/*
16
-
17
- # Add Google Chrome Repository and Install Chrome
18
- RUN wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
19
- && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
20
- && apt-get update \
21
- && apt-get install -y google-chrome-stable \
22
- --no-install-recommends \
23
- && rm -rf /var/lib/apt/lists/*
24
-
25
- # Set up user for Hugging Face (UID 1000 is required)
26
  RUN useradd -m -u 1000 user
27
  USER user
28
  ENV HOME=/home/user \
29
- PATH=/home/user/.local/bin:$PATH
 
30
 
31
  WORKDIR $HOME/app
32
 
33
- # Copy and install requirements
34
  COPY --chown=user requirements.txt .
35
  RUN pip install --no-cache-dir -r requirements.txt
36
 
37
- # Copy application code
 
 
 
38
  COPY --chown=user app.py .
39
 
40
- # Standard HF Port
41
  EXPOSE 7860
42
 
43
- # Start command
44
  CMD ["python", "app.py"]
 
1
+ # Official Microsoft Playwright image - pre-compiled and lightning-fast!
2
+ FROM mcr.microsoft.com/playwright/python:v1.40.0-jammy
3
 
4
+ # Hugging Face UID 1000 is required for proper permissions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  RUN useradd -m -u 1000 user
6
  USER user
7
  ENV HOME=/home/user \
8
+ PATH=/home/user/.local/bin:$PATH \
9
+ PYTHONUNBUFFERED=1
10
 
11
  WORKDIR $HOME/app
12
 
13
+ # Only copy requirements first to cache layers (fast builds!)
14
  COPY --chown=user requirements.txt .
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
17
+ # Playwright images have Chrome pre-baked. Just install the executable.
18
+ RUN playwright install chromium
19
+
20
+ # Copy our logic
21
  COPY --chown=user app.py .
22
 
 
23
  EXPOSE 7860
24
 
25
+ # Starting the Gradio dashboard and background task
26
  CMD ["python", "app.py"]
app.py CHANGED
@@ -4,18 +4,15 @@ import time
4
  import schedule
5
  import random
6
  import datetime
7
- from selenium import webdriver
8
- from selenium.webdriver.chrome.options import Options
9
- from selenium.webdriver.chrome.service import Service
10
- from webdriver_manager.chrome import ChromeDriverManager
11
 
12
  URL = "https://streamer1-gvbgwaea2w2qmwgjey7ksg.streamlit.app/"
13
- logs = ["System initialized. Waiting for first run..."]
14
 
15
  def add_log(message):
16
- timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
17
  log_entry = f"[{timestamp}] {message}"
18
- print(log_entry) # Also print to container logs
19
  logs.append(log_entry)
20
  if len(logs) > 50:
21
  logs.pop(0)
@@ -23,76 +20,59 @@ def add_log(message):
23
  def visit_and_scroll():
24
  add_log(f"Starting keep-alive visit to: {URL}")
25
 
26
- options = Options()
27
- options.add_argument("--headless=new")
28
- options.add_argument("--no-sandbox")
29
- options.add_argument("--disable-dev-shm-usage")
30
- options.add_argument("--disable-gpu")
31
- options.add_argument("--window-size=1920,1080")
32
- # Add a real-looking user agent to prevent blocks
33
- options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
34
-
35
- driver = None
36
  try:
37
- # Use webdriver-manager to handle the driver automatically
38
- service = Service(ChromeDriverManager().install())
39
- driver = webdriver.Chrome(service=service, options=options)
40
-
41
- driver.get(URL)
42
- add_log("Page loaded. Waiting 10s for Streamlit hydration...")
43
- time.sleep(10)
44
-
45
- # Natural scrolling logic
46
- total_height = driver.execute_script("return document.body.scrollHeight")
47
- add_log(f"Page height detected: {total_height}px. Beginning scroll...")
48
-
49
- current_scroll = 0
50
- while current_scroll < total_height:
51
- step = random.randint(200, 600)
52
- current_scroll += step
53
- driver.execute_script(f"window.scrollTo(0, {current_scroll});")
54
- time.sleep(random.uniform(0.5, 1.5))
55
- # Update height in case of lazy loading
56
- total_height = driver.execute_script("return document.body.scrollHeight")
57
 
58
- add_log("Reached bottom. Scrolling back to top...")
59
- driver.execute_script("window.scrollTo({top: 0, behavior: 'smooth'});")
60
- time.sleep(3)
61
-
62
- add_log("Visit completed successfully.")
63
-
64
  except Exception as e:
65
  add_log(f"CRITICAL ERROR: {str(e)}")
66
- finally:
67
- if driver:
68
- driver.quit()
69
 
70
  def run_scheduler():
71
- # Run immediately on startup
72
  visit_and_scroll()
73
 
74
  schedule.every(1).hours.do(visit_and_scroll)
75
  while True:
76
  schedule.run_pending()
77
- time.sleep(30)
78
 
79
- # Start the background thread
80
  threading.Thread(target=run_scheduler, daemon=True).start()
81
 
82
  def get_logs():
83
  return "\n".join(reversed(logs))
84
 
85
- # Build the Gradio UI
86
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
87
- gr.Markdown("# 🚀 Streamlit Keep-Alive Bot")
88
  gr.Markdown(f"Targeting: `{URL}`")
89
 
90
  with gr.Row():
91
  status_box = gr.Textbox(
92
- label="Live Activity Logs (Newest First)",
93
  value=get_logs,
94
- every=5, # Auto-refresh every 5 seconds
95
- lines=20,
96
  interactive=False
97
  )
98
 
 
4
  import schedule
5
  import random
6
  import datetime
7
+ from playwright.sync_api import sync_playwright
 
 
 
8
 
9
  URL = "https://streamer1-gvbgwaea2w2qmwgjey7ksg.streamlit.app/"
10
+ logs = ["System initialized. Waiting for build to finalize..."]
11
 
12
  def add_log(message):
13
+ timestamp = datetime.datetime.now().strftime('%H:%M:%S')
14
  log_entry = f"[{timestamp}] {message}"
15
+ print(log_entry)
16
  logs.append(log_entry)
17
  if len(logs) > 50:
18
  logs.pop(0)
 
20
  def visit_and_scroll():
21
  add_log(f"Starting keep-alive visit to: {URL}")
22
 
 
 
 
 
 
 
 
 
 
 
23
  try:
24
+ with sync_playwright() as p:
25
+ # Playwright is built specifically for containers and is very stable
26
+ browser = p.chromium.launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
27
+ context = browser.new_context(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
28
+ page = context.new_page()
29
+
30
+ # Go to site
31
+ page.goto(URL, wait_until='networkidle', timeout=60000)
32
+ add_log("Page loaded. Waiting 10s for Streamlit setup...")
33
+ time.sleep(10)
34
+
35
+ # Smart scrolling logic
36
+ for i in range(1, 6):
37
+ scroll_y = i * 800
38
+ page.evaluate(f"window.scrollTo(0, {scroll_y});")
39
+ add_log(f"Scrolled to {scroll_y}px...")
40
+ time.sleep(random.uniform(1.0, 2.5))
 
 
 
41
 
42
+ # Back to top
43
+ page.evaluate("window.scrollTo(0, 0);")
44
+ add_log("Visit completed. Closing browser.")
45
+ browser.close()
46
+
 
47
  except Exception as e:
48
  add_log(f"CRITICAL ERROR: {str(e)}")
 
 
 
49
 
50
  def run_scheduler():
51
+ # Immediate first run
52
  visit_and_scroll()
53
 
54
  schedule.every(1).hours.do(visit_and_scroll)
55
  while True:
56
  schedule.run_pending()
57
+ time.sleep(60)
58
 
59
+ # Background Thread
60
  threading.Thread(target=run_scheduler, daemon=True).start()
61
 
62
  def get_logs():
63
  return "\n".join(reversed(logs))
64
 
65
+ # Gradio Interface
66
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
67
+ gr.Markdown("# 🚀 Streamlit Keep-Alive Cloud Console")
68
  gr.Markdown(f"Targeting: `{URL}`")
69
 
70
  with gr.Row():
71
  status_box = gr.Textbox(
72
+ label="Live logs (Newest First)",
73
  value=get_logs,
74
+ every=5, # Refresh every 5s automatically
75
+ lines=15,
76
  interactive=False
77
  )
78
 
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- selenium
2
  schedule
3
  gradio
4
- webdriver-manager
 
1
+ playwright
2
  schedule
3
  gradio