datamk committed on
Commit
2416e87
·
verified ·
1 Parent(s): fcd3b26

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +29 -10
  2. app.py +77 -52
  3. requirements.txt +2 -1
Dockerfile CHANGED
@@ -1,25 +1,44 @@
1
- # Use the official Microsoft Playwright image. It has ALL complex system dependencies pre-installed!
2
- FROM mcr.microsoft.com/playwright/python:v1.40.0-jammy
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  RUN useradd -m -u 1000 user
5
  USER user
6
  ENV HOME=/home/user \
7
- PATH=/home/user/.local/bin:$PATH \
8
- PYTHONUNBUFFERED=1
9
 
10
  WORKDIR $HOME/app
11
 
12
- # Install our python requirements
13
  COPY --chown=user requirements.txt .
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
16
- # Download the Chromium portable browser incredibly fast without apt-get!
17
- RUN playwright install chromium
18
-
19
- # Copy our backend script into the container
20
  COPY --chown=user app.py .
21
 
 
22
  EXPOSE 7860
23
 
24
- # Start Gradio background daemon
25
  CMD ["python", "app.py"]
 
1
+ # Use a robust, official Python image
2
+ FROM python:3.10-slim
3
 
4
+ # Set environment variables for non-interactive installs
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV PYTHONUNBUFFERED=1
7
+
8
+ # Install essential system dependencies for Chrome/Selenium
9
+ RUN apt-get update && apt-get install -y \
10
+ wget \
11
+ gnupg \
12
+ unzip \
13
+ curl \
14
+ --no-install-recommends \
15
+ && rm -rf /var/lib/apt/lists/*
16
+
17
+ # Add Google Chrome Repository and Install Chrome
18
+ RUN wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
19
+ && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
20
+ && apt-get update \
21
+ && apt-get install -y google-chrome-stable \
22
+ --no-install-recommends \
23
+ && rm -rf /var/lib/apt/lists/*
24
+
25
+ # Set up user for Hugging Face (UID 1000 is required)
26
  RUN useradd -m -u 1000 user
27
  USER user
28
  ENV HOME=/home/user \
29
+ PATH=/home/user/.local/bin:$PATH
 
30
 
31
  WORKDIR $HOME/app
32
 
33
+ # Copy and install requirements
34
  COPY --chown=user requirements.txt .
35
  RUN pip install --no-cache-dir -r requirements.txt
36
 
37
+ # Copy application code
 
 
 
38
  COPY --chown=user app.py .
39
 
40
+ # Standard HF Port
41
  EXPOSE 7860
42
 
43
+ # Start command
44
  CMD ["python", "app.py"]
app.py CHANGED
@@ -2,77 +2,102 @@ import gradio as gr
2
  import threading
3
  import time
4
  import schedule
5
- from playwright.sync_api import sync_playwright
6
  import random
7
  import datetime
 
 
 
 
8
 
9
  URL = "https://streamer1-gvbgwaea2w2qmwgjey7ksg.streamlit.app/"
10
- logs = ["Service started. Waiting for next schedule..."]
11
 
12
  def add_log(message):
13
  timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
14
- logs.append(f"[{timestamp}] {message}")
15
- if len(logs) > 30:
16
- logs.pop(0)
 
 
17
 
18
  def visit_and_scroll():
19
- add_log(f"Started keeping alive {URL}...")
20
 
 
 
 
 
 
 
 
 
 
 
21
  try:
22
- with sync_playwright() as p:
23
- # Playwright handles headless browsers inside Docker flawlessly
24
- browser = p.chromium.launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
25
- page = browser.new_page()
26
-
27
- # Open the Streamlit app
28
- page.goto(URL, wait_until='networkidle')
29
- time.sleep(8)
30
-
31
- total_height = page.evaluate("document.body.scrollHeight")
32
- add_log("Scrolling down naturally...")
33
- current_scroll = 0
34
- while current_scroll < total_height:
35
- scroll_step = random.randint(100, 500)
36
- current_scroll += scroll_step
37
- page.evaluate(f"window.scrollTo(0, {current_scroll});")
38
- time.sleep(random.uniform(0.3, 1.2))
39
- new_height = page.evaluate("document.body.scrollHeight")
40
- if new_height > total_height:
41
- total_height = new_height
42
-
43
- add_log("Scrolling up naturally...")
44
- while current_scroll > 0:
45
- scroll_step = random.randint(100, 500)
46
- current_scroll -= scroll_step
47
- if current_scroll < 0:
48
- current_scroll = 0
49
- page.evaluate(f"window.scrollTo(0, {current_scroll});")
50
- time.sleep(random.uniform(0.3, 1.2))
51
-
52
- add_log("Done navigating! Closing Playwright browser...")
53
- browser.close()
54
-
55
  except Exception as e:
56
- add_log(f"Error occurred: {e}")
 
 
 
57
 
58
- def run_schedule():
 
59
  visit_and_scroll()
 
60
  schedule.every(1).hours.do(visit_and_scroll)
61
  while True:
62
  schedule.run_pending()
63
- time.sleep(60)
64
 
65
- # Start background thread
66
- thread = threading.Thread(target=run_schedule, daemon=True)
67
- thread.start()
68
 
69
  def get_logs():
70
- return "\n".join(logs)
71
 
72
- with gr.Blocks() as demo:
73
- gr.Markdown("# 🤖 Playwright Keep-Alive Status")
74
- output = gr.Textbox(label="Live System Logs", lines=15, value=get_logs(), interactive=False)
75
- refresh_btn = gr.Button("Refresh Logs")
76
- refresh_btn.click(fn=get_logs, outputs=output)
 
 
 
 
 
 
 
 
 
 
 
77
 
78
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
2
  import threading
3
  import time
4
  import schedule
 
5
  import random
6
  import datetime
7
+ from selenium import webdriver
8
+ from selenium.webdriver.chrome.options import Options
9
+ from selenium.webdriver.chrome.service import Service
10
+ from webdriver_manager.chrome import ChromeDriverManager
11
 
12
  URL = "https://streamer1-gvbgwaea2w2qmwgjey7ksg.streamlit.app/"
13
+ logs = ["System initialized. Waiting for first run..."]
14
 
15
  def add_log(message):
16
  timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
17
+ log_entry = f"[{timestamp}] {message}"
18
+ print(log_entry) # Also print to container logs
19
+ logs.append(log_entry)
20
+ if len(logs) > 50:
21
+ logs.pop(0)
22
 
23
  def visit_and_scroll():
24
+ add_log(f"Starting keep-alive visit to: {URL}")
25
 
26
+ options = Options()
27
+ options.add_argument("--headless=new")
28
+ options.add_argument("--no-sandbox")
29
+ options.add_argument("--disable-dev-shm-usage")
30
+ options.add_argument("--disable-gpu")
31
+ options.add_argument("--window-size=1920,1080")
32
+ # Add a real-looking user agent to prevent blocks
33
+ options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
34
+
35
+ driver = None
36
  try:
37
+ # Use webdriver-manager to handle the driver automatically
38
+ service = Service(ChromeDriverManager().install())
39
+ driver = webdriver.Chrome(service=service, options=options)
40
+
41
+ driver.get(URL)
42
+ add_log("Page loaded. Waiting 10s for Streamlit hydration...")
43
+ time.sleep(10)
44
+
45
+ # Natural scrolling logic
46
+ total_height = driver.execute_script("return document.body.scrollHeight")
47
+ add_log(f"Page height detected: {total_height}px. Beginning scroll...")
48
+
49
+ current_scroll = 0
50
+ while current_scroll < total_height:
51
+ step = random.randint(200, 600)
52
+ current_scroll += step
53
+ driver.execute_script(f"window.scrollTo(0, {current_scroll});")
54
+ time.sleep(random.uniform(0.5, 1.5))
55
+ # Update height in case of lazy loading
56
+ total_height = driver.execute_script("return document.body.scrollHeight")
57
+
58
+ add_log("Reached bottom. Scrolling back to top...")
59
+ driver.execute_script("window.scrollTo({top: 0, behavior: 'smooth'});")
60
+ time.sleep(3)
61
+
62
+ add_log("Visit completed successfully.")
63
+
 
 
 
 
 
 
64
  except Exception as e:
65
+ add_log(f"CRITICAL ERROR: {str(e)}")
66
+ finally:
67
+ if driver:
68
+ driver.quit()
69
 
70
+ def run_scheduler():
71
+ # Run immediately on startup
72
  visit_and_scroll()
73
+
74
  schedule.every(1).hours.do(visit_and_scroll)
75
  while True:
76
  schedule.run_pending()
77
+ time.sleep(30)
78
 
79
+ # Start the background thread
80
+ threading.Thread(target=run_scheduler, daemon=True).start()
 
81
 
82
  def get_logs():
83
+ return "\n".join(reversed(logs))
84
 
85
+ # Build the Gradio UI
86
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
87
+ gr.Markdown("# 🚀 Streamlit Keep-Alive Bot")
88
+ gr.Markdown(f"Targeting: `{URL}`")
89
+
90
+ with gr.Row():
91
+ status_box = gr.Textbox(
92
+ label="Live Activity Logs (Newest First)",
93
+ value=get_logs,
94
+ every=5, # Auto-refresh every 5 seconds
95
+ lines=20,
96
+ interactive=False
97
+ )
98
+
99
+ refresh_btn = gr.Button("Manual Refresh")
100
+ refresh_btn.click(fn=get_logs, outputs=status_box)
101
 
102
+ if __name__ == "__main__":
103
+ demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
- playwright
2
  schedule
3
  gradio
 
 
1
+ selenium
2
  schedule
3
  gradio
4
+ webdriver-manager