# Google_web_scrapper / src/streamlit_app.py
# Author: Miraj74 — "Update src/streamlit_app.py" (commit 002019c, verified)
import time
from urllib.parse import quote_plus

import pandas as pd
import streamlit as st
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Maximum number of Google result pages to walk (was a hard-coded `3`).
MAX_PAGES = 3


def _build_chrome_options() -> webdriver.ChromeOptions:
    """Return ChromeOptions configured for headless, low-detection scraping."""
    options = webdriver.ChromeOptions()
    options.add_argument("--headless=new")
    # Hide the usual Selenium automation fingerprint.
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    # Required in containerized environments where /dev/shm is tiny.
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--incognito")  # Private mode
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
    return options


def _collect_result_links(driver, progress_bar, status_text) -> set:
    """Walk up to MAX_PAGES of Google results, returning non-Google hrefs.

    Stops early when the "Next" button (#pnnext) is absent or not clickable,
    i.e. on the last results page or when Google serves a block page.
    """
    all_links = set()
    page = 1
    while page <= MAX_PAGES:
        status_text.text(f"Scraping page {page} of {MAX_PAGES}…")
        time.sleep(2)  # crude politeness delay; also lets results render
        WebDriverWait(driver, 5).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a"))
        )
        # div.tF2Cxc wraps each organic result on classic Google SERPs.
        for result in driver.find_elements(By.CSS_SELECTOR, "div.tF2Cxc a"):
            link = result.get_attribute("href")
            if link and "google.com" not in link:  # Avoid Google-related links
                all_links.add(link)
        # Clamp so the bar never exceeds 100 regardless of MAX_PAGES.
        progress_bar.progress(min(page * 100 // MAX_PAGES, 100))
        try:
            next_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "#pnnext"))
            )
            next_button.click()
            page += 1
        except (NoSuchElementException, TimeoutException):
            break  # no further pages
    return all_links


st.set_page_config(page_title="Google Scraper", layout="wide")
st.markdown('Google Search Scraper')

query = st.text_input("Enter Search Query:")

if st.button("Search Google"):
    if query.strip():
        progress_bar = st.progress(0)
        status_text = st.empty()

        driver = webdriver.Chrome(options=_build_chrome_options())
        all_links = set()
        try:
            # quote_plus percent-encodes &, #, %, etc. — a plain
            # str.replace(' ', '+') corrupted queries with special chars.
            driver.get(f"https://www.google.com/search?q={quote_plus(query)}")
            all_links = _collect_result_links(driver, progress_bar, status_text)
        except Exception as e:
            st.error(f"⚠️ Error: {str(e)}")
        finally:
            # Always release the Chrome process, even on error paths.
            driver.quit()

        if all_links:
            df = pd.DataFrame(sorted(all_links), columns=["🌍 Web Links"])
            status_text.text("βœ… Scraping Completed!")
            progress_bar.empty()
            st.write("### πŸ”— Extracted Links:")
            st.dataframe(df, use_container_width=True)
            copy_text = "\n".join(df["🌍 Web Links"])
            st.code(copy_text, language="text")
            st.success("βœ… You can copy and save these links!")
        else:
            st.warning("⚠️ No links were extracted. Google may have blocked automated access.")
    else:
        st.warning("⚠️ Please enter a search query.")