Update app.py
Browse files
app.py
CHANGED
|
@@ -8,8 +8,8 @@ from selenium.webdriver.support import expected_conditions as EC
|
|
| 8 |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
| 9 |
from sentence_transformers import SentenceTransformer
|
| 10 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 11 |
-
from langchain_community.vectorstores import FAISS
|
| 12 |
-
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 13 |
from langchain.schema import Document
|
| 14 |
import logging
|
| 15 |
import subprocess
|
|
@@ -18,6 +18,8 @@ import os
|
|
| 18 |
import re
|
| 19 |
import time
|
| 20 |
import psutil
|
|
|
|
|
|
|
| 21 |
from selenium.common.exceptions import WebDriverException, TimeoutException
|
| 22 |
|
| 23 |
# Set up logging
|
|
@@ -53,14 +55,17 @@ if 'qa_pipeline' not in st.session_state:
|
|
| 53 |
st.session_state.qa_pipeline = None
|
| 54 |
|
| 55 |
def cleanup_chromedriver_processes():
|
| 56 |
-
"""Kill any lingering ChromeDriver processes."""
|
| 57 |
try:
|
| 58 |
for proc in psutil.process_iter(['name']):
|
| 59 |
-
if proc.info['name']
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
| 62 |
except Exception as e:
|
| 63 |
-
logging.warning(f"Error cleaning up
|
| 64 |
|
| 65 |
def check_port(port):
|
| 66 |
"""Check if a port is in use."""
|
|
@@ -128,7 +133,6 @@ def setup_driver():
|
|
| 128 |
subprocess.run(['chmod', '+x', chromedriver_path], check=True)
|
| 129 |
except subprocess.CalledProcessError:
|
| 130 |
st.warning(f"Could not set executable permissions on {chromedriver_path}")
|
| 131 |
-
# Continue anyway - sometimes the file is already executable
|
| 132 |
|
| 133 |
if not os.access(chromium_path, os.X_OK):
|
| 134 |
try:
|
|
@@ -150,6 +154,12 @@ def setup_driver():
|
|
| 150 |
options.add_argument('--remote-debugging-port=0')
|
| 151 |
options.add_argument('--ignore-certificate-errors')
|
| 152 |
options.add_argument('--disable-web-security')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
options.binary_location = chromium_path
|
| 154 |
|
| 155 |
# Initialize ChromeDriver service
|
|
@@ -166,7 +176,10 @@ def setup_driver():
|
|
| 166 |
service.start()
|
| 167 |
driver = webdriver.Chrome(service=service, options=options)
|
| 168 |
driver.set_page_load_timeout(60)
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
| 170 |
return driver, service
|
| 171 |
|
| 172 |
except WebDriverException as e:
|
|
@@ -178,6 +191,11 @@ def setup_driver():
|
|
| 178 |
pass
|
| 179 |
time.sleep(2)
|
| 180 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
raise
|
| 182 |
|
| 183 |
except Exception as e:
|
|
@@ -249,6 +267,15 @@ def scrape_website(url):
|
|
| 249 |
try:
|
| 250 |
driver.quit()
|
| 251 |
service.stop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
logging.info("WebDriver and service stopped")
|
| 253 |
except Exception as e:
|
| 254 |
logging.warning(f"Error quitting driver: {str(e)}")
|
|
|
|
| 8 |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
| 9 |
from sentence_transformers import SentenceTransformer
|
| 10 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 11 |
+
from langchain_community.vectorstores import FAISS
|
| 12 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 13 |
from langchain.schema import Document
|
| 14 |
import logging
|
| 15 |
import subprocess
|
|
|
|
| 18 |
import re
|
| 19 |
import time
|
| 20 |
import psutil
|
| 21 |
+
import tempfile
|
| 22 |
+
import shutil
|
| 23 |
from selenium.common.exceptions import WebDriverException, TimeoutException
|
| 24 |
|
| 25 |
# Set up logging
|
|
|
|
| 55 |
st.session_state.qa_pipeline = None
|
| 56 |
|
| 57 |
def cleanup_chromedriver_processes():
    """Kill any lingering ChromeDriver and Chrome processes.

    Best-effort cleanup: iterates over the processes reported by
    ``psutil.process_iter`` and kills every one whose name is
    ``chromedriver``, ``chrome``, ``chromium`` or ``chromium-browser``.
    Errors are logged and never propagated to the caller.

    NOTE(review): matching is by process name only, so this also kills
    Chrome/Chromium instances that were not started by this app -- confirm
    that is acceptable on the host this runs on.
    """
    target_names = {'chromedriver', 'chrome', 'chromium', 'chromium-browser'}
    try:
        for proc in psutil.process_iter(['name']):
            if proc.info['name'] not in target_names:
                continue
            try:
                proc.kill()
                logging.info(f"Killed process {proc.info['name']} PID {proc.pid}")
            except psutil.Error as e:
                # The process may already have exited or may not be ours to
                # kill; record it and keep going instead of silently
                # swallowing every exception with a bare ``except``.
                logging.debug(f"Could not kill process PID {proc.pid}: {str(e)}")
    except Exception as e:
        logging.warning(f"Error cleaning up processes: {str(e)}")
|
| 69 |
|
| 70 |
def check_port(port):
|
| 71 |
"""Check if a port is in use."""
|
|
|
|
| 133 |
subprocess.run(['chmod', '+x', chromedriver_path], check=True)
|
| 134 |
except subprocess.CalledProcessError:
|
| 135 |
st.warning(f"Could not set executable permissions on {chromedriver_path}")
|
|
|
|
| 136 |
|
| 137 |
if not os.access(chromium_path, os.X_OK):
|
| 138 |
try:
|
|
|
|
| 154 |
options.add_argument('--remote-debugging-port=0')
|
| 155 |
options.add_argument('--ignore-certificate-errors')
|
| 156 |
options.add_argument('--disable-web-security')
|
| 157 |
+
|
| 158 |
+
# Add unique user data directory to prevent conflicts
|
| 159 |
+
temp_dir = tempfile.mkdtemp()
|
| 160 |
+
options.add_argument(f"--user-data-dir={temp_dir}")
|
| 161 |
+
options.add_argument("--profile-directory=Default")
|
| 162 |
+
|
| 163 |
options.binary_location = chromium_path
|
| 164 |
|
| 165 |
# Initialize ChromeDriver service
|
|
|
|
| 176 |
service.start()
|
| 177 |
driver = webdriver.Chrome(service=service, options=options)
|
| 178 |
driver.set_page_load_timeout(60)
|
| 179 |
+
|
| 180 |
+
# Store temp directory for cleanup
|
| 181 |
+
driver.temp_dir = temp_dir
|
| 182 |
+
logging.info(f"ChromeDriver initialized on port {service.port} with temp dir: {temp_dir}")
|
| 183 |
return driver, service
|
| 184 |
|
| 185 |
except WebDriverException as e:
|
|
|
|
| 191 |
pass
|
| 192 |
time.sleep(2)
|
| 193 |
else:
|
| 194 |
+
# Clean up temp directory if creation failed
|
| 195 |
+
try:
|
| 196 |
+
shutil.rmtree(temp_dir)
|
| 197 |
+
except:
|
| 198 |
+
pass
|
| 199 |
raise
|
| 200 |
|
| 201 |
except Exception as e:
|
|
|
|
| 267 |
try:
|
| 268 |
driver.quit()
|
| 269 |
service.stop()
|
| 270 |
+
|
| 271 |
+
# Clean up temporary directory
|
| 272 |
+
if hasattr(driver, 'temp_dir'):
|
| 273 |
+
try:
|
| 274 |
+
shutil.rmtree(driver.temp_dir)
|
| 275 |
+
logging.info(f"Cleaned up temp directory: {driver.temp_dir}")
|
| 276 |
+
except Exception as e:
|
| 277 |
+
logging.warning(f"Error cleaning up temp directory: {str(e)}")
|
| 278 |
+
|
| 279 |
logging.info("WebDriver and service stopped")
|
| 280 |
except Exception as e:
|
| 281 |
logging.warning(f"Error quitting driver: {str(e)}")
|