muddasser committed on
Commit
22a159f
·
verified ·
1 Parent(s): 359dd8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -9
app.py CHANGED
@@ -8,8 +8,8 @@ from selenium.webdriver.support import expected_conditions as EC
8
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
9
  from sentence_transformers import SentenceTransformer
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
- from langchain_community.vectorstores import FAISS # Fixed import
12
- from langchain_community.embeddings import HuggingFaceEmbeddings # Fixed import
13
  from langchain.schema import Document
14
  import logging
15
  import subprocess
@@ -18,6 +18,8 @@ import os
18
  import re
19
  import time
20
  import psutil
 
 
21
  from selenium.common.exceptions import WebDriverException, TimeoutException
22
 
23
  # Set up logging
@@ -53,14 +55,17 @@ if 'qa_pipeline' not in st.session_state:
53
  st.session_state.qa_pipeline = None
54
 
55
  def cleanup_chromedriver_processes():
56
- """Kill any lingering ChromeDriver processes."""
57
  try:
58
  for proc in psutil.process_iter(['name']):
59
- if proc.info['name'] == 'chromedriver':
60
- proc.kill()
61
- logging.info(f"Killed ChromeDriver process PID {proc.pid}")
 
 
 
62
  except Exception as e:
63
- logging.warning(f"Error cleaning up ChromeDriver processes: {str(e)}")
64
 
65
  def check_port(port):
66
  """Check if a port is in use."""
@@ -128,7 +133,6 @@ def setup_driver():
128
  subprocess.run(['chmod', '+x', chromedriver_path], check=True)
129
  except subprocess.CalledProcessError:
130
  st.warning(f"Could not set executable permissions on {chromedriver_path}")
131
- # Continue anyway - sometimes the file is already executable
132
 
133
  if not os.access(chromium_path, os.X_OK):
134
  try:
@@ -150,6 +154,12 @@ def setup_driver():
150
  options.add_argument('--remote-debugging-port=0')
151
  options.add_argument('--ignore-certificate-errors')
152
  options.add_argument('--disable-web-security')
 
 
 
 
 
 
153
  options.binary_location = chromium_path
154
 
155
  # Initialize ChromeDriver service
@@ -166,7 +176,10 @@ def setup_driver():
166
  service.start()
167
  driver = webdriver.Chrome(service=service, options=options)
168
  driver.set_page_load_timeout(60)
169
- logging.info(f"ChromeDriver initialized on port {service.port}")
 
 
 
170
  return driver, service
171
 
172
  except WebDriverException as e:
@@ -178,6 +191,11 @@ def setup_driver():
178
  pass
179
  time.sleep(2)
180
  else:
 
 
 
 
 
181
  raise
182
 
183
  except Exception as e:
@@ -249,6 +267,15 @@ def scrape_website(url):
249
  try:
250
  driver.quit()
251
  service.stop()
 
 
 
 
 
 
 
 
 
252
  logging.info("WebDriver and service stopped")
253
  except Exception as e:
254
  logging.warning(f"Error quitting driver: {str(e)}")
 
8
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
9
  from sentence_transformers import SentenceTransformer
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from langchain_community.vectorstores import FAISS
12
+ from langchain_community.embeddings import HuggingFaceEmbeddings
13
  from langchain.schema import Document
14
  import logging
15
  import subprocess
 
18
  import re
19
  import time
20
  import psutil
21
+ import tempfile
22
+ import shutil
23
  from selenium.common.exceptions import WebDriverException, TimeoutException
24
 
25
  # Set up logging
 
55
  st.session_state.qa_pipeline = None
56
 
57
  def cleanup_chromedriver_processes():
58
+ """Kill any lingering ChromeDriver and Chrome processes."""
59
  try:
60
  for proc in psutil.process_iter(['name']):
61
+ if proc.info['name'] in ['chromedriver', 'chrome', 'chromium', 'chromium-browser']:
62
+ try:
63
+ proc.kill()
64
+ logging.info(f"Killed process {proc.info['name']} PID {proc.pid}")
65
+ except:
66
+ pass
67
  except Exception as e:
68
+ logging.warning(f"Error cleaning up processes: {str(e)}")
69
 
70
  def check_port(port):
71
  """Check if a port is in use."""
 
133
  subprocess.run(['chmod', '+x', chromedriver_path], check=True)
134
  except subprocess.CalledProcessError:
135
  st.warning(f"Could not set executable permissions on {chromedriver_path}")
 
136
 
137
  if not os.access(chromium_path, os.X_OK):
138
  try:
 
154
  options.add_argument('--remote-debugging-port=0')
155
  options.add_argument('--ignore-certificate-errors')
156
  options.add_argument('--disable-web-security')
157
+
158
+ # Add unique user data directory to prevent conflicts
159
+ temp_dir = tempfile.mkdtemp()
160
+ options.add_argument(f"--user-data-dir={temp_dir}")
161
+ options.add_argument("--profile-directory=Default")
162
+
163
  options.binary_location = chromium_path
164
 
165
  # Initialize ChromeDriver service
 
176
  service.start()
177
  driver = webdriver.Chrome(service=service, options=options)
178
  driver.set_page_load_timeout(60)
179
+
180
+ # Store temp directory for cleanup
181
+ driver.temp_dir = temp_dir
182
+ logging.info(f"ChromeDriver initialized on port {service.port} with temp dir: {temp_dir}")
183
  return driver, service
184
 
185
  except WebDriverException as e:
 
191
  pass
192
  time.sleep(2)
193
  else:
194
+ # Clean up temp directory if driver creation failed
195
+ try:
196
+ shutil.rmtree(temp_dir)
197
+ except:
198
+ pass
199
  raise
200
 
201
  except Exception as e:
 
267
  try:
268
  driver.quit()
269
  service.stop()
270
+
271
+ # Clean up temporary directory
272
+ if hasattr(driver, 'temp_dir'):
273
+ try:
274
+ shutil.rmtree(driver.temp_dir)
275
+ logging.info(f"Cleaned up temp directory: {driver.temp_dir}")
276
+ except Exception as e:
277
+ logging.warning(f"Error cleaning up temp directory: {str(e)}")
278
+
279
  logging.info("WebDriver and service stopped")
280
  except Exception as e:
281
  logging.warning(f"Error quitting driver: {str(e)}")