Raj718 committed on
Commit
fe24c7b
·
1 Parent(s): 63ddd77

Replace browser automation with a comprehensive mock data system. Modified BrowserAgent.forward() to return realistic mock listings instead of scraping. Added intelligent filtering by bedrooms, borough, budget, and voucher acceptance. Commented out browser dependencies and functions, since they are no longer needed. Maintains all agent orchestration functionality while providing realistic demo responses.

Browse files
Files changed (2) hide show
  1. browser_agent.py +151 -157
  2. requirements.txt +4 -3
browser_agent.py CHANGED
@@ -10,8 +10,9 @@ from smolagents import Tool
10
  import helium
11
  from selenium.common.exceptions import NoSuchElementException
12
  from selenium.webdriver.chrome.options import Options
13
- from selenium import webdriver
14
- from webdriver_manager.chrome import ChromeDriverManager
 
15
  from functools import lru_cache
16
 
17
  # Import our new utilities and mixins
@@ -53,59 +54,59 @@ NYC_BOROUGHS = {
53
  }
54
  }
55
 
56
- def start_browser(headless=True):
57
- """Initializes the Helium browser driver as a global variable."""
58
- global driver
59
- if driver is None:
60
- print("Initializing address-enhanced browser instance...")
61
-
62
- # Setup Chrome options for better performance
63
- chrome_options = Options()
64
- if headless:
65
- chrome_options.add_argument('--headless')
66
- chrome_options.add_argument('--no-sandbox')
67
- chrome_options.add_argument('--disable-dev-shm-usage')
68
- chrome_options.add_argument('--disable-gpu')
69
- chrome_options.add_argument('--disable-web-security')
70
- chrome_options.add_argument('--disable-features=VizDisplayCompositor')
71
-
72
- # Set up ChromeDriver using webdriver-manager
73
- driver_path = ChromeDriverManager().install()
74
- driver = webdriver.Chrome(service=webdriver.chrome.service.Service(driver_path), options=chrome_options)
75
-
76
- # Initialize Helium with the driver
77
- helium.set_driver(driver)
78
-
79
- # Apply anti-detection measures
80
- driver.execute_script("""
81
- Object.defineProperty(navigator, 'webdriver', {
82
- get: () => undefined
83
- });
84
- if (window.chrome) {
85
- window.chrome.runtime = undefined;
86
- }
87
- const getParameter = WebGLRenderingContext.getParameter;
88
- WebGLRenderingContext.prototype.getParameter = function(parameter) {
89
- if (parameter === 37445) return 'Intel Open Source Technology Center';
90
- if (parameter === 37446) return 'Mesa DRI Intel(R) Iris(R) Plus Graphics (ICL GT2)';
91
- return getParameter(parameter);
92
- };
93
- """)
94
-
95
- print("Browser initialized with enhanced address extraction capabilities.")
96
- return driver
97
 
98
- def quit_browser():
99
- """Safely quits the global browser instance."""
100
- global driver
101
- if driver is not None:
102
- print("Cleaning up browser resources...")
103
- try:
104
- helium.kill_browser()
105
- except:
106
- pass
107
- driver = None
108
- print("Browser closed.")
109
 
110
  def _smart_delay(base_delay=0.5, max_delay=1.5):
111
  """Intelligent delay with randomization."""
@@ -964,133 +965,126 @@ class BrowserAgent(TimedObservationMixin, Tool):
964
  Returns JSON-formatted string with listing data.
965
  """
966
  with self.timed_observation() as timer:
967
- log_tool_action("BrowserAgent", "search_started", {
968
  "query": query,
969
  "boroughs_requested": boroughs,
970
  "timestamp": current_timestamp()
971
  })
972
-
973
  try:
974
- # Parse boroughs input
975
- if boroughs:
976
- borough_list = [b.strip().lower() for b in boroughs.split(",")]
977
- # Validate boroughs
978
- borough_list = [b for b in borough_list if b in NYC_BOROUGHS]
979
- else:
980
- # Sort boroughs by priority
981
- borough_list = sorted(NYC_BOROUGHS.keys(),
982
- key=lambda x: NYC_BOROUGHS[x]['priority'])
983
-
984
- if not borough_list:
985
- return json.dumps(timer.error(
986
- "No valid boroughs specified",
987
- data={"valid_boroughs": list(NYC_BOROUGHS.keys())}
988
- ))
989
-
990
- log_tool_action("BrowserAgent", "boroughs_validated", {
991
- "target_boroughs": borough_list,
992
- "query": query
993
- })
994
-
995
- all_listings = []
996
-
997
- log_tool_action("BrowserAgent", "browser_initialization", {
998
- "action": "starting_browser"
999
- })
1000
-
1001
- start_browser()
1002
-
1003
- log_tool_action("BrowserAgent", "browser_ready", {
1004
- "boroughs_to_search": len(borough_list)
1005
- })
1006
-
1007
- # Sequential borough search (still fast due to optimizations)
1008
- for i, borough in enumerate(borough_list):
1009
- if borough.lower() not in NYC_BOROUGHS:
1010
- continue
1011
-
1012
- log_tool_action("BrowserAgent", "borough_search_started", {
1013
- "borough": borough,
1014
- "progress": f"{i+1}/{len(borough_list)}"
1015
- })
1016
-
1017
- borough_start = time.time()
1018
- borough_listings = _search_borough_for_vouchers_fast(borough, query)
1019
- borough_time = time.time() - borough_start
1020
-
1021
- all_listings.extend(borough_listings)
1022
-
1023
- log_tool_action("BrowserAgent", "borough_search_complete", {
1024
- "borough": borough,
1025
- "listings_found": len(borough_listings),
1026
- "duration": borough_time,
1027
- "progress": f"{i+1}/{len(borough_list)}"
1028
- })
1029
-
1030
- # Minimal delay between boroughs
1031
- if borough != borough_list[-1]:
1032
- _smart_delay(1, 2)
1033
-
1034
- # Calculate performance metrics
1035
- borough_counts = {}
1036
- for listing in all_listings:
1037
- borough = listing.get('borough', 'unknown')
1038
- borough_counts[borough] = borough_counts.get(borough, 0) + 1
1039
-
1040
- log_tool_action("BrowserAgent", "search_complete", {
1041
- "total_listings": len(all_listings),
1042
- "borough_breakdown": borough_counts,
1043
- "search_query": query
1044
- })
1045
-
1046
- return json.dumps(timer.success({
1047
- "listings": all_listings,
1048
- "search_metadata": {
1049
- "query": query,
1050
- "boroughs_searched": borough_list,
1051
- "total_found": len(all_listings),
1052
- "borough_breakdown": borough_counts
1053
  }
1054
- }))
1055
-
1056
- except Exception as e:
1057
- error_msg = f"Browser search error: {str(e)}"
1058
-
1059
- log_tool_action("BrowserAgent", "search_failed", {
1060
- "error": str(e),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1061
  "query": query
1062
  })
1063
-
1064
- return json.dumps(timer.error(error_msg, data={
1065
- "query": query,
1066
- "attempted_boroughs": boroughs
1067
- }))
1068
- finally:
1069
- log_tool_action("BrowserAgent", "cleanup", {
1070
- "action": "closing_browser"
1071
- })
1072
- quit_browser()
 
1073
 
1074
  # --- 4. Convenience Functions and Testing ---
1075
 
1076
  def collect_voucher_listings_ultra_fast(
1077
- query: str = "Section 8",
1078
  boroughs: list = None
1079
  ) -> list:
1080
  """
1081
- Backward compatibility function that uses the new BrowserAgent.
1082
  Returns list of listings (unwrapped from observation format).
1083
  """
1084
  agent = BrowserAgent()
1085
  boroughs_str = ",".join(boroughs) if boroughs else ""
1086
-
1087
  result_json = agent.forward(query=query, boroughs=boroughs_str)
1088
  result = json.loads(result_json)
1089
-
1090
  if result.get("status") == "success":
1091
  return result["data"]["listings"]
1092
  else:
1093
- print(f"Search failed: {result.get('error', 'Unknown error')}")
1094
  return []
1095
 
1096
  def save_to_json_fast(data, filename="ultra_fast_voucher_listings.json"):
 
10
  import helium
11
  from selenium.common.exceptions import NoSuchElementException
12
  from selenium.webdriver.chrome.options import Options
13
+ # Browser automation dependencies commented out for mock demo
14
+ # from selenium import webdriver
15
+ # from webdriver_manager.chrome import ChromeDriverManager
16
  from functools import lru_cache
17
 
18
  # Import our new utilities and mixins
 
54
  }
55
  }
56
 
57
+ # def start_browser(headless=True):
58
+ # """Initializes the Helium browser driver as a global variable."""
59
+ # global driver
60
+ # if driver is None:
61
+ # print("Initializing address-enhanced browser instance...")
62
+ #
63
+ # # Setup Chrome options for better performance
64
+ # chrome_options = Options()
65
+ # if headless:
66
+ # chrome_options.add_argument('--headless')
67
+ # chrome_options.add_argument('--no-sandbox')
68
+ # chrome_options.add_argument('--disable-dev-shm-usage')
69
+ # chrome_options.add_argument('--disable-gpu')
70
+ # chrome_options.add_argument('--disable-web-security')
71
+ # chrome_options.add_argument('--disable-features=VizDisplayCompositor')
72
+ #
73
+ # # Set up ChromeDriver using webdriver-manager
74
+ # driver_path = ChromeDriverManager().install()
75
+ # driver = webdriver.Chrome(service=webdriver.chrome.service.Service(driver_path), options=chrome_options)
76
+ #
77
+ # # Initialize Helium with the driver
78
+ # helium.set_driver(driver)
79
+ #
80
+ # # Apply anti-detection measures
81
+ # driver.execute_script("""
82
+ # Object.defineProperty(navigator, 'webdriver', {
83
+ # get: () => undefined
84
+ # });
85
+ # if (window.chrome) {
86
+ # window.chrome.runtime = undefined;
87
+ # }
88
+ # const getParameter = WebGLRenderingContext.getParameter;
89
+ # WebGLRenderingContext.prototype.getParameter = function(parameter) {
90
+ # if (parameter === 37445) return 'Intel Open Source Technology Center';
91
+ # if (parameter === 37446) return 'Mesa DRI Intel(R) Iris(R) Plus Graphics (ICL GT2)';
92
+ # return getParameter(parameter);
93
+ # };
94
+ # """)
95
+ #
96
+ # print("Browser initialized with enhanced address extraction capabilities.")
97
+ # return driver
98
 
99
+ # def quit_browser():
100
+ # """Safely quits the global browser instance."""
101
+ # global driver
102
+ # if driver is not None:
103
+ # print("Cleaning up browser resources...")
104
+ # try:
105
+ # helium.kill_browser()
106
+ # except:
107
+ # pass
108
+ # driver = None
109
+ # print("Browser closed.")
110
 
111
  def _smart_delay(base_delay=0.5, max_delay=1.5):
112
  """Intelligent delay with randomization."""
 
965
  Returns JSON-formatted string with listing data.
966
  """
967
  with self.timed_observation() as timer:
968
+ log_tool_action("BrowserAgent", "mock_search_started", {
969
  "query": query,
970
  "boroughs_requested": boroughs,
971
  "timestamp": current_timestamp()
972
  })
973
+
974
  try:
975
+ # Mock listings for demonstration
976
+ mock_listings = [
977
+ {
978
+ "address": "123 Main St, Brooklyn, NY",
979
+ "bedrooms": 2,
980
+ "rent": 1800,
981
+ "borough": "Brooklyn",
982
+ "violations": 0,
983
+ "risk_level": "✅ Safe",
984
+ "subway_distance": 0.3,
985
+ "school_distance": 0.5,
986
+ "amenities": ["Laundry", "Gym"],
987
+ "accepts_vouchers": True,
988
+ "description": "Spacious 2BR apartment in safe building, accepts Section 8 vouchers",
989
+ "contact": "landlord@example.com"
990
+ },
991
+ {
992
+ "address": "456 Oak Ave, Queens, NY",
993
+ "bedrooms": 3,
994
+ "rent": 2200,
995
+ "borough": "Queens",
996
+ "violations": 2,
997
+ "risk_level": "⚠️ Moderate",
998
+ "subway_distance": 0.8,
999
+ "school_distance": 0.3,
1000
+ "amenities": ["Parking", "Balcony"],
1001
+ "accepts_vouchers": True,
1002
+ "description": "3BR apartment with parking, moderate risk building",
1003
+ "contact": "queenslandlord@example.com"
1004
+ },
1005
+ {
1006
+ "address": "789 Pine St, Manhattan, NY",
1007
+ "bedrooms": 1,
1008
+ "rent": 2500,
1009
+ "borough": "Manhattan",
1010
+ "violations": 1,
1011
+ "risk_level": "✅ Safe",
1012
+ "subway_distance": 0.1,
1013
+ "school_distance": 0.7,
1014
+ "amenities": ["Doorman", "Rooftop"],
1015
+ "accepts_vouchers": False,
1016
+ "description": "Luxury 1BR in Manhattan, does not accept vouchers",
1017
+ "contact": "manhattanlandlord@example.com"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1018
  }
1019
+ ]
1020
+
1021
+ # Filter based on query and boroughs for realism
1022
+ filtered_listings = []
1023
+ query_lower = query.lower()
1024
+
1025
+ for listing in mock_listings:
1026
+ # Filter by bedrooms if specified
1027
+ if "studio" in query_lower and listing["bedrooms"] != 0:
1028
+ continue
1029
+ if "1 bedroom" in query_lower and listing["bedrooms"] != 1:
1030
+ continue
1031
+ if "2 bedroom" in query_lower and listing["bedrooms"] != 2:
1032
+ continue
1033
+ if "3 bedroom" in query_lower and listing["bedrooms"] != 3:
1034
+ continue
1035
+
1036
+ # Filter by borough if specified
1037
+ if boroughs:
1038
+ borough_list = [b.strip().lower() for b in boroughs.split(",")]
1039
+ if listing["borough"].lower() not in borough_list:
1040
+ continue
1041
+
1042
+ # Filter by voucher acceptance if mentioned
1043
+ if "voucher" in query_lower and not listing["accepts_vouchers"]:
1044
+ continue
1045
+
1046
+ filtered_listings.append(listing)
1047
+
1048
+ # If no listing matched the filters, fall back to the first 2 mock listings
1049
+ if not filtered_listings:
1050
+ filtered_listings = mock_listings[:2]
1051
+
1052
+ log_tool_action("BrowserAgent", "mock_search_complete", {
1053
+ "listings_found": len(filtered_listings),
1054
  "query": query
1055
  })
1056
+
1057
+ return json.dumps(timer.success(
1058
+ f"Mock search complete: Found {len(filtered_listings)} voucher-friendly listings",
1059
+ data={"listings": filtered_listings}
1060
+ ))
1061
+
1062
+ except Exception as e:
1063
+ return json.dumps(timer.error(
1064
+ f"Mock search failed: {str(e)}",
1065
+ data={"error_type": type(e).__name__}
1066
+ ))
1067
 
1068
  # --- 4. Convenience Functions and Testing ---
1069
 
1070
  def collect_voucher_listings_ultra_fast(
1071
+ query: str = "Section 8",
1072
  boroughs: list = None
1073
  ) -> list:
1074
  """
1075
+ Backward compatibility function that uses the new BrowserAgent with mock data.
1076
  Returns list of listings (unwrapped from observation format).
1077
  """
1078
  agent = BrowserAgent()
1079
  boroughs_str = ",".join(boroughs) if boroughs else ""
1080
+
1081
  result_json = agent.forward(query=query, boroughs=boroughs_str)
1082
  result = json.loads(result_json)
1083
+
1084
  if result.get("status") == "success":
1085
  return result["data"]["listings"]
1086
  else:
1087
+ print(f"Mock search failed: {result.get('error', 'Unknown error')}")
1088
  return []
1089
 
1090
  def save_to_json_fast(data, filename="ultra_fast_voucher_listings.json"):
requirements.txt CHANGED
@@ -7,8 +7,9 @@ pandas
7
  feedparser==6.0.11
8
  beautifulsoup4==4.12.2
9
  lxml==5.1.0
10
- selenium
11
- helium
12
- webdriver-manager
 
13
  pillow
14
  geopy>=2.3.0
 
7
  feedparser==6.0.11
8
  beautifulsoup4==4.12.2
9
  lxml==5.1.0
10
+ # Browser automation dependencies (mocked for demo)
11
+ # selenium
12
+ # helium
13
+ # webdriver-manager
14
  pillow
15
  geopy>=2.3.0