Spaces:
Sleeping
Sleeping
Replace browser automation with a comprehensive mock data system
- Modify BrowserAgent.forward() to return realistic mock listings instead of scraping
- Add intelligent filtering by bedrooms, borough, budget, and voucher acceptance
- Comment out browser dependencies and functions, since they are no longer needed
- Maintain all agent orchestration functionality while providing realistic demo responses
Browse files
- browser_agent.py +151 -157
- requirements.txt +4 -3
browser_agent.py
CHANGED
|
@@ -10,8 +10,9 @@ from smolagents import Tool
|
|
| 10 |
import helium
|
| 11 |
from selenium.common.exceptions import NoSuchElementException
|
| 12 |
from selenium.webdriver.chrome.options import Options
|
| 13 |
-
|
| 14 |
-
from
|
|
|
|
| 15 |
from functools import lru_cache
|
| 16 |
|
| 17 |
# Import our new utilities and mixins
|
|
@@ -53,59 +54,59 @@ NYC_BOROUGHS = {
|
|
| 53 |
}
|
| 54 |
}
|
| 55 |
|
| 56 |
-
def start_browser(headless=True):
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
|
| 98 |
-
def quit_browser():
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
|
| 110 |
def _smart_delay(base_delay=0.5, max_delay=1.5):
|
| 111 |
"""Intelligent delay with randomization."""
|
|
@@ -964,133 +965,126 @@ class BrowserAgent(TimedObservationMixin, Tool):
|
|
| 964 |
Returns JSON-formatted string with listing data.
|
| 965 |
"""
|
| 966 |
with self.timed_observation() as timer:
|
| 967 |
-
log_tool_action("BrowserAgent", "
|
| 968 |
"query": query,
|
| 969 |
"boroughs_requested": boroughs,
|
| 970 |
"timestamp": current_timestamp()
|
| 971 |
})
|
| 972 |
-
|
| 973 |
try:
|
| 974 |
-
#
|
| 975 |
-
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
|
| 980 |
-
|
| 981 |
-
|
| 982 |
-
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
"
|
| 987 |
-
|
| 988 |
-
|
| 989 |
-
|
| 990 |
-
|
| 991 |
-
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
|
| 995 |
-
|
| 996 |
-
|
| 997 |
-
|
| 998 |
-
|
| 999 |
-
|
| 1000 |
-
|
| 1001 |
-
|
| 1002 |
-
|
| 1003 |
-
|
| 1004 |
-
|
| 1005 |
-
|
| 1006 |
-
|
| 1007 |
-
|
| 1008 |
-
|
| 1009 |
-
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
"
|
| 1014 |
-
"
|
| 1015 |
-
|
| 1016 |
-
|
| 1017 |
-
borough_start = time.time()
|
| 1018 |
-
borough_listings = _search_borough_for_vouchers_fast(borough, query)
|
| 1019 |
-
borough_time = time.time() - borough_start
|
| 1020 |
-
|
| 1021 |
-
all_listings.extend(borough_listings)
|
| 1022 |
-
|
| 1023 |
-
log_tool_action("BrowserAgent", "borough_search_complete", {
|
| 1024 |
-
"borough": borough,
|
| 1025 |
-
"listings_found": len(borough_listings),
|
| 1026 |
-
"duration": borough_time,
|
| 1027 |
-
"progress": f"{i+1}/{len(borough_list)}"
|
| 1028 |
-
})
|
| 1029 |
-
|
| 1030 |
-
# Minimal delay between boroughs
|
| 1031 |
-
if borough != borough_list[-1]:
|
| 1032 |
-
_smart_delay(1, 2)
|
| 1033 |
-
|
| 1034 |
-
# Calculate performance metrics
|
| 1035 |
-
borough_counts = {}
|
| 1036 |
-
for listing in all_listings:
|
| 1037 |
-
borough = listing.get('borough', 'unknown')
|
| 1038 |
-
borough_counts[borough] = borough_counts.get(borough, 0) + 1
|
| 1039 |
-
|
| 1040 |
-
log_tool_action("BrowserAgent", "search_complete", {
|
| 1041 |
-
"total_listings": len(all_listings),
|
| 1042 |
-
"borough_breakdown": borough_counts,
|
| 1043 |
-
"search_query": query
|
| 1044 |
-
})
|
| 1045 |
-
|
| 1046 |
-
return json.dumps(timer.success({
|
| 1047 |
-
"listings": all_listings,
|
| 1048 |
-
"search_metadata": {
|
| 1049 |
-
"query": query,
|
| 1050 |
-
"boroughs_searched": borough_list,
|
| 1051 |
-
"total_found": len(all_listings),
|
| 1052 |
-
"borough_breakdown": borough_counts
|
| 1053 |
}
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1061 |
"query": query
|
| 1062 |
})
|
| 1063 |
-
|
| 1064 |
-
return json.dumps(timer.
|
| 1065 |
-
"
|
| 1066 |
-
"
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
|
|
|
| 1073 |
|
| 1074 |
# --- 4. Convenience Functions and Testing ---
|
| 1075 |
|
| 1076 |
def collect_voucher_listings_ultra_fast(
|
| 1077 |
-
query: str = "Section 8",
|
| 1078 |
boroughs: list = None
|
| 1079 |
) -> list:
|
| 1080 |
"""
|
| 1081 |
-
Backward compatibility function that uses the new BrowserAgent.
|
| 1082 |
Returns list of listings (unwrapped from observation format).
|
| 1083 |
"""
|
| 1084 |
agent = BrowserAgent()
|
| 1085 |
boroughs_str = ",".join(boroughs) if boroughs else ""
|
| 1086 |
-
|
| 1087 |
result_json = agent.forward(query=query, boroughs=boroughs_str)
|
| 1088 |
result = json.loads(result_json)
|
| 1089 |
-
|
| 1090 |
if result.get("status") == "success":
|
| 1091 |
return result["data"]["listings"]
|
| 1092 |
else:
|
| 1093 |
-
print(f"
|
| 1094 |
return []
|
| 1095 |
|
| 1096 |
def save_to_json_fast(data, filename="ultra_fast_voucher_listings.json"):
|
|
|
|
| 10 |
import helium
|
| 11 |
from selenium.common.exceptions import NoSuchElementException
|
| 12 |
from selenium.webdriver.chrome.options import Options
|
| 13 |
+
# Browser automation dependencies commented out for mock demo
|
| 14 |
+
# from selenium import webdriver
|
| 15 |
+
# from webdriver_manager.chrome import ChromeDriverManager
|
| 16 |
from functools import lru_cache
|
| 17 |
|
| 18 |
# Import our new utilities and mixins
|
|
|
|
| 54 |
}
|
| 55 |
}
|
| 56 |
|
| 57 |
+
# # def start_browser(headless=True):
|
| 58 |
+
# """Initializes the Helium browser driver as a global variable."""
|
| 59 |
+
# global driver
|
| 60 |
+
# if driver is None:
|
| 61 |
+
# print("Initializing address-enhanced browser instance...")
|
| 62 |
+
#
|
| 63 |
+
# # Setup Chrome options for better performance
|
| 64 |
+
# chrome_options = Options()
|
| 65 |
+
# if headless:
|
| 66 |
+
# chrome_options.add_argument('--headless')
|
| 67 |
+
# chrome_options.add_argument('--no-sandbox')
|
| 68 |
+
# chrome_options.add_argument('--disable-dev-shm-usage')
|
| 69 |
+
# chrome_options.add_argument('--disable-gpu')
|
| 70 |
+
# chrome_options.add_argument('--disable-web-security')
|
| 71 |
+
# chrome_options.add_argument('--disable-features=VizDisplayCompositor')
|
| 72 |
+
#
|
| 73 |
+
# # Set up ChromeDriver using webdriver-manager
|
| 74 |
+
# driver_path = ChromeDriverManager().install()
|
| 75 |
+
# driver = webdriver.Chrome(service=webdriver.chrome.service.Service(driver_path), options=chrome_options)
|
| 76 |
+
#
|
| 77 |
+
# # Initialize Helium with the driver
|
| 78 |
+
# helium.set_driver(driver)
|
| 79 |
+
#
|
| 80 |
+
# # Apply anti-detection measures
|
| 81 |
+
# driver.execute_script("""
|
| 82 |
+
# Object.defineProperty(navigator, 'webdriver', {
|
| 83 |
+
# get: () => undefined
|
| 84 |
+
# });
|
| 85 |
+
# if (window.chrome) {
|
| 86 |
+
# window.chrome.runtime = undefined;
|
| 87 |
+
# }
|
| 88 |
+
# const getParameter = WebGLRenderingContext.getParameter;
|
| 89 |
+
# WebGLRenderingContext.prototype.getParameter = function(parameter) {
|
| 90 |
+
# if (parameter === 37445) return 'Intel Open Source Technology Center';
|
| 91 |
+
# if (parameter === 37446) return 'Mesa DRI Intel(R) Iris(R) Plus Graphics (ICL GT2)';
|
| 92 |
+
# return getParameter(parameter);
|
| 93 |
+
# };
|
| 94 |
+
# """)
|
| 95 |
+
#
|
| 96 |
+
# print("Browser initialized with enhanced address extraction capabilities.")
|
| 97 |
+
# return driver
|
| 98 |
|
| 99 |
+
# def quit_browser():
|
| 100 |
+
# """Safely quits the global browser instance."""
|
| 101 |
+
# global driver
|
| 102 |
+
# if driver is not None:
|
| 103 |
+
# print("Cleaning up browser resources...")
|
| 104 |
+
# try:
|
| 105 |
+
# helium.kill_browser()
|
| 106 |
+
# except:
|
| 107 |
+
# pass
|
| 108 |
+
# driver = None
|
| 109 |
+
# print("Browser closed.")
|
| 110 |
|
| 111 |
def _smart_delay(base_delay=0.5, max_delay=1.5):
|
| 112 |
"""Intelligent delay with randomization."""
|
|
|
|
| 965 |
Returns JSON-formatted string with listing data.
|
| 966 |
"""
|
| 967 |
with self.timed_observation() as timer:
|
| 968 |
+
log_tool_action("BrowserAgent", "mock_search_started", {
|
| 969 |
"query": query,
|
| 970 |
"boroughs_requested": boroughs,
|
| 971 |
"timestamp": current_timestamp()
|
| 972 |
})
|
| 973 |
+
|
| 974 |
try:
|
| 975 |
+
# Mock listings for demonstration
|
| 976 |
+
mock_listings = [
|
| 977 |
+
{
|
| 978 |
+
"address": "123 Main St, Brooklyn, NY",
|
| 979 |
+
"bedrooms": 2,
|
| 980 |
+
"rent": 1800,
|
| 981 |
+
"borough": "Brooklyn",
|
| 982 |
+
"violations": 0,
|
| 983 |
+
"risk_level": "✅ Safe",
|
| 984 |
+
"subway_distance": 0.3,
|
| 985 |
+
"school_distance": 0.5,
|
| 986 |
+
"amenities": ["Laundry", "Gym"],
|
| 987 |
+
"accepts_vouchers": True,
|
| 988 |
+
"description": "Spacious 2BR apartment in safe building, accepts Section 8 vouchers",
|
| 989 |
+
"contact": "landlord@example.com"
|
| 990 |
+
},
|
| 991 |
+
{
|
| 992 |
+
"address": "456 Oak Ave, Queens, NY",
|
| 993 |
+
"bedrooms": 3,
|
| 994 |
+
"rent": 2200,
|
| 995 |
+
"borough": "Queens",
|
| 996 |
+
"violations": 2,
|
| 997 |
+
"risk_level": "⚠️ Moderate",
|
| 998 |
+
"subway_distance": 0.8,
|
| 999 |
+
"school_distance": 0.3,
|
| 1000 |
+
"amenities": ["Parking", "Balcony"],
|
| 1001 |
+
"accepts_vouchers": True,
|
| 1002 |
+
"description": "3BR apartment with parking, moderate risk building",
|
| 1003 |
+
"contact": "queenslandlord@example.com"
|
| 1004 |
+
},
|
| 1005 |
+
{
|
| 1006 |
+
"address": "789 Pine St, Manhattan, NY",
|
| 1007 |
+
"bedrooms": 1,
|
| 1008 |
+
"rent": 2500,
|
| 1009 |
+
"borough": "Manhattan",
|
| 1010 |
+
"violations": 1,
|
| 1011 |
+
"risk_level": "✅ Safe",
|
| 1012 |
+
"subway_distance": 0.1,
|
| 1013 |
+
"school_distance": 0.7,
|
| 1014 |
+
"amenities": ["Doorman", "Rooftop"],
|
| 1015 |
+
"accepts_vouchers": False,
|
| 1016 |
+
"description": "Luxury 1BR in Manhattan, does not accept vouchers",
|
| 1017 |
+
"contact": "manhattanlandlord@example.com"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1018 |
}
|
| 1019 |
+
]
|
| 1020 |
+
|
| 1021 |
+
# Filter based on query and boroughs for realism
|
| 1022 |
+
filtered_listings = []
|
| 1023 |
+
query_lower = query.lower()
|
| 1024 |
+
|
| 1025 |
+
for listing in mock_listings:
|
| 1026 |
+
# Filter by bedrooms if specified
|
| 1027 |
+
if "studio" in query_lower and listing["bedrooms"] != 0:
|
| 1028 |
+
continue
|
| 1029 |
+
if "1 bedroom" in query_lower and listing["bedrooms"] != 1:
|
| 1030 |
+
continue
|
| 1031 |
+
if "2 bedroom" in query_lower and listing["bedrooms"] != 2:
|
| 1032 |
+
continue
|
| 1033 |
+
if "3 bedroom" in query_lower and listing["bedrooms"] != 3:
|
| 1034 |
+
continue
|
| 1035 |
+
|
| 1036 |
+
# Filter by borough if specified
|
| 1037 |
+
if boroughs:
|
| 1038 |
+
borough_list = [b.strip().lower() for b in boroughs.split(",")]
|
| 1039 |
+
if listing["borough"].lower() not in borough_list:
|
| 1040 |
+
continue
|
| 1041 |
+
|
| 1042 |
+
# Filter by voucher acceptance if mentioned
|
| 1043 |
+
if "voucher" in query_lower and not listing["accepts_vouchers"]:
|
| 1044 |
+
continue
|
| 1045 |
+
|
| 1046 |
+
filtered_listings.append(listing)
|
| 1047 |
+
|
| 1048 |
+
# Fallback: if no listing matched the filters, return the first 2 mock listings
|
| 1049 |
+
if not filtered_listings:
|
| 1050 |
+
filtered_listings = mock_listings[:2]
|
| 1051 |
+
|
| 1052 |
+
log_tool_action("BrowserAgent", "mock_search_complete", {
|
| 1053 |
+
"listings_found": len(filtered_listings),
|
| 1054 |
"query": query
|
| 1055 |
})
|
| 1056 |
+
|
| 1057 |
+
return json.dumps(timer.success(
|
| 1058 |
+
f"Mock search complete: Found {len(filtered_listings)} voucher-friendly listings",
|
| 1059 |
+
data={"listings": filtered_listings}
|
| 1060 |
+
))
|
| 1061 |
+
|
| 1062 |
+
except Exception as e:
|
| 1063 |
+
return json.dumps(timer.error(
|
| 1064 |
+
f"Mock search failed: {str(e)}",
|
| 1065 |
+
data={"error_type": type(e).__name__}
|
| 1066 |
+
))
|
| 1067 |
|
| 1068 |
# --- 4. Convenience Functions and Testing ---
|
| 1069 |
|
| 1070 |
def collect_voucher_listings_ultra_fast(
|
| 1071 |
+
query: str = "Section 8",
|
| 1072 |
boroughs: list = None
|
| 1073 |
) -> list:
|
| 1074 |
"""
|
| 1075 |
+
Backward compatibility function that uses the new BrowserAgent with mock data.
|
| 1076 |
Returns list of listings (unwrapped from observation format).
|
| 1077 |
"""
|
| 1078 |
agent = BrowserAgent()
|
| 1079 |
boroughs_str = ",".join(boroughs) if boroughs else ""
|
| 1080 |
+
|
| 1081 |
result_json = agent.forward(query=query, boroughs=boroughs_str)
|
| 1082 |
result = json.loads(result_json)
|
| 1083 |
+
|
| 1084 |
if result.get("status") == "success":
|
| 1085 |
return result["data"]["listings"]
|
| 1086 |
else:
|
| 1087 |
+
print(f"Mock search failed: {result.get('error', 'Unknown error')}")
|
| 1088 |
return []
|
| 1089 |
|
| 1090 |
def save_to_json_fast(data, filename="ultra_fast_voucher_listings.json"):
|
requirements.txt
CHANGED
|
@@ -7,8 +7,9 @@ pandas
|
|
| 7 |
feedparser==6.0.11
|
| 8 |
beautifulsoup4==4.12.2
|
| 9 |
lxml==5.1.0
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
| 13 |
pillow
|
| 14 |
geopy>=2.3.0
|
|
|
|
| 7 |
feedparser==6.0.11
|
| 8 |
beautifulsoup4==4.12.2
|
| 9 |
lxml==5.1.0
|
| 10 |
+
# Browser automation dependencies (mocked for demo)
|
| 11 |
+
# selenium
|
| 12 |
+
# helium
|
| 13 |
+
# webdriver-manager
|
| 14 |
pillow
|
| 15 |
geopy>=2.3.0
|