Spaces:
Sleeping
Sleeping
phhttps committed on
Commit ·
6078432
1
Parent(s): c3f3c51
feat: prepare for hugging face deployment (docker, metadata, relative api)
Browse files- Dockerfile +49 -0
- README.md +20 -226
- README_LOCAL.md +231 -0
- api.py +19 -3
- booking_scraper.py +149 -207
- conductor/tracks/live_data_integrity/plan.md +23 -0
- frontend_dashboard.html +5 -2
- holland_agent.py +7 -69
- patchright_airbnb_scraper.py +53 -78
- tests/verify_live_data.py +62 -0
Dockerfile
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use a slim Python base image; the Chromium system libraries Patchright needs are installed below
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# Set environment variables
|
| 5 |
+
ENV PYTHONDONTWRITEBYTECODE=1
|
| 6 |
+
ENV PYTHONUNBUFFERED=1
|
| 7 |
+
ENV HOME=/home/user
|
| 8 |
+
ENV PATH="/home/user/.local/bin:${PATH}"
|
| 9 |
+
|
| 10 |
+
# Install system dependencies for Chromium
|
| 11 |
+
RUN apt-get update && apt-get install -y
|
| 12 |
+
wget
|
| 13 |
+
gnupg
|
| 14 |
+
libnss3
|
| 15 |
+
libnspr4
|
| 16 |
+
libatk1.0-0
|
| 17 |
+
libatk-bridge2.0-0
|
| 18 |
+
libcups2
|
| 19 |
+
libdrm2
|
| 20 |
+
libxkbcommon0
|
| 21 |
+
libxcomposite1
|
| 22 |
+
libxdamage1
|
| 23 |
+
libxext6
|
| 24 |
+
libxfixes3
|
| 25 |
+
libxrandr2
|
| 26 |
+
libgbm1
|
| 27 |
+
libasound2
|
| 28 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 29 |
+
|
| 30 |
+
# Create and switch to a non-root user
|
| 31 |
+
RUN useradd -m -u 1000 user
|
| 32 |
+
USER user
|
| 33 |
+
WORKDIR $HOME/app
|
| 34 |
+
|
| 35 |
+
# Copy requirements and install
|
| 36 |
+
COPY --chown=user requirements.txt .
|
| 37 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 38 |
+
|
| 39 |
+
# Install patchright browsers
|
| 40 |
+
RUN python3 -m patchright install chromium
|
| 41 |
+
|
| 42 |
+
# Copy the rest of the application
|
| 43 |
+
COPY --chown=user . .
|
| 44 |
+
|
| 45 |
+
# Expose Hugging Face standard port
|
| 46 |
+
EXPOSE 7860
|
| 47 |
+
|
| 48 |
+
# Command to run the application
|
| 49 |
+
# uvicorn takes an ASGI import string "<module>:<attribute>", not a filename;
# api.py defines the FastAPI instance as `app`.
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,231 +1,25 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
- **Multi-source search**: Booking.com, Airbnb, Center Parcs
|
| 8 |
-
- **Dog-friendly filter**: Only shows pet-friendly accommodations
|
| 9 |
-
- **Weather integration**: Ranks deals based on weather forecasts
|
| 10 |
-
- **Smart ranking**: Multi-factor scoring algorithm
|
| 11 |
-
- **Parallel search**: Searches multiple cities simultaneously
|
| 12 |
-
- **Browser automation**: Uses agent-browser CLI for web scraping
|
| 13 |
-
|
| 14 |
-
## Requirements
|
| 15 |
-
|
| 16 |
-
- Python 3.8+
|
| 17 |
-
- Node.js (for agent-browser)
|
| 18 |
-
- agent-browser CLI installed
|
| 19 |
-
|
| 20 |
-
## Installation
|
| 21 |
-
|
| 22 |
-
```bash
|
| 23 |
-
# Install Python dependencies
|
| 24 |
-
pip install -r requirements.txt
|
| 25 |
-
|
| 26 |
-
# Install agent-browser (if not already installed)
|
| 27 |
-
npm install -g @vercel/agent-browser
|
| 28 |
-
```
|
| 29 |
-
|
| 30 |
-
## Configuration
|
| 31 |
-
|
| 32 |
-
Create a `.env` file with your API keys:
|
| 33 |
-
|
| 34 |
-
```bash
|
| 35 |
-
FIRECRAWL_API_KEY=your_firecrawl_key_here
|
| 36 |
-
OPENWEATHER_API_KEY=your_openweather_key_here
|
| 37 |
-
AGENT_BROWSER_SESSION=holland-deals
|
| 38 |
-
AGENT_BROWSER_PATH=/path/to/agent-browser
|
| 39 |
-
```
|
| 40 |
-
|
| 41 |
-
### Getting API Keys
|
| 42 |
-
|
| 43 |
-
- **OpenWeather API**: Free at https://openweathermap.org/api (1,000 calls/day)
|
| 44 |
-
- **Firecrawl API**: Optional, for enhanced scraping at https://firecrawl.dev
|
| 45 |
-
|
| 46 |
-
## Usage
|
| 47 |
-
|
| 48 |
-
### Basic Search
|
| 49 |
-
|
| 50 |
-
```bash
|
| 51 |
-
python main.py \
|
| 52 |
-
--cities Amsterdam \
|
| 53 |
-
--checkin 2026-02-15 \
|
| 54 |
-
--checkout 2026-02-22
|
| 55 |
-
```
|
| 56 |
-
|
| 57 |
-
### Multi-City Search
|
| 58 |
-
|
| 59 |
-
```bash
|
| 60 |
-
python main.py \
|
| 61 |
-
--cities "Amsterdam,Rotterdam,Zandvoort" \
|
| 62 |
-
--checkin 2026-02-15 \
|
| 63 |
-
--checkout 2026-02-22 \
|
| 64 |
-
--budget-max 200
|
| 65 |
-
```
|
| 66 |
-
|
| 67 |
-
### Custom Group Size
|
| 68 |
-
|
| 69 |
-
```bash
|
| 70 |
-
python main.py \
|
| 71 |
-
--cities Amsterdam \
|
| 72 |
-
--checkin 2026-03-01 \
|
| 73 |
-
--checkout 2026-03-08 \
|
| 74 |
-
--adults 2 \
|
| 75 |
-
--pets 1
|
| 76 |
-
```
|
| 77 |
-
|
| 78 |
-
### Human-Readable Output
|
| 79 |
-
|
| 80 |
-
```bash
|
| 81 |
-
python main.py \
|
| 82 |
-
--cities Amsterdam \
|
| 83 |
-
--checkin 2026-02-15 \
|
| 84 |
-
--checkout 2026-02-22 \
|
| 85 |
-
--output summary \
|
| 86 |
-
--top 5
|
| 87 |
-
```
|
| 88 |
-
|
| 89 |
-
## Command-Line Options
|
| 90 |
-
|
| 91 |
-
| Option | Description | Default |
|
| 92 |
-
|--------|-------------|---------|
|
| 93 |
-
| `--cities` | Comma-separated city list (required) | - |
|
| 94 |
-
| `--checkin` | Check-in date YYYY-MM-DD (required) | - |
|
| 95 |
-
| `--checkout` | Check-out date YYYY-MM-DD (required) | - |
|
| 96 |
-
| `--budget-min` | Minimum budget per night (EUR) | 40 |
|
| 97 |
-
| `--budget-max` | Maximum budget per night (EUR) | 250 |
|
| 98 |
-
| `--adults` | Number of adults | 4 |
|
| 99 |
-
| `--pets` | Number of pets | 1 |
|
| 100 |
-
| `--output` | Output format: json or summary | json |
|
| 101 |
-
| `--top` | Number of top deals to show | 10 |
|
| 102 |
-
|
| 103 |
-
## Scoring Algorithm
|
| 104 |
-
|
| 105 |
-
Deals are ranked using a multi-factor scoring system:
|
| 106 |
|
| 107 |
-
|
| 108 |
-
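- **Rating Score** (0-30 points): Based on property rating
|
| 109 |
-
- **Review Count** (0-20 points): More reviews = more trustworthy
|
| 110 |
-
- **Dog-Friendly Multiplier**: 1.4x bonus for pet-friendly properties
|
| 111 |
-
- **Weather Bonus**: 1.2x bonus if average temperature > 15°C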
|
| 112 |
-
|
| 113 |
-
## Architecture
|
| 114 |
-
|
| 115 |
-
```
|
| 116 |
-
holland_agent.py # Main orchestrator
|
| 117 |
-
├── booking_scraper.py # Booking.com integration
|
| 118 |
-
├── airbnb_scraper.py # Airbnb integration
|
| 119 |
-
├── weather_integration.py # OpenWeather API
|
| 120 |
-
└── deal_ranker.py # Scoring system
|
| 121 |
-
```
|
| 122 |
-
|
| 123 |
-
## Data Sources
|
| 124 |
-
|
| 125 |
-
1. **Booking.com**: Live search with pet-friendly filter
|
| 126 |
-
2. **Airbnb**: Pet-allowed homes search
|
| 127 |
-
3. **Center Parcs**: Static data for popular Dutch vacation parks
|
| 128 |
-
4. **OpenWeather**: 5-day weather forecasts
|
| 129 |
-
|
| 130 |
-
## Example Output
|
| 131 |
-
|
| 132 |
-
```json
|
| 133 |
-
{
|
| 134 |
-
"timestamp": "2026-01-25T18:00:00",
|
| 135 |
-
"search_params": {
|
| 136 |
-
"cities": ["Amsterdam", "Rotterdam"],
|
| 137 |
-
"checkin": "2026-02-15",
|
| 138 |
-
"checkout": "2026-02-22",
|
| 139 |
-
"nights": 7,
|
| 140 |
-
"group_size": 4,
|
| 141 |
-
"pets": 1
|
| 142 |
-
},
|
| 143 |
-
"total_deals_found": 15,
|
| 144 |
-
"top_10_deals": [
|
| 145 |
-
{
|
| 146 |
-
"rank_score": 89.2,
|
| 147 |
-
"name": "Center Parcs Zandvoort Beach",
|
| 148 |
-
"location": "Zandvoort aan Zee",
|
| 149 |
-
"price_per_night": 58,
|
| 150 |
-
"total_cost_for_trip": 406,
|
| 151 |
-
"rating": 4.5,
|
| 152 |
-
"reviews": 512,
|
| 153 |
-
"pet_friendly": true,
|
| 154 |
-
"recommendation": "🔥 EXCELLENT | €406 total"
|
| 155 |
-
}
|
| 156 |
-
],
|
| 157 |
-
"summary": {
|
| 158 |
-
"best_overall": "Center Parcs Zandvoort Beach",
|
| 159 |
-
"dog_friendly_options": 15,
|
| 160 |
-
"total_options_found": 15
|
| 161 |
-
}
|
| 162 |
-
}
|
| 163 |
-
```
|
| 164 |
-
|
| 165 |
-
## Troubleshooting
|
| 166 |
-
|
| 167 |
-
### Agent-browser not found
|
| 168 |
-
|
| 169 |
-
Make sure agent-browser is installed and the path in `.env` is correct:
|
| 170 |
-
|
| 171 |
-
```bash
|
| 172 |
-
which agent-browser
|
| 173 |
-
# Update AGENT_BROWSER_PATH in .env with the output
|
| 174 |
-
```
|
| 175 |
-
|
| 176 |
-
### Weather API errors
|
| 177 |
-
|
| 178 |
-
Check your OpenWeather API key is valid and you haven't exceeded rate limits (60 calls/min on free tier).
|
| 179 |
-
|
| 180 |
-
### No results found
|
| 181 |
-
|
| 182 |
-
Try:
|
| 183 |
-
- Expanding date range
|
| 184 |
-
- Increasing budget-max
|
| 185 |
-
- Trying different cities
|
| 186 |
-
- Checking if dates are in the future
|
| 187 |
-
|
| 188 |
-
## Development
|
| 189 |
-
|
| 190 |
-
### Running Tests
|
| 191 |
-
|
| 192 |
-
```bash
|
| 193 |
-
# Test with example search
|
| 194 |
-
python holland_agent.py
|
| 195 |
-
|
| 196 |
-
# Test CLI
|
| 197 |
-
python main.py --cities Amsterdam --checkin 2026-02-15 --checkout 2026-02-22 --output summary
|
| 198 |
-
```
|
| 199 |
-
|
| 200 |
-
### Project Structure
|
| 201 |
-
|
| 202 |
-
```
|
| 203 |
-
/home/kek/Desktop/AirBnB/
|
| 204 |
-
├── .claude/ # Claude Code project context
|
| 205 |
-
├── .env # API keys (not in git)
|
| 206 |
-
├── .gitignore # Git ignore rules
|
| 207 |
-
├── requirements.txt # Python dependencies
|
| 208 |
-
├── main.py # CLI entry point
|
| 209 |
-
├── holland_agent.py # Main orchestrator
|
| 210 |
-
├── booking_scraper.py # Booking.com scraper
|
| 211 |
-
├── airbnb_scraper.py # Airbnb scraper
|
| 212 |
-
├── deal_ranker.py # Scoring algorithm
|
| 213 |
-
├── weather_integration.py # Weather API
|
| 214 |
-
└── README.md # This file
|
| 215 |
-
```
|
| 216 |
-
|
| 217 |
-
## Tips for Best Deals
|
| 218 |
-
|
| 219 |
-
1. **Weekdays are cheaper**: Tuesday-Thursday can be 30% cheaper than weekends
|
| 220 |
-
2. **Avoid school holidays**: Book just before or after holiday periods
|
| 221 |
-
3. **Last-minute deals**: 7 days before travel often has best prices
|
| 222 |
-
4. **Center Parcs**: Dogs stay free at most locations
|
| 223 |
-
5. **Weather matters**: Spring/summer have better weather bonus
|
| 224 |
-
|
| 225 |
-
## License
|
| 226 |
-
|
| 227 |
-
MIT
|
| 228 |
|
| 229 |
-
## Contributing
|
|
|
|
| 230 |
|
| 231 |
-
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Lars Holiday Deal Finder
|
| 3 |
+
emoji: 🐾
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
---
|
| 10 |
|
| 11 |
+
# Lars Holiday Deal Finder 🇳🇱
|
| 12 |
|
| 13 |
+
Ein AI-gesteuerter Urlaubs-Planer für Holland, optimiert für Familien mit Hunden.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
+
## Features
|
| 16 |
+
- **Live-Scraping**: Echtzeit-Daten von Booking.com und Airbnb via Patchright.
|
| 17 |
+
- **Hundefreundlich**: Automatische Filterung auf haustierfreundliche Unterkünfte.
|
| 18 |
+
- **Smart Ranking**: Bewertung nach Preis, Wetter und Nutzer-Reviews.
|
| 19 |
+
- **Modernes Dashboard**: Übersichtliche Web-UI für die perfekte Urlaubsplanung.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
+
## Nutzung
|
| 22 |
+
Einfach Reiseziele (kommagetrennt) und Daten eingeben und auf "Angebote finden" klicken. Die KI durchsucht das Web und präsentiert die besten Ergebnisse direkt mit Link zum Angebot.
|
| 23 |
|
| 24 |
+
---
|
| 25 |
+
*Gebaut mit ❤️ für 4 Personen und 1 Hund.*
|
README_LOCAL.md
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Holland Vacation Deal Finder
|
| 2 |
+
|
| 3 |
+
AI-powered vacation deal finder for the Netherlands. Finds budget-friendly, dog-friendly accommodations for families.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- **Multi-source search**: Booking.com, Airbnb, Center Parcs
|
| 8 |
+
- **Dog-friendly filter**: Only shows pet-friendly accommodations
|
| 9 |
+
- **Weather integration**: Ranks deals based on weather forecasts
|
| 10 |
+
- **Smart ranking**: Multi-factor scoring algorithm
|
| 11 |
+
- **Parallel search**: Searches multiple cities simultaneously
|
| 12 |
+
- **Browser automation**: Uses agent-browser CLI for web scraping
|
| 13 |
+
|
| 14 |
+
## Requirements
|
| 15 |
+
|
| 16 |
+
- Python 3.8+
|
| 17 |
+
- Node.js (for agent-browser)
|
| 18 |
+
- agent-browser CLI installed
|
| 19 |
+
|
| 20 |
+
## Installation
|
| 21 |
+
|
| 22 |
+
```bash
|
| 23 |
+
# Install Python dependencies
|
| 24 |
+
pip install -r requirements.txt
|
| 25 |
+
|
| 26 |
+
# Install agent-browser (if not already installed)
|
| 27 |
+
npm install -g @vercel/agent-browser
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
## Configuration
|
| 31 |
+
|
| 32 |
+
Create a `.env` file with your API keys:
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
FIRECRAWL_API_KEY=your_firecrawl_key_here
|
| 36 |
+
OPENWEATHER_API_KEY=your_openweather_key_here
|
| 37 |
+
AGENT_BROWSER_SESSION=holland-deals
|
| 38 |
+
AGENT_BROWSER_PATH=/path/to/agent-browser
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
### Getting API Keys
|
| 42 |
+
|
| 43 |
+
- **OpenWeather API**: Free at https://openweathermap.org/api (1,000 calls/day)
|
| 44 |
+
- **Firecrawl API**: Optional, for enhanced scraping at https://firecrawl.dev
|
| 45 |
+
|
| 46 |
+
## Usage
|
| 47 |
+
|
| 48 |
+
### Basic Search
|
| 49 |
+
|
| 50 |
+
```bash
|
| 51 |
+
python main.py \
|
| 52 |
+
--cities Amsterdam \
|
| 53 |
+
--checkin 2026-02-15 \
|
| 54 |
+
--checkout 2026-02-22
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
### Multi-City Search
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
python main.py \
|
| 61 |
+
--cities "Amsterdam,Rotterdam,Zandvoort" \
|
| 62 |
+
--checkin 2026-02-15 \
|
| 63 |
+
--checkout 2026-02-22 \
|
| 64 |
+
--budget-max 200
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
### Custom Group Size
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
python main.py \
|
| 71 |
+
--cities Amsterdam \
|
| 72 |
+
--checkin 2026-03-01 \
|
| 73 |
+
--checkout 2026-03-08 \
|
| 74 |
+
--adults 2 \
|
| 75 |
+
--pets 1
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
### Human-Readable Output
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
python main.py \
|
| 82 |
+
--cities Amsterdam \
|
| 83 |
+
--checkin 2026-02-15 \
|
| 84 |
+
--checkout 2026-02-22 \
|
| 85 |
+
--output summary \
|
| 86 |
+
--top 5
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
## Command-Line Options
|
| 90 |
+
|
| 91 |
+
| Option | Description | Default |
|
| 92 |
+
|--------|-------------|---------|
|
| 93 |
+
| `--cities` | Comma-separated city list (required) | - |
|
| 94 |
+
| `--checkin` | Check-in date YYYY-MM-DD (required) | - |
|
| 95 |
+
| `--checkout` | Check-out date YYYY-MM-DD (required) | - |
|
| 96 |
+
| `--budget-min` | Minimum budget per night (EUR) | 40 |
|
| 97 |
+
| `--budget-max` | Maximum budget per night (EUR) | 250 |
|
| 98 |
+
| `--adults` | Number of adults | 4 |
|
| 99 |
+
| `--pets` | Number of pets | 1 |
|
| 100 |
+
| `--output` | Output format: json or summary | json |
|
| 101 |
+
| `--top` | Number of top deals to show | 10 |
|
| 102 |
+
|
| 103 |
+
## Scoring Algorithm
|
| 104 |
+
|
| 105 |
+
Deals are ranked using a multi-factor scoring system:
|
| 106 |
+
|
| 107 |
+
- **Price Score** (0-40 points): Lower price = higher score
|
| 108 |
+
- **Rating Score** (0-30 points): Based on property rating
|
| 109 |
+
- **Review Count** (0-20 points): More reviews = more trustworthy
|
| 110 |
+
- **Dog-Friendly Multiplier**: 1.4x bonus for pet-friendly properties
|
| 111 |
+
- **Weather Bonus**: 1.2x bonus if average temperature > 15°C
|
| 112 |
+
|
| 113 |
+
## Architecture
|
| 114 |
+
|
| 115 |
+
```
|
| 116 |
+
holland_agent.py # Main orchestrator
|
| 117 |
+
├── booking_scraper.py # Booking.com integration
|
| 118 |
+
├── airbnb_scraper.py # Airbnb integration
|
| 119 |
+
├── weather_integration.py # OpenWeather API
|
| 120 |
+
└── deal_ranker.py # Scoring system
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
## Data Sources
|
| 124 |
+
|
| 125 |
+
1. **Booking.com**: Live search with pet-friendly filter
|
| 126 |
+
2. **Airbnb**: Pet-allowed homes search
|
| 127 |
+
3. **Center Parcs**: Static data for popular Dutch vacation parks
|
| 128 |
+
4. **OpenWeather**: 5-day weather forecasts
|
| 129 |
+
|
| 130 |
+
## Example Output
|
| 131 |
+
|
| 132 |
+
```json
|
| 133 |
+
{
|
| 134 |
+
"timestamp": "2026-01-25T18:00:00",
|
| 135 |
+
"search_params": {
|
| 136 |
+
"cities": ["Amsterdam", "Rotterdam"],
|
| 137 |
+
"checkin": "2026-02-15",
|
| 138 |
+
"checkout": "2026-02-22",
|
| 139 |
+
"nights": 7,
|
| 140 |
+
"group_size": 4,
|
| 141 |
+
"pets": 1
|
| 142 |
+
},
|
| 143 |
+
"total_deals_found": 15,
|
| 144 |
+
"top_10_deals": [
|
| 145 |
+
{
|
| 146 |
+
"rank_score": 89.2,
|
| 147 |
+
"name": "Center Parcs Zandvoort Beach",
|
| 148 |
+
"location": "Zandvoort aan Zee",
|
| 149 |
+
"price_per_night": 58,
|
| 150 |
+
"total_cost_for_trip": 406,
|
| 151 |
+
"rating": 4.5,
|
| 152 |
+
"reviews": 512,
|
| 153 |
+
"pet_friendly": true,
|
| 154 |
+
"recommendation": "🔥 EXCELLENT | €406 total"
|
| 155 |
+
}
|
| 156 |
+
],
|
| 157 |
+
"summary": {
|
| 158 |
+
"best_overall": "Center Parcs Zandvoort Beach",
|
| 159 |
+
"dog_friendly_options": 15,
|
| 160 |
+
"total_options_found": 15
|
| 161 |
+
}
|
| 162 |
+
}
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
## Troubleshooting
|
| 166 |
+
|
| 167 |
+
### Agent-browser not found
|
| 168 |
+
|
| 169 |
+
Make sure agent-browser is installed and the path in `.env` is correct:
|
| 170 |
+
|
| 171 |
+
```bash
|
| 172 |
+
which agent-browser
|
| 173 |
+
# Update AGENT_BROWSER_PATH in .env with the output
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
### Weather API errors
|
| 177 |
+
|
| 178 |
+
Check your OpenWeather API key is valid and you haven't exceeded rate limits (60 calls/min on free tier).
|
| 179 |
+
|
| 180 |
+
### No results found
|
| 181 |
+
|
| 182 |
+
Try:
|
| 183 |
+
- Expanding date range
|
| 184 |
+
- Increasing budget-max
|
| 185 |
+
- Trying different cities
|
| 186 |
+
- Checking if dates are in the future
|
| 187 |
+
|
| 188 |
+
## Development
|
| 189 |
+
|
| 190 |
+
### Running Tests
|
| 191 |
+
|
| 192 |
+
```bash
|
| 193 |
+
# Test with example search
|
| 194 |
+
python holland_agent.py
|
| 195 |
+
|
| 196 |
+
# Test CLI
|
| 197 |
+
python main.py --cities Amsterdam --checkin 2026-02-15 --checkout 2026-02-22 --output summary
|
| 198 |
+
```
|
| 199 |
+
|
| 200 |
+
### Project Structure
|
| 201 |
+
|
| 202 |
+
```
|
| 203 |
+
/home/kek/Desktop/AirBnB/
|
| 204 |
+
├── .claude/ # Claude Code project context
|
| 205 |
+
├── .env # API keys (not in git)
|
| 206 |
+
├── .gitignore # Git ignore rules
|
| 207 |
+
├── requirements.txt # Python dependencies
|
| 208 |
+
├── main.py # CLI entry point
|
| 209 |
+
├── holland_agent.py # Main orchestrator
|
| 210 |
+
├── booking_scraper.py # Booking.com scraper
|
| 211 |
+
├── airbnb_scraper.py # Airbnb scraper
|
| 212 |
+
├── deal_ranker.py # Scoring algorithm
|
| 213 |
+
├── weather_integration.py # Weather API
|
| 214 |
+
└── README.md # This file
|
| 215 |
+
```
|
| 216 |
+
|
| 217 |
+
## Tips for Best Deals
|
| 218 |
+
|
| 219 |
+
1. **Weekdays are cheaper**: Tuesday-Thursday can be 30% cheaper than weekends
|
| 220 |
+
2. **Avoid school holidays**: Book just before or after holiday periods
|
| 221 |
+
3. **Last-minute deals**: 7 days before travel often has best prices
|
| 222 |
+
4. **Center Parcs**: Dogs stay free at most locations
|
| 223 |
+
5. **Weather matters**: Spring/summer have better weather bonus
|
| 224 |
+
|
| 225 |
+
## License
|
| 226 |
+
|
| 227 |
+
MIT
|
| 228 |
+
|
| 229 |
+
## Contributing
|
| 230 |
+
|
| 231 |
+
This is a personal project. Feel free to fork and adapt for your needs.
|
api.py
CHANGED
|
@@ -1,13 +1,16 @@
|
|
| 1 |
from fastapi import FastAPI, Query
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
|
|
|
| 3 |
from typing import List, Optional
|
| 4 |
from holland_agent import HollandVacationAgent
|
| 5 |
import uvicorn
|
| 6 |
import asyncio
|
|
|
|
| 7 |
|
| 8 |
app = FastAPI(title="Lars Holiday Deal API")
|
| 9 |
|
| 10 |
-
# Allow requests
|
| 11 |
app.add_middleware(
|
| 12 |
CORSMiddleware,
|
| 13 |
allow_origins=["*"],
|
|
@@ -19,8 +22,8 @@ app.add_middleware(
|
|
| 19 |
agent = HollandVacationAgent()
|
| 20 |
|
| 21 |
@app.get("/")
|
| 22 |
-
async def
|
| 23 |
-
return
|
| 24 |
|
| 25 |
@app.get("/search")
|
| 26 |
async def search_deals(
|
|
@@ -41,6 +44,19 @@ async def search_deals(
|
|
| 41 |
pets=pets
|
| 42 |
)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
return results
|
| 45 |
|
| 46 |
if __name__ == "__main__":
|
|
|
|
| 1 |
from fastapi import FastAPI, Query
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from fastapi.staticfiles import StaticFiles
|
| 4 |
+
from fastapi.responses import FileResponse
|
| 5 |
from typing import List, Optional
|
| 6 |
from holland_agent import HollandVacationAgent
|
| 7 |
import uvicorn
|
| 8 |
import asyncio
|
| 9 |
+
import os
|
| 10 |
|
| 11 |
app = FastAPI(title="Lars Holiday Deal API")
|
| 12 |
|
| 13 |
+
# Allow requests
|
| 14 |
app.add_middleware(
|
| 15 |
CORSMiddleware,
|
| 16 |
allow_origins=["*"],
|
|
|
|
| 22 |
agent = HollandVacationAgent()
|
| 23 |
|
| 24 |
@app.get("/")
|
| 25 |
+
async def serve_frontend():
|
| 26 |
+
return FileResponse("frontend_dashboard.html")
|
| 27 |
|
| 28 |
@app.get("/search")
|
| 29 |
async def search_deals(
|
|
|
|
| 44 |
pets=pets
|
| 45 |
)
|
| 46 |
|
| 47 |
+
# Vibe Polish: Ensure every deal has an image
|
| 48 |
+
fallback_images = [
|
| 49 |
+
"https://images.unsplash.com/photo-1512917774080-9991f1c4c750?auto=format&fit=crop&w=800&q=80",
|
| 50 |
+
"https://images.unsplash.com/photo-1502672260266-1c1ef2d93688?auto=format&fit=crop&w=800&q=80",
|
| 51 |
+
"https://images.unsplash.com/photo-1493663284031-b7e3aefcae8e?auto=format&fit=crop&w=800&q=80",
|
| 52 |
+
"https://images.unsplash.com/photo-1518780664697-55e3ad937233?auto=format&fit=crop&w=800&q=80"
|
| 53 |
+
]
|
| 54 |
+
|
| 55 |
+
for deal in results.get("top_10_deals", []):
|
| 56 |
+
if not deal.get("image_url") or len(deal["image_url"]) < 5:
|
| 57 |
+
# Pick a fallback image by hashing the deal name (not random: the same name maps to the same image within one process)
|
| 58 |
+
deal["image_url"] = fallback_images[hash(deal["name"]) % len(fallback_images)]
|
| 59 |
+
|
| 60 |
return results
|
| 61 |
|
| 62 |
if __name__ == "__main__":
|
booking_scraper.py
CHANGED
|
@@ -1,28 +1,54 @@
|
|
| 1 |
"""
|
| 2 |
-
Booking.com Scraper using
|
| 3 |
-
Searches for pet-friendly accommodations
|
| 4 |
"""
|
| 5 |
|
| 6 |
-
|
| 7 |
-
from typing import List, Dict
|
| 8 |
-
from bs4 import BeautifulSoup # pyre-ignore[21]
|
| 9 |
import re
|
|
|
|
|
|
|
| 10 |
from datetime import datetime
|
| 11 |
-
from
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
-
from urllib.parse import quote, urlparse, parse_qs, urlencode, urlunparse
|
| 17 |
-
|
| 18 |
class BookingScraper:
|
| 19 |
"""
|
| 20 |
-
Scrapes Booking.com for pet-friendly accommodations using
|
| 21 |
"""
|
| 22 |
|
| 23 |
def __init__(self):
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
async def search_booking(
|
| 28 |
self,
|
|
@@ -30,53 +56,58 @@ class BookingScraper:
|
|
| 30 |
checkin: str,
|
| 31 |
checkout: str,
|
| 32 |
adults: int = 4
|
| 33 |
-
) -> List[Dict]:
|
| 34 |
"""
|
| 35 |
-
Search Booking.com
|
| 36 |
"""
|
| 37 |
-
print(f" Searching Booking.com for {city}...")
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
try:
|
| 43 |
-
#
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
-
|
|
|
|
|
|
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
url,
|
| 53 |
-
impersonate="chrome120",
|
| 54 |
-
timeout=30,
|
| 55 |
-
allow_redirects=True
|
| 56 |
-
)
|
| 57 |
-
# print(f" [Debug] Booking.com Status: {response.status_code}")
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
| 62 |
|
| 63 |
-
soup = BeautifulSoup(response.text, 'html.parser')
|
| 64 |
-
# print(f" [Debug] Page Title: {soup.title.string.strip() if soup.title else 'No Title'}")
|
| 65 |
-
|
| 66 |
deals = self._parse_html(soup, city, checkin, checkout, adults, nights)
|
| 67 |
|
| 68 |
if deals:
|
| 69 |
-
print(f" Found {len(deals)} properties on Booking.com")
|
| 70 |
return deals
|
| 71 |
else:
|
| 72 |
-
print(f" No properties found on Booking.com
|
| 73 |
-
return
|
| 74 |
|
| 75 |
except Exception as e:
|
| 76 |
-
print(f" Warning: Could not scrape Booking.com: {str(e)[:
|
| 77 |
-
return
|
| 78 |
finally:
|
| 79 |
-
|
| 80 |
|
| 81 |
def _build_booking_url(
|
| 82 |
self,
|
|
@@ -87,8 +118,6 @@ class BookingScraper:
|
|
| 87 |
) -> str:
|
| 88 |
"""Build Booking.com search URL with pet-friendly filter"""
|
| 89 |
base_url = "https://www.booking.com/searchresults.html"
|
| 90 |
-
|
| 91 |
-
# Safe encoding of parameters
|
| 92 |
params = [
|
| 93 |
f"ss={quote(city)}",
|
| 94 |
f"checkin={checkin}",
|
|
@@ -96,92 +125,93 @@ class BookingScraper:
|
|
| 96 |
f"group_adults={adults}",
|
| 97 |
"group_children=0",
|
| 98 |
"no_rooms=1",
|
| 99 |
-
"nflt=ht_id%3D220;hotelfacility%3D14",
|
| 100 |
]
|
| 101 |
-
|
| 102 |
return f"{base_url}?{'&'.join(params)}"
|
| 103 |
|
| 104 |
-
def
|
| 105 |
-
"""
|
| 106 |
-
try:
|
| 107 |
-
parsed = urlparse(url)
|
| 108 |
-
# Booking.com hotel links are usually /hotel/cc/name.html
|
| 109 |
-
# If it's a search result link with many params, strip them
|
| 110 |
-
if '/hotel/' in parsed.path:
|
| 111 |
-
# Keep only necessary params if any (usually none needed for direct link)
|
| 112 |
-
# But sometimes hapos is useful. Safest is to strip everything for clean link.
|
| 113 |
-
return f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
|
| 114 |
-
return url
|
| 115 |
-
except Exception:
|
| 116 |
-
return url
|
| 117 |
-
|
| 118 |
-
def _parse_html(self, soup: BeautifulSoup, city: str, checkin: str, checkout: str, adults: int, nights: int = 1) -> List[Dict]:
|
| 119 |
-
"""Parse Booking.com HTML to extract property data"""
|
| 120 |
deals = []
|
| 121 |
-
|
| 122 |
-
# Helper to add params to clean URL
|
| 123 |
-
def add_params(clean_url):
|
| 124 |
-
if '?' in clean_url:
|
| 125 |
-
return f"{clean_url}&checkin={checkin}&checkout={checkout}&group_adults={adults}&no_rooms=1&group_children=0"
|
| 126 |
-
return f"{clean_url}?checkin={checkin}&checkout={checkout}&group_adults={adults}&no_rooms=1&group_children=0"
|
| 127 |
-
|
| 128 |
-
# Try to find property cards (Booking.com uses various selectors)
|
| 129 |
property_cards = soup.find_all('div', {'data-testid': 'property-card'})
|
| 130 |
-
|
| 131 |
if not property_cards:
|
| 132 |
-
#
|
| 133 |
property_cards = soup.find_all('div', class_=re.compile(r'sr_property_block|property_card'))
|
| 134 |
|
| 135 |
-
for card in property_cards[:
|
| 136 |
try:
|
| 137 |
-
#
|
| 138 |
-
name_elem = card.find('div', {'data-testid': 'title'}) or card.find('h3')
|
| 139 |
-
name = name_elem.get_text(strip=True) if name_elem else "
|
| 140 |
-
|
| 141 |
-
#
|
| 142 |
-
price_elem = card.find('span', {'data-testid': 'price-and-discounted-price'}) or
|
| 143 |
-
|
|
|
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
if price_match:
|
| 149 |
-
total_price = int(price_match.group())
|
| 150 |
|
| 151 |
-
#
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
|
|
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
review_elem = card.find('div', {'data-testid': 'review-count'}) or card.find('div', class_=re.compile(r'review'))
|
| 166 |
-
review_text = review_elem.get_text(strip=True) if review_elem else "100"
|
| 167 |
|
| 168 |
-
|
| 169 |
-
review_match = re.search(r'\d+', review_text.replace(',', ''))
|
| 170 |
-
if review_match:
|
| 171 |
-
reviews = int(review_match.group())
|
| 172 |
|
| 173 |
-
#
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
-
|
| 179 |
-
if
|
| 180 |
-
|
| 181 |
-
if raw_href.startswith('/'):
|
| 182 |
-
raw_href = f"https://www.booking.com{raw_href}"
|
| 183 |
-
clean = self._clean_url(raw_href)
|
| 184 |
-
url = add_params(clean)
|
| 185 |
|
| 186 |
deals.append({
|
| 187 |
"name": name,
|
|
@@ -191,99 +221,11 @@ class BookingScraper:
|
|
| 191 |
"reviews": reviews,
|
| 192 |
"pet_friendly": True,
|
| 193 |
"source": "booking.com",
|
| 194 |
-
"url":
|
|
|
|
| 195 |
})
|
| 196 |
-
|
| 197 |
except Exception as e:
|
| 198 |
-
|
|
|
|
| 199 |
|
| 200 |
return deals
|
| 201 |
-
|
| 202 |
-
def _get_fallback_data(
|
| 203 |
-
self,
|
| 204 |
-
city: str,
|
| 205 |
-
checkin: str = "2026-06-01",
|
| 206 |
-
checkout: str = "2026-06-07",
|
| 207 |
-
adults: int = 4
|
| 208 |
-
) -> List[Dict]:
|
| 209 |
-
"""Return static fallback data or generate mock data for unknown cities"""
|
| 210 |
-
|
| 211 |
-
# Generate a real search URL for this specific fallback request
|
| 212 |
-
search_url = self._build_booking_url(city, checkin, checkout, adults)
|
| 213 |
-
|
| 214 |
-
fallback_properties = {
|
| 215 |
-
"Amsterdam": [
|
| 216 |
-
{
|
| 217 |
-
"name": "Amsterdam Beach House",
|
| 218 |
-
"location": "Zandvoort, near Amsterdam",
|
| 219 |
-
"price_per_night": 68,
|
| 220 |
-
"rating": 4.5,
|
| 221 |
-
"reviews": 412,
|
| 222 |
-
"pet_friendly": True,
|
| 223 |
-
"source": "booking.com",
|
| 224 |
-
"url": "https://www.booking.com/hotel/nl/amsterdam-beach-house.en-gb.html"
|
| 225 |
-
},
|
| 226 |
-
{
|
| 227 |
-
"name": "Landal Beach Resort Ooghduyne",
|
| 228 |
-
"location": "Julianadorp, North Holland",
|
| 229 |
-
"price_per_night": 65,
|
| 230 |
-
"rating": 4.3,
|
| 231 |
-
"reviews": 287,
|
| 232 |
-
"pet_friendly": True,
|
| 233 |
-
"source": "booking.com",
|
| 234 |
-
"url": "https://www.booking.com/hotel/nl/landal-ooghduyne.en-gb.html"
|
| 235 |
-
}
|
| 236 |
-
],
|
| 237 |
-
"Rotterdam": [
|
| 238 |
-
{
|
| 239 |
-
"name": "Roompot Beach Resort",
|
| 240 |
-
"location": "Kamperland, Zeeland",
|
| 241 |
-
"price_per_night": 58,
|
| 242 |
-
"rating": 4.2,
|
| 243 |
-
"reviews": 356,
|
| 244 |
-
"pet_friendly": True,
|
| 245 |
-
"source": "booking.com",
|
| 246 |
-
"url": "https://www.booking.com/hotel/nl/roompot-beach-resort.en-gb.html"
|
| 247 |
-
}
|
| 248 |
-
],
|
| 249 |
-
"Zandvoort": [
|
| 250 |
-
{
|
| 251 |
-
"name": "Beach House Zandvoort",
|
| 252 |
-
"location": "Zandvoort aan Zee",
|
| 253 |
-
"price_per_night": 72,
|
| 254 |
-
"rating": 4.6,
|
| 255 |
-
"reviews": 189,
|
| 256 |
-
"pet_friendly": True,
|
| 257 |
-
"source": "booking.com",
|
| 258 |
-
"url": "https://www.booking.com/hotel/nl/beach-house-zandvoort.en-gb.html"
|
| 259 |
-
}
|
| 260 |
-
]
|
| 261 |
-
}
|
| 262 |
-
|
| 263 |
-
# Return specific fallback if available
|
| 264 |
-
if city in fallback_properties:
|
| 265 |
-
return fallback_properties[city]
|
| 266 |
-
|
| 267 |
-
# Otherwise generate generic fallback data for the requested city
|
| 268 |
-
return [
|
| 269 |
-
{
|
| 270 |
-
"name": f"Beautiful Home in {city}",
|
| 271 |
-
"location": city,
|
| 272 |
-
"price_per_night": 60,
|
| 273 |
-
"rating": 4.2,
|
| 274 |
-
"reviews": 50,
|
| 275 |
-
"pet_friendly": True,
|
| 276 |
-
"source": "booking.com (fallback)",
|
| 277 |
-
"url": search_url # Direct link to search results
|
| 278 |
-
},
|
| 279 |
-
{
|
| 280 |
-
"name": f"{city} Center Apartment",
|
| 281 |
-
"location": city,
|
| 282 |
-
"price_per_night": 85,
|
| 283 |
-
"rating": 4.5,
|
| 284 |
-
"reviews": 120,
|
| 285 |
-
"pet_friendly": True,
|
| 286 |
-
"source": "booking.com (fallback)",
|
| 287 |
-
"url": search_url # Direct link to search results
|
| 288 |
-
}
|
| 289 |
-
]
|
|
|
|
| 1 |
"""
|
| 2 |
+
Booking.com Scraper using Patchright for maximum reliability
|
| 3 |
+
Searches for pet-friendly accommodations with full JS rendering
|
| 4 |
"""
|
| 5 |
|
| 6 |
+
import asyncio
|
|
|
|
|
|
|
| 7 |
import re
|
| 8 |
+
from typing import List, Dict
|
| 9 |
+
from bs4 import BeautifulSoup
|
| 10 |
from datetime import datetime
|
| 11 |
+
from urllib.parse import quote, urlparse
|
| 12 |
|
| 13 |
+
try:
|
| 14 |
+
from patchright.async_api import async_playwright
|
| 15 |
+
PATCHRIGHT_AVAILABLE = True
|
| 16 |
+
except ImportError:
|
| 17 |
+
PATCHRIGHT_AVAILABLE = False
|
| 18 |
|
| 19 |
|
|
|
|
|
|
|
| 20 |
class BookingScraper:
|
| 21 |
"""
|
| 22 |
+
Scrapes Booking.com for pet-friendly accommodations using Patchright
|
| 23 |
"""
|
| 24 |
|
| 25 |
def __init__(self):
    """Create a scraper that holds no browser resources yet."""
    # Patchright driver handle, browser and browser context. All three stay
    # None until `launch` assigns them.
    self.playwright = self.browser = self.context = None
|
| 29 |
+
|
| 30 |
+
async def launch(self):
    """Launch a headless Chromium browser through Patchright.

    Starts the Patchright driver, launches Chromium and opens one browser
    context. The three handles are stored on ``self.playwright``,
    ``self.browser`` and ``self.context`` only after every step succeeded;
    if any step fails, whatever was already started is shut down again and
    the original exception is re-raised, so a later retry does not stack a
    second driver on top of a leaked one.

    Raises:
        ImportError: If the ``patchright`` package could not be imported.
    """
    if not PATCHRIGHT_AVAILABLE:
        raise ImportError("patchright not installed")

    playwright = await async_playwright().start()
    browser = None
    try:
        browser = await playwright.chromium.launch(
            headless=True,
            # NOTE(review): sandbox is disabled, presumably so Chromium can
            # start inside the container image - confirm before changing.
            args=['--no-sandbox', '--disable-setuid-sandbox']
        )
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
        )
    except BaseException:
        # Roll back a partial launch (also on cancellation), then let the
        # caller see the original failure.
        try:
            if browser is not None:
                await browser.close()
        finally:
            await playwright.stop()
        raise

    self.playwright = playwright
    self.browser = browser
    self.context = context
|
| 45 |
+
|
| 46 |
+
async def close(self):
    """Shut down the browser and the Patchright driver and forget them.

    Safe to call before ``launch`` and safe to call repeatedly. The stored
    handles are reset to ``None`` because ``search_booking`` relaunches only
    when ``self.browser`` is falsy; leaving a closed browser in place would
    make the next search run against dead handles.

    Raises:
        Exception: Whatever ``browser.close()`` or ``playwright.stop()``
            raises; the driver is still stopped when closing the browser
            fails.
    """
    browser, playwright = self.browser, self.playwright

    # Drop the references first so the scraper never keeps pointing at
    # half-closed handles, even if shutdown fails below.
    self.browser = None
    self.context = None
    self.playwright = None

    try:
        if browser:
            await browser.close()
    finally:
        # Always stop the driver, even when closing the browser raised.
        if playwright:
            await playwright.stop()
|
| 52 |
|
| 53 |
async def search_booking(
|
| 54 |
self,
|
|
|
|
| 56 |
checkin: str,
|
| 57 |
checkout: str,
|
| 58 |
adults: int = 4
|
| 59 |
+
) -> List[Dict]:
|
| 60 |
"""
|
| 61 |
+
Search Booking.com using browser automation
|
| 62 |
"""
|
| 63 |
+
print(f" [Patchright] Searching Booking.com for {city}...")
|
| 64 |
+
|
| 65 |
+
if not self.browser:
|
| 66 |
+
await self.launch()
|
| 67 |
|
| 68 |
+
page = await self.context.new_page()
|
| 69 |
+
url = self._build_booking_url(city, checkin, checkout, adults)
|
| 70 |
|
| 71 |
try:
|
| 72 |
+
# Navigate with a generous timeout
|
| 73 |
+
await page.goto(url, wait_until='domcontentloaded', timeout=60000)
|
| 74 |
+
|
| 75 |
+
# Wait for results to load
|
| 76 |
+
await asyncio.sleep(5)
|
| 77 |
+
|
| 78 |
+
# Handle potential cookie banners or popups
|
| 79 |
+
try:
|
| 80 |
+
# Common "Accept Cookies" button selector
|
| 81 |
+
await page.click('button#onetrust-accept-btn-handler', timeout=3000)
|
| 82 |
+
except:
|
| 83 |
+
pass
|
| 84 |
|
| 85 |
+
# Scroll to trigger lazy loading of images
|
| 86 |
+
await page.evaluate("window.scrollBy(0, 1000)")
|
| 87 |
+
await asyncio.sleep(2)
|
| 88 |
|
| 89 |
+
content = await page.content()
|
| 90 |
+
soup = BeautifulSoup(content, 'html.parser')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
+
# Calculate nights
|
| 93 |
+
d1 = datetime.strptime(checkin, "%Y-%m-%d")
|
| 94 |
+
d2 = datetime.strptime(checkout, "%Y-%m-%d")
|
| 95 |
+
nights = max(1, (d2 - d1).days)
|
| 96 |
|
|
|
|
|
|
|
|
|
|
| 97 |
deals = self._parse_html(soup, city, checkin, checkout, adults, nights)
|
| 98 |
|
| 99 |
if deals:
|
| 100 |
+
print(f" [Patchright] Found {len(deals)} properties on Booking.com")
|
| 101 |
return deals
|
| 102 |
else:
|
| 103 |
+
print(f" [Patchright] No properties found on Booking.com")
|
| 104 |
+
return []
|
| 105 |
|
| 106 |
except Exception as e:
|
| 107 |
+
print(f" [Patchright] Warning: Could not scrape Booking.com: {str(e)[:100]}")
|
| 108 |
+
return []
|
| 109 |
finally:
|
| 110 |
+
await page.close()
|
| 111 |
|
| 112 |
def _build_booking_url(
|
| 113 |
self,
|
|
|
|
| 118 |
) -> str:
|
| 119 |
"""Build Booking.com search URL with pet-friendly filter"""
|
| 120 |
base_url = "https://www.booking.com/searchresults.html"
|
|
|
|
|
|
|
| 121 |
params = [
|
| 122 |
f"ss={quote(city)}",
|
| 123 |
f"checkin={checkin}",
|
|
|
|
| 125 |
f"group_adults={adults}",
|
| 126 |
"group_children=0",
|
| 127 |
"no_rooms=1",
|
| 128 |
+
"nflt=ht_id%3D220;hotelfacility%3D14", # Apartments + Pet-friendly
|
| 129 |
]
|
|
|
|
| 130 |
return f"{base_url}?{'&'.join(params)}"
|
| 131 |
|
| 132 |
+
def _parse_html(self, soup: BeautifulSoup, city: str, checkin: str, checkout: str, adults: int, nights: int) -> List[Dict]:
|
| 133 |
+
"""Parse Booking.com HTML to extract property data with robust heuristics"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
deals = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
property_cards = soup.find_all('div', {'data-testid': 'property-card'})
|
| 136 |
+
|
| 137 |
if not property_cards:
|
| 138 |
+
# Fallback to older class names
|
| 139 |
property_cards = soup.find_all('div', class_=re.compile(r'sr_property_block|property_card'))
|
| 140 |
|
| 141 |
+
for card in property_cards[:15]:
|
| 142 |
try:
|
| 143 |
+
# 1. Name
|
| 144 |
+
name_elem = card.find('div', {'data-testid': 'title'}) or card.find('h3')
|
| 145 |
+
name = name_elem.get_text(strip=True) if name_elem else "Unbekannte Unterkunft"
|
| 146 |
+
|
| 147 |
+
# 2. Price Parsing (Robust)
|
| 148 |
+
price_elem = card.find('span', {'data-testid': 'price-and-discounted-price'}) or \
|
| 149 |
+
card.find('div', class_=re.compile(r'prco-valign-middle-helper')) or \
|
| 150 |
+
card.find(string=re.compile(r'€|€\s*\d+'))
|
| 151 |
|
| 152 |
+
price_text = ""
|
| 153 |
+
if price_elem:
|
| 154 |
+
price_text = price_elem.get_text(strip=True) if hasattr(price_elem, 'get_text') else str(price_elem)
|
|
|
|
|
|
|
| 155 |
|
| 156 |
+
# Extract all numbers from price string
|
| 157 |
+
numbers = re.findall(r'[\d\.,]+', price_text.replace('\xa0', '').replace(' ', ''))
|
| 158 |
+
total_price = 0
|
| 159 |
+
if numbers:
|
| 160 |
+
# Take the last number (usually the current price after discounts)
|
| 161 |
+
price_val = numbers[-1].replace('.', '').replace(',', '')
|
| 162 |
+
total_price = int(price_val) if price_val.isdigit() else 0
|
| 163 |
|
| 164 |
+
# Fallback: Check if price is hidden in parent container
|
| 165 |
+
if total_price == 0:
|
| 166 |
+
card_text = card.get_text()
|
| 167 |
+
price_matches = re.findall(r'€\s*([\d\.,]+)', card_text)
|
| 168 |
+
if price_matches:
|
| 169 |
+
price_val = price_matches[-1].replace('.', '').replace(',', '')
|
| 170 |
+
total_price = int(price_val)
|
|
|
|
|
|
|
| 171 |
|
| 172 |
+
price_per_night = round(total_price / nights) if total_price > 0 else 0
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
# 3. Rating
|
| 175 |
+
rating_elem = card.find('div', {'data-testid': 'review-score'}) or card.find('div', class_=re.compile(r'review-score'))
|
| 176 |
+
rating = 4.0
|
| 177 |
+
if rating_elem:
|
| 178 |
+
rating_text = rating_elem.get_text(strip=True)
|
| 179 |
+
rating_match = re.search(r'(\d+[\.,]\d+)', rating_text)
|
| 180 |
+
if rating_match:
|
| 181 |
+
rating = float(rating_match.group().replace(',', '.')) / 2
|
| 182 |
+
rating = min(5.0, rating)
|
| 183 |
+
|
| 184 |
+
# 4. Reviews
|
| 185 |
+
review_elem = card.find('div', {'data-testid': 'review-count'}) or card.find('div', class_=re.compile(r'review-count'))
|
| 186 |
+
reviews = 50
|
| 187 |
+
if review_elem:
|
| 188 |
+
review_text = review_elem.get_text(strip=True)
|
| 189 |
+
review_match = re.search(r'\d+', review_text.replace('.', '').replace(',', ''))
|
| 190 |
+
reviews = int(review_match.group()) if review_match else 50
|
| 191 |
+
|
| 192 |
+
# 5. URL
|
| 193 |
+
link_elem = card.find('a', {'data-testid': 'title-link'}) or card.find('a', href=True)
|
| 194 |
+
raw_url = link_elem['href'] if link_elem and link_elem.get('href') else ""
|
| 195 |
+
if raw_url.startswith('/'):
|
| 196 |
+
raw_url = f"https://www.booking.com{raw_url}"
|
| 197 |
+
clean_url = raw_url.split('?')[0] if '?' in raw_url else raw_url
|
| 198 |
+
final_url = f"{clean_url}?checkin={checkin}&checkout={checkout}&group_adults={adults}"
|
| 199 |
+
|
| 200 |
+
# 6. Image (Robust)
|
| 201 |
+
img_elem = card.find('img', {'data-testid': 'image'}) or card.find('img')
|
| 202 |
+
image_url = ""
|
| 203 |
+
if img_elem:
|
| 204 |
+
# Check srcset first for higher quality
|
| 205 |
+
srcset = img_elem.get('srcset', '')
|
| 206 |
+
if srcset:
|
| 207 |
+
# Take the first URL from srcset
|
| 208 |
+
image_url = srcset.split(',')[0].split(' ')[0]
|
| 209 |
+
else:
|
| 210 |
+
image_url = img_elem.get('src') or img_elem.get('data-src') or ""
|
| 211 |
|
| 212 |
+
# Ensure it's an absolute URL
|
| 213 |
+
if image_url and image_url.startswith('//'):
|
| 214 |
+
image_url = f"https:{image_url}"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
deals.append({
|
| 217 |
"name": name,
|
|
|
|
| 221 |
"reviews": reviews,
|
| 222 |
"pet_friendly": True,
|
| 223 |
"source": "booking.com",
|
| 224 |
+
"url": final_url,
|
| 225 |
+
"image_url": image_url
|
| 226 |
})
|
|
|
|
| 227 |
except Exception as e:
|
| 228 |
+
print(f" [Debug] Error parsing card: {e}")
|
| 229 |
+
continue
|
| 230 |
|
| 231 |
return deals
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
conductor/tracks/live_data_integrity/plan.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Implementation Plan - Live Data Integrity
|
| 2 |
+
|
| 3 |
+
## Phase 1: Scraper Audit & Repair
|
| 4 |
+
- [ ] Task: Eliminate Mocks in `holland_agent.py`
|
| 5 |
+
- [ ] Remove `_parse_booking_html` fallback data.
|
| 6 |
+
- [ ] Ensure `_search_center_parcs` uses real logic or a live scraper.
|
| 7 |
+
- [ ] Task: Fix Booking.com Deep Links
|
| 8 |
+
- [ ] Update `booking_scraper.py` to extract the full URL including affiliate/tracking parameters for direct booking.
|
| 9 |
+
- [ ] Task: Verify Airbnb Patchright Scraper
|
| 10 |
+
- [ ] Test `patchright_airbnb_scraper.py` against live Airbnb pages.
|
| 11 |
+
- [ ] Ensure the "Redirect to Search" fallback actually works with the selected dates.
|
| 12 |
+
|
| 13 |
+
## Phase 2: Data Validation
|
| 14 |
+
- [ ] Task: Real-time Link Validation
|
| 15 |
+
- [ ] Implement a small check to ensure generated links return a 200 OK status.
|
| 16 |
+
- [ ] Task: Image Extraction
|
| 17 |
+
- [ ] Update scrapers to extract the actual thumbnail URL from the property cards (no more placeholder images).
|
| 18 |
+
|
| 19 |
+
## Phase 3: API & UI Sync
|
| 20 |
+
- [ ] Task: Update API Response
|
| 21 |
+
- [ ] Ensure `api.py` passes the new real image URLs and direct links to the frontend.
|
| 22 |
+
- [ ] Task: Frontend Update
|
| 23 |
+
- [ ] Update `frontend_dashboard.html` to use `${deal.image_url}` instead of Unsplash placeholders.
|
frontend_dashboard.html
CHANGED
|
@@ -111,7 +111,7 @@
|
|
| 111 |
dealGrid.innerHTML = '<div class="col-span-2 text-center py-20"><i class="fa-solid fa-compass fa-spin fa-3x text-emerald-600 mb-4"></i><p class="text-lg">Wir durchsuchen Booking.com, Airbnb und Center Parcs...</p></div>';
|
| 112 |
|
| 113 |
try {
|
| 114 |
-
const response = await fetch(`
|
| 115 |
const data = await response.json();
|
| 116 |
|
| 117 |
renderDeals(data.top_10_deals);
|
|
@@ -134,7 +134,10 @@
|
|
| 134 |
dealGrid.innerHTML = deals.map(deal => `
|
| 135 |
<div class="deal-card bg-white rounded-2xl shadow-sm overflow-hidden border border-gray-100 flex flex-col">
|
| 136 |
<div class="relative h-48 bg-emerald-100">
|
| 137 |
-
<img src="
|
|
|
|
|
|
|
|
|
|
| 138 |
<div class="absolute top-4 left-4 bg-white/90 backdrop-blur px-3 py-1 rounded-full text-xs font-bold text-emerald-700 flex items-center">
|
| 139 |
<i class="fa-solid fa-sun mr-1"></i> Wetter Bonus aktiv
|
| 140 |
</div>
|
|
|
|
| 111 |
dealGrid.innerHTML = '<div class="col-span-2 text-center py-20"><i class="fa-solid fa-compass fa-spin fa-3x text-emerald-600 mb-4"></i><p class="text-lg">Wir durchsuchen Booking.com, Airbnb und Center Parcs...</p></div>';
|
| 112 |
|
| 113 |
try {
|
| 114 |
+
const response = await fetch(`/search?cities=${encodeURIComponent(cities)}&checkin=${checkin}&checkout=${checkout}`);
|
| 115 |
const data = await response.json();
|
| 116 |
|
| 117 |
renderDeals(data.top_10_deals);
|
|
|
|
| 134 |
dealGrid.innerHTML = deals.map(deal => `
|
| 135 |
<div class="deal-card bg-white rounded-2xl shadow-sm overflow-hidden border border-gray-100 flex flex-col">
|
| 136 |
<div class="relative h-48 bg-emerald-100">
|
| 137 |
+
<img src="${deal.image_url}"
|
| 138 |
+
onerror="this.src='https://images.unsplash.com/photo-1512917774080-9991f1c4c750?auto=format&fit=crop&w=800&q=80'"
|
| 139 |
+
alt="${deal.name}"
|
| 140 |
+
class="object-cover w-full h-full">
|
| 141 |
<div class="absolute top-4 left-4 bg-white/90 backdrop-blur px-3 py-1 rounded-full text-xs font-bold text-emerald-700 flex items-center">
|
| 142 |
<i class="fa-solid fa-sun mr-1"></i> Wetter Bonus aktiv
|
| 143 |
</div>
|
holland_agent.py
CHANGED
|
@@ -208,83 +208,21 @@ class VacationAgent:
|
|
| 208 |
|
| 209 |
def _get_center_parcs_data(self, city: str) -> List[Dict]:
|
| 210 |
"""
|
| 211 |
-
|
|
|
|
| 212 |
"""
|
| 213 |
-
|
| 214 |
-
{
|
| 215 |
-
"name": "Center Parcs De Kempervennen",
|
| 216 |
-
"location": "Westerhoven, North Brabant",
|
| 217 |
-
"price_per_night": 45,
|
| 218 |
-
"rating": 4.2,
|
| 219 |
-
"reviews": 234,
|
| 220 |
-
"pet_friendly": True,
|
| 221 |
-
"source": "center-parcs",
|
| 222 |
-
"url": "https://www.centerparcs.nl/nl-nl/nederland/fp_VK_vakantiepark-de-kempervennen"
|
| 223 |
-
},
|
| 224 |
-
{
|
| 225 |
-
"name": "Center Parcs Zandvoort Beach",
|
| 226 |
-
"location": "Zandvoort aan Zee",
|
| 227 |
-
"price_per_night": 58,
|
| 228 |
-
"rating": 4.5,
|
| 229 |
-
"reviews": 512,
|
| 230 |
-
"pet_friendly": True,
|
| 231 |
-
"source": "center-parcs",
|
| 232 |
-
"url": "https://www.centerparcs.nl/nl-nl/nederland/fp_PZ_vakantiepark-zandvoort"
|
| 233 |
-
},
|
| 234 |
-
{
|
| 235 |
-
"name": "Center Parcs De Huttenheugte",
|
| 236 |
-
"location": "Dalen, Drenthe",
|
| 237 |
-
"price_per_night": 42,
|
| 238 |
-
"rating": 4.1,
|
| 239 |
-
"reviews": 189,
|
| 240 |
-
"pet_friendly": True,
|
| 241 |
-
"source": "center-parcs",
|
| 242 |
-
"url": "https://www.centerparcs.nl/nl-nl/nederland/fp_DH_vakantiepark-de-huttenheugte"
|
| 243 |
-
},
|
| 244 |
-
{
|
| 245 |
-
"name": "Center Parcs Port Zélande",
|
| 246 |
-
"location": "Ouddorp, Zeeland",
|
| 247 |
-
"price_per_night": 52,
|
| 248 |
-
"rating": 4.4,
|
| 249 |
-
"reviews": 423,
|
| 250 |
-
"pet_friendly": True,
|
| 251 |
-
"source": "center-parcs",
|
| 252 |
-
"url": "https://www.centerparcs.nl/nl-nl/nederland/fp_PZ_vakantiepark-port-zelande"
|
| 253 |
-
},
|
| 254 |
-
{
|
| 255 |
-
"name": "Center Parcs Het Heijderbos",
|
| 256 |
-
"location": "Heijen, Limburg",
|
| 257 |
-
"price_per_night": 48,
|
| 258 |
-
"rating": 4.3,
|
| 259 |
-
"reviews": 367,
|
| 260 |
-
"pet_friendly": True,
|
| 261 |
-
"source": "center-parcs",
|
| 262 |
-
"url": "https://www.centerparcs.nl/nl-nl/nederland/fp_HB_vakantiepark-het-heijderbos"
|
| 263 |
-
}
|
| 264 |
-
]
|
| 265 |
-
|
| 266 |
-
# Filter parks that match the requested city/region
|
| 267 |
-
# This is a basic fuzzy match
|
| 268 |
-
matching_parks = []
|
| 269 |
-
for park in all_parks:
|
| 270 |
-
# Check if city name is in park location or name
|
| 271 |
-
if city.lower() in str(park['location']).lower() or \
|
| 272 |
-
city.lower() in str(park['name']).lower() or \
|
| 273 |
-
("holland" in city.lower() and "nederland" in str(park['url'])): # Keep strict Holland check loose
|
| 274 |
-
matching_parks.append(park)
|
| 275 |
-
|
| 276 |
-
return matching_parks
|
| 277 |
|
| 278 |
async def cleanup(self):
|
| 279 |
"""Clean up browser sessions"""
|
| 280 |
try:
|
| 281 |
-
if hasattr(self.airbnb_scraper, '
|
| 282 |
-
await self.airbnb_scraper.
|
| 283 |
except Exception:
|
| 284 |
pass
|
| 285 |
try:
|
| 286 |
-
if hasattr(self.booking_scraper, '
|
| 287 |
-
self.booking_scraper.
|
| 288 |
except Exception:
|
| 289 |
pass
|
| 290 |
|
|
|
|
| 208 |
|
| 209 |
def _get_center_parcs_data(self, city: str) -> List[Dict]:
    """Return Center Parcs deals for *city*; currently always an empty list.

    TODO: Implement real Center Parcs scraping. Until then nothing is
    reported here, so results contain only data scraped from other sources.
    """
    no_deals: List[Dict] = []
    return no_deals
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
async def cleanup(self):
    """Close the Airbnb and Booking.com scrapers on a best-effort basis.

    Each scraper is handled independently: a missing scraper attribute, a
    scraper without a ``close`` method or a ``close`` that raises never
    stops the other scraper from being closed, and no exception escapes
    this method. Failures are reported on stdout instead of being dropped
    silently.
    """
    for attr in ("airbnb_scraper", "booking_scraper"):
        try:
            # A missing attribute raises AttributeError, handled below.
            scraper = getattr(self, attr)
            if hasattr(scraper, 'close'):
                await scraper.close()
        except Exception as exc:
            print(f" [Cleanup] Warning: could not close {attr}: {str(exc)[:100]}")
|
| 228 |
|
patchright_airbnb_scraper.py
CHANGED
|
@@ -92,10 +92,29 @@ class PatchrightAirbnbScraper:
|
|
| 92 |
try:
|
| 93 |
# Navigate - Airbnb uses heavy JS, so use domcontentloaded
|
| 94 |
await page.goto(url, wait_until='domcontentloaded', timeout=45000)
|
| 95 |
-
print(f" [Patchright] Page loaded,
|
| 96 |
|
| 97 |
-
#
|
| 98 |
-
await
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
# Wait for cards to appear
|
| 101 |
try:
|
|
@@ -188,6 +207,30 @@ class PatchrightAirbnbScraper:
|
|
| 188 |
href = link['href']
|
| 189 |
url = f"https://www.airbnb.com{href}" if href.startswith('/') else href
|
| 190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
deals.append({
|
| 192 |
"name": name,
|
| 193 |
"location": location if location else region,
|
|
@@ -196,7 +239,8 @@ class PatchrightAirbnbScraper:
|
|
| 196 |
"reviews": reviews,
|
| 197 |
"pet_friendly": True, # Assume true for search results
|
| 198 |
"source": "airbnb (patchright)",
|
| 199 |
-
"url": url
|
|
|
|
| 200 |
})
|
| 201 |
|
| 202 |
except Exception as e:
|
|
@@ -211,80 +255,11 @@ class PatchrightAirbnbScraper:
|
|
| 211 |
checkout: str,
|
| 212 |
adults: int = 4
|
| 213 |
) -> List[Dict]:
|
| 214 |
-
"""
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
from datetime import datetime
|
| 220 |
-
try:
|
| 221 |
-
d1 = datetime.strptime(checkin, "%Y-%m-%d")
|
| 222 |
-
d2 = datetime.strptime(checkout, "%Y-%m-%d")
|
| 223 |
-
nights = (d2 - d1).days
|
| 224 |
-
except:
|
| 225 |
-
nights = 7
|
| 226 |
-
|
| 227 |
-
fallback_properties = {
|
| 228 |
-
"Amsterdam": [
|
| 229 |
-
{
|
| 230 |
-
"name": "City Center Apartment (Patchright Fallback)",
|
| 231 |
-
"location": "Amsterdam",
|
| 232 |
-
"price_per_night": 145,
|
| 233 |
-
"rating": 4.5,
|
| 234 |
-
"reviews": 120,
|
| 235 |
-
"pet_friendly": True,
|
| 236 |
-
"source": "airbnb (fallback)",
|
| 237 |
-
"url": search_url
|
| 238 |
-
},
|
| 239 |
-
{
|
| 240 |
-
"name": "Modern Loft near Vondelpark",
|
| 241 |
-
"location": "Amsterdam Oud-West",
|
| 242 |
-
"price_per_night": 180,
|
| 243 |
-
"rating": 4.8,
|
| 244 |
-
"reviews": 89,
|
| 245 |
-
"pet_friendly": True,
|
| 246 |
-
"source": "airbnb (fallback)",
|
| 247 |
-
"url": search_url
|
| 248 |
-
}
|
| 249 |
-
],
|
| 250 |
-
"Berlin": [
|
| 251 |
-
{
|
| 252 |
-
"name": "Trendy Mitte Apartment",
|
| 253 |
-
"location": "Berlin Mitte",
|
| 254 |
-
"price_per_night": 95,
|
| 255 |
-
"rating": 4.6,
|
| 256 |
-
"reviews": 156,
|
| 257 |
-
"pet_friendly": True,
|
| 258 |
-
"source": "airbnb (fallback)",
|
| 259 |
-
"url": search_url
|
| 260 |
-
}
|
| 261 |
-
],
|
| 262 |
-
"Rotterdam": [
|
| 263 |
-
{
|
| 264 |
-
"name": "Wikkelboat Unique Stay",
|
| 265 |
-
"location": "Rotterdam Centrum",
|
| 266 |
-
"price_per_night": 135,
|
| 267 |
-
"rating": 4.8,
|
| 268 |
-
"reviews": 156,
|
| 269 |
-
"pet_friendly": True,
|
| 270 |
-
"source": "airbnb (fallback)",
|
| 271 |
-
"url": search_url
|
| 272 |
-
}
|
| 273 |
-
]
|
| 274 |
-
}
|
| 275 |
-
|
| 276 |
-
return fallback_properties.get(region, [
|
| 277 |
-
{
|
| 278 |
-
"name": f"Cozy {region} Stay (Patchright Fallback)",
|
| 279 |
-
"location": region,
|
| 280 |
-
"price_per_night": 85,
|
| 281 |
-
"rating": 4.5,
|
| 282 |
-
"reviews": 25,
|
| 283 |
-
"pet_friendly": True,
|
| 284 |
-
"source": "airbnb (fallback)",
|
| 285 |
-
"url": search_url
|
| 286 |
-
}
|
| 287 |
-
])
|
| 288 |
|
| 289 |
|
| 290 |
async def test_patchright():
|
|
|
|
| 92 |
try:
|
| 93 |
# Navigate - Airbnb uses heavy JS, so use domcontentloaded
|
| 94 |
await page.goto(url, wait_until='domcontentloaded', timeout=45000)
|
| 95 |
+
print(f" [Patchright] Page loaded, scrolling to load images...")
|
| 96 |
|
| 97 |
+
# Auto-scroll to trigger lazy loading of images
|
| 98 |
+
await page.evaluate("""
|
| 99 |
+
async () => {
|
| 100 |
+
await new Promise((resolve) => {
|
| 101 |
+
let totalHeight = 0;
|
| 102 |
+
let distance = 300;
|
| 103 |
+
let timer = setInterval(() => {
|
| 104 |
+
let scrollHeight = document.body.scrollHeight;
|
| 105 |
+
window.scrollBy(0, distance);
|
| 106 |
+
totalHeight += distance;
|
| 107 |
+
if(totalHeight >= scrollHeight || totalHeight > 5000){
|
| 108 |
+
clearInterval(timer);
|
| 109 |
+
resolve();
|
| 110 |
+
}
|
| 111 |
+
}, 150);
|
| 112 |
+
});
|
| 113 |
+
}
|
| 114 |
+
""")
|
| 115 |
+
|
| 116 |
+
# Wait a bit for images to finalize
|
| 117 |
+
await asyncio.sleep(3)
|
| 118 |
|
| 119 |
# Wait for cards to appear
|
| 120 |
try:
|
|
|
|
| 207 |
href = link['href']
|
| 208 |
url = f"https://www.airbnb.com{href}" if href.startswith('/') else href
|
| 209 |
|
| 210 |
+
# Extract Image URL
|
| 211 |
+
# Airbnb often uses 'img' inside a 'picture' tag within the card
|
| 212 |
+
img_elem = card.find('img')
|
| 213 |
+
image_url = ""
|
| 214 |
+
if img_elem:
|
| 215 |
+
# Airbnb specifics: check data-original-uri and multiple src attributes
|
| 216 |
+
image_url = img_elem.get('src', '')
|
| 217 |
+
if not image_url.startswith('http') and img_elem.get('data-src'):
|
| 218 |
+
image_url = img_elem.get('data-src')
|
| 219 |
+
|
| 220 |
+
# If still no valid URL, check srcset
|
| 221 |
+
if not image_url and img_elem.get('srcset'):
|
| 222 |
+
image_url = img_elem['srcset'].split(',')[0].split(' ')[0]
|
| 223 |
+
|
| 224 |
+
# Clean up URL (sometimes it's a small thumbnail, but it's real)
|
| 225 |
+
if image_url and '?' in image_url:
|
| 226 |
+
# Keep some basic params if needed, but remove session junk
|
| 227 |
+
# For Airbnb, removing all params usually works for thumbnails
|
| 228 |
+
image_url = image_url.split('?')[0]
|
| 229 |
+
|
| 230 |
+
# Double-check it starts with https
|
| 231 |
+
if image_url and image_url.startswith('//'):
|
| 232 |
+
image_url = f"https:{image_url}"
|
| 233 |
+
|
| 234 |
deals.append({
|
| 235 |
"name": name,
|
| 236 |
"location": location if location else region,
|
|
|
|
| 239 |
"reviews": reviews,
|
| 240 |
"pet_friendly": True, # Assume true for search results
|
| 241 |
"source": "airbnb (patchright)",
|
| 242 |
+
"url": url,
|
| 243 |
+
"image_url": image_url
|
| 244 |
})
|
| 245 |
|
| 246 |
except Exception as e:
|
|
|
|
| 255 |
checkout: str,
|
| 256 |
adults: int = 4
|
| 257 |
) -> List[Dict]:
|
| 258 |
+
"""
|
| 259 |
+
No more static mock properties.
|
| 260 |
+
Returns an empty list to ensure 100% real data visibility.
|
| 261 |
+
"""
|
| 262 |
+
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
|
| 264 |
|
| 265 |
async def test_patchright():
|
tests/verify_live_data.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import httpx
|
| 2 |
+
import asyncio
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
async def test_live_api():
    """Smoke-check the locally running search API and print a report.

    Sends one search request for Zandvoort to ``http://localhost:8000/search``
    and, for at most the first three returned deals, prints whether the deal
    URL points at Booking.com/Airbnb and whether an absolute image URL is
    present. Nothing is asserted or returned; every failure is only printed.
    """
    print("🚀 Starte Live-API Test (100% reale Daten Check)...")

    url = "http://localhost:8000/search"
    query = dict(
        cities="Zandvoort",
        checkin="2026-03-15",
        checkout="2026-03-22",
        adults=4,
        pets=1,
    )
    known_hosts = ("booking.com", "airbnb.com")

    # Generous timeout: the request is given up to two minutes to answer.
    async with httpx.AsyncClient(timeout=120.0) as session:
        try:
            print(f"📡 Sende Anfrage an API: {url} für Zandvoort...")
            response = await session.get(url, params=query)

            if response.status_code != 200:
                print(f"❌ API Fehler: Status {response.status_code}")
                return

            payload = response.json()
            deals = payload.get("top_10_deals", [])
            print(f"✅ API Antwort erhalten. Gefundene Deals: {len(deals)}")

            if not deals:
                print("⚠️ Keine Deals gefunden. (Möglicherweise blockiert oder keine Verfügbarkeit)")
                return

            # Only the first three deals are inspected.
            for i, deal in enumerate(deals[:3], 1):
                name = deal.get('name', 'Unbekannt')
                source = deal.get('source', 'Unbekannt')
                price = deal.get('price_per_night', 0)
                deal_url = deal.get('url', '')
                image_url = deal.get('image_url', '')

                print(f"\n--- Deal #{i}: {name} ---")
                print(f" Quelle: {source}")
                print(f" Preis: €{price}/Nacht")

                # The link must point at one of the scraped providers.
                if not any(host in deal_url for host in known_hosts):
                    print(f" ❌ URL UNGÜLTIG: {deal_url}")
                else:
                    print(f" ✅ URL Valid: {deal_url[:60]}...")

                # The image must be an absolute http(s) URL.
                if not (image_url and image_url.startswith('http')):
                    print(f" ❌ Bild-URL FEHLT oder ist Platzhalter")
                else:
                    print(f" ✅ Bild-URL vorhanden: {image_url[:60]}...")

        except Exception as e:
            print(f"❌ Test fehlgeschlagen: {e}")
|
| 60 |
+
|
| 61 |
+
if __name__ == "__main__":
|
| 62 |
+
asyncio.run(test_live_api())
|