cespin24 committed on
Commit
a98dd63
·
verified ·
1 Parent(s): 3d35f5a

Upload 10 files

Browse files
__pycache__/search.cpython-311.pyc ADDED
Binary file (24.5 kB). View file
 
__pycache__/utils.cpython-311.pyc ADDED
Binary file (3.23 kB). View file
 
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Hotel Search App — Gradio Interface
2
+
3
+ A free-form, natural-language hotel search application.
4
+ Deployed on Hugging Face Spaces.
5
+ """
6
+
7
+ import os
8
+ from pathlib import Path
9
+
10
+ import gradio as gr
11
+ from dotenv import load_dotenv
12
+
13
+ # Load .env from the same directory as this script so it works
14
+ # both locally and on Hugging Face Spaces.
15
+ _env_path = Path(__file__).resolve().parent / ".env"
16
+ load_dotenv(dotenv_path=_env_path, override=True)
17
+
18
+ from search import hotel_search
19
+
20
+ TITLE = "🏨 Hotel Search — Find Your Perfect Stay"
21
+
22
+ DESCRIPTION = """\
23
+ **Describe your ideal hotel in plain English** and this app will find matching hotels \
24
+ with direct links to their websites — no travel agency middlemen.
25
+
26
+ Mention your **location**, **dates**, **budget**, and any **amenities** you need. \
27
+ You can mark features as *required* ("must have a pool") or *preferred* \
28
+ ("ideally has a spa") and the app will rank results accordingly.
29
+ """
30
+
31
+ EXAMPLES = [
32
+ [
33
+ "I'm looking for a beachfront hotel in Miami, Florida for March 15-18, 2026. "
34
+ "I need free parking and it must be under $200 per night. "
35
+ "A pool and spa would be nice but aren't required."
36
+ ],
37
+ [
38
+ "Find me a pet-friendly hotel in downtown Austin, Texas for next weekend. "
39
+ "Must have free breakfast. Ideally has a rooftop bar and is walkable to "
40
+ "live music venues. Budget around $150/night."
41
+ ],
42
+ [
43
+ "I need a luxury hotel in Manhattan, New York for 2 guests, April 5-8, 2026. "
44
+ "Must have a fitness center and concierge service. "
45
+ "Would prefer a room with a city view and a hotel restaurant."
46
+ ],
47
+ [
48
+ "Budget-friendly hotel near Disneyland in Anaheim, California for a family "
49
+ "of 4. Must be under $120/night and have free Wi-Fi. "
50
+ "Would be nice to have a shuttle to the park and a pool for the kids."
51
+ ],
52
+ [
53
+ "Romantic boutique hotel in Savannah, Georgia for a weekend getaway. "
54
+ "Must be in the historic district. Prefer a hotel with a garden, "
55
+ "complimentary wine hour, and within walking distance of restaurants."
56
+ ],
57
+ ]
58
+
59
+
60
+ def search_wrapper(user_input: str) -> str:
61
+ """Thin wrapper so Gradio can call the search pipeline."""
62
+ return hotel_search(user_input)
63
+
64
+
65
+ with gr.Blocks(
66
+ title="Hotel Search",
67
+ theme=gr.themes.Soft(
68
+ primary_hue="blue",
69
+ secondary_hue="sky",
70
+ font=gr.themes.GoogleFont("Inter"),
71
+ ),
72
+ ) as demo:
73
+ gr.Markdown(f"# {TITLE}")
74
+ gr.Markdown(DESCRIPTION)
75
+
76
+ with gr.Row():
77
+ with gr.Column(scale=3):
78
+ user_input = gr.Textbox(
79
+ label="Describe Your Ideal Hotel",
80
+ placeholder=(
81
+ "e.g. I need a beachfront hotel in Miami for March 15-18 "
82
+ "under $200/night with free parking. A pool would be nice..."
83
+ ),
84
+ lines=5,
85
+ )
86
+ search_btn = gr.Button("🔍 Search Hotels", variant="primary", size="lg")
87
+ with gr.Column(scale=1):
88
+ gr.Markdown(
89
+ "### Tips for Best Results\n"
90
+ "- **Be specific** about location\n"
91
+ "- **Include dates** if you have them\n"
92
+ "- **Set a budget** (e.g. under $150/night)\n"
93
+ "- **Say 'must have'** for requirements\n"
94
+ "- **Say 'would be nice'** for preferences\n"
95
+ "- Avoid requiring features rarely listed "
96
+ "(e.g. Wi-Fi is universal but rarely advertised)"
97
+ )
98
+
99
+ results_output = gr.Markdown(label="Search Results")
100
+
101
+ search_btn.click(fn=search_wrapper, inputs=user_input, outputs=results_output)
102
+ user_input.submit(fn=search_wrapper, inputs=user_input, outputs=results_output)
103
+
104
+ gr.Examples(
105
+ examples=EXAMPLES,
106
+ inputs=user_input,
107
+ label="Example Searches — Click to Try",
108
+ )
109
+
110
+ gr.Markdown(
111
+ "---\n"
112
+ "*This app uses AI to interpret your request and searches the web for "
113
+ "matching hotels. All links point to hotel websites directly — never to "
114
+ "travel agencies like Expedia or Booking.com.*"
115
+ )
116
+
117
+ if __name__ == "__main__":
118
+ demo.launch()
docs/synthesizer_report.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Synthesizer Report — Hotel Search App
2
+
3
+ ## 1. Final Review Summary
4
+
5
+ The Hotel Search App has been designed (Architect), built (Builder), and quality-tested (QA) following the 4-agent development process. This Synthesizer report provides the final review, covering documentation completeness, known bugs, and recommendations for future versions.
6
+
7
+ ## 2. Deliverables Produced
8
+
9
+ | Deliverable | File | Status |
10
+ |------------|------|--------|
11
+ | Architecture plan | `plan.md` | ✅ Complete |
12
+ | Main application | `app.py` | ✅ Complete |
13
+ | Search orchestrator | `search.py` | ✅ Complete |
14
+ | Utilities | `utils.py` | ✅ Complete |
15
+ | Dependencies | `requirements.txt` | ✅ Complete |
16
+ | HF Spaces config | `README.md` | ✅ Complete |
17
+ | QA report | `qa_report.md` | ✅ Complete |
18
+ | User guide | `docs/user_guide.md` | ✅ Complete |
19
+ | Synthesizer report | `docs/synthesizer_report.md` | ✅ This document |
20
+
21
+ ## 3. Known Bugs & Limitations
22
+
23
+ ### 3.1 Known Bugs
24
+
25
+ | # | Bug | Severity | Workaround |
26
+ |---|-----|----------|------------|
27
+ | B1 | Feature matching uses simple keyword overlap — "free parking" might match a hotel whose snippet says "parking garage nearby ($15/day)" because "parking" matches | Medium | Users should check hotel websites directly for feature confirmation |
28
+ | B2 | If the LLM returns a location the user didn't intend (e.g., user says "near the beach" and LLM guesses "Miami Beach"), the search may target the wrong area | Low | Check the "How I Interpreted Your Request" section and rephrase if needed |
29
+ | B3 | Some SerpAPI hotel results may have `None` links, resulting in hotel cards without clickable links | Low | These hotels still appear with their name and description for reference |
30
+
31
+ ### 3.2 Known Limitations
32
+
33
+ | # | Limitation | Impact |
34
+ |---|-----------|--------|
35
+ | L1 | No real-time availability checking — the app searches the web but cannot verify room availability | Hotels shown may be fully booked |
36
+ | L2 | Price data depends on what appears in search results — many hotels don't show prices in snippets | Some results will show "N/A" for price |
37
+ | L3 | Features like Wi-Fi, TV, air conditioning are universal but rarely appear in hotel marketing — requiring them hurts match scores | Users should not require common features |
38
+ | L4 | The blocked-domain list is static — new travel agencies won't be automatically blocked | Periodic manual updates needed |
39
+ | L5 | No session memory — each search is independent; the app doesn't remember previous searches | Users must retype for modified searches |
40
+ | L6 | Results limited to top 10 — SerpAPI returns ~20 results and the UI shows the top 10 after ranking | Users with very specific needs may miss edge-case matches |
41
+
42
+ ## 4. Bugs That Could Be Fixed in Future Versions
43
+
44
+ ### Fix 1: Semantic Feature Matching (for B1)
45
+ Replace the current keyword-matching heuristic in `_feature_present()` with a semantic similarity check using sentence embeddings (e.g., `sentence-transformers`). This would correctly match "complimentary self-parking" to the feature "free parking" and reject "paid parking garage."
46
+
47
+ ### Fix 2: User Confirmation of Parsed Intent (for B2)
48
+ Add a two-step flow: first show the user how their request was parsed, let them correct it, then execute the search. This adds friction but eliminates misinterpretation.
49
+
50
+ ### Fix 3: Hotel Website Discovery (for B3)
51
+ When a hotel result has no direct link, perform a secondary search for "[Hotel Name] official website" to find and verify the hotel's own domain.
52
+
53
+ ## 5. Recommended Future Enhancements
54
+
55
+ ### Priority 1 (High Value, Moderate Effort)
56
+ 1. **Caching layer** — Cache SerpAPI results for identical queries to reduce API costs and improve response time. A simple TTL-based dict or Redis cache would work.
57
+ 2. **Loading indicator** — Add a Gradio progress bar or status message during the search (which can take 5-10 seconds).
58
+ 3. **Dynamic blocked-domain list** — Load blocked domains from a configurable text file or database instead of hardcoding.
59
+
60
+ ### Priority 2 (High Value, Higher Effort)
61
+ 4. **Map view** — Display hotel locations on an interactive map using `folium` or Gradio's built-in Plot component.
62
+ 5. **Image previews** — Show hotel thumbnail images from search results in the output cards.
63
+ 6. **Multi-query comparison** — Allow users to compare results from multiple searches side by side.
64
+
65
+ ### Priority 3 (Nice to Have)
66
+ 7. **Search history** — Store recent searches in the session so users can refine without retyping.
67
+ 8. **Export to PDF/CSV** — Let users download their search results.
68
+ 9. **Multi-language support** — Accept hotel requests in languages other than English.
69
+ 10. **Review integration** — Pull star ratings and review snippets from hotel review sites.
70
+
71
+ ## 6. Deployment Checklist
72
+
73
+ - [ ] Create Hugging Face Space (Gradio SDK)
74
+ - [ ] Upload all project files
75
+ - [ ] Set `HF_TOKEN` as Space secret
76
+ - [ ] Set `SERPAPI_API_KEY` as Space secret
77
+ - [ ] Verify the app loads and example queries work
78
+ - [ ] Test with various input types (vague, specific, edge cases)
79
+
80
+ ## 7. Final Assessment
81
+
82
+ The Hotel Search App successfully meets all 10 requirements from the architecture plan. It provides a unique value proposition: **free-form natural language hotel search with direct hotel links and smart must-have vs. nice-to-have ranking.** The 4-agent development process (Architect → Builder → QA → Synthesizer) ensured systematic design, quality implementation, thorough testing, and complete documentation.
83
+
84
+ The app is ready for deployment to Hugging Face Spaces.
docs/user_guide.md ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hotel Search App — User Guide
2
+
3
+ ## What This App Does
4
+
5
+ The Hotel Search App lets you find hotels by describing what you want in plain, everyday English. Instead of filling out forms with checkboxes and dropdowns, you simply type a sentence or paragraph about your ideal hotel. The app uses AI to understand your request, searches the web for matching hotels, and presents ranked results with direct links to each hotel's own website.
6
+
7
+ **What makes this app different from Expedia, Booking.com, or Google Travel?**
8
+
9
+ 1. **Free-form input** — You write in natural language, not forms.
10
+ 2. **Direct hotel links only** — Every link goes to the hotel's own website, never a travel agency.
11
+ 3. **Smart ranking** — The app distinguishes between what you *must have* and what you'd *like to have*, and ranks hotels accordingly.
12
+
13
+ ---
14
+
15
+ ## How to Use the App
16
+
17
+ ### Step 1: Describe Your Ideal Hotel
18
+
19
+ Type into the text box. Include as many of these details as you can:
20
+
21
+ - **Location** — Where do you want to stay? (city, neighborhood, near a landmark)
22
+ - **Dates** — When are you checking in and out?
23
+ - **Budget** — What's your maximum price per night?
24
+ - **Required features** — What *must* the hotel absolutely have?
25
+ - **Preferred features** — What would be *nice to have* but isn't a deal-breaker?
26
+
27
+ ### Step 2: Click "Search Hotels"
28
+
29
+ Or press Enter. The app will:
30
+ 1. Parse your text using AI.
31
+ 2. Search the web for matching hotels.
32
+ 3. Score and rank the results.
33
+ 4. Display them as cards with match scores.
34
+
35
+ ### Step 3: Review Results
36
+
37
+ Each hotel card shows:
38
+ - **Match Score** (0–100) — How well the hotel matches your request.
39
+ - **Price** (when available).
40
+ - **Rating** (when available).
41
+ - **Which of your required features matched.**
42
+ - **Which required features are missing.**
43
+ - **Which preferred features matched.**
44
+ - **A direct link to the hotel's website.**
45
+
46
+ At the bottom, you'll see a "How I Interpreted Your Request" section showing exactly how the AI understood your input.
47
+
48
+ ---
49
+
50
+ ## Good Examples (Do This)
51
+
52
+ ### Example 1: Specific Location + Budget + Clear Requirements
53
+ > "I'm looking for a beachfront hotel in Miami, Florida for March 15-18, 2026. I need free parking and it must be under $200 per night. A pool and spa would be nice but aren't required."
54
+
55
+ **Why this works well:**
56
+ - Clear location (Miami, Florida)
57
+ - Specific dates (March 15-18, 2026)
58
+ - Explicit budget ($200/night)
59
+ - Clear distinction: "I need" (required) vs "would be nice" (preferred)
60
+
61
+ ### Example 2: Clear Priorities with "Must" and "Ideally"
62
+ > "Find me a pet-friendly hotel in downtown Austin, Texas for next weekend. Must have free breakfast. Ideally has a rooftop bar and is walkable to live music venues. Budget around $150/night."
63
+
64
+ **Why this works well:**
65
+ - Uses "must have" for the one non-negotiable amenity
66
+ - Uses "ideally" for nice-to-have features
67
+ - Includes budget and location
68
+
69
+ ### Example 3: Luxury with Specific Needs
70
+ > "I need a luxury hotel in Manhattan, New York for 2 guests, April 5-8, 2026. Must have a fitness center and concierge service. Would prefer a room with a city view and a hotel restaurant."
71
+
72
+ **Why this works well:**
73
+ - Specifies guest count
74
+ - "Must have" vs "Would prefer" makes ranking accurate
75
+ - Specific dates help narrow results
76
+
77
+ ---
78
+
79
+ ## Bad Examples (Avoid This)
80
+
81
+ ### Bad Example 1: Too Vague
82
+ > "I need a hotel."
83
+
84
+ **Why this fails:**
85
+ - No location — the app doesn't know where to search.
86
+ - No dates, budget, or features — nothing to rank against.
87
+ - **Fix:** At minimum, include a city: "I need a hotel in Chicago."
88
+
89
+ ### Bad Example 2: Requiring Features Rarely Listed in Descriptions
90
+ > "Must have Wi-Fi, must have a TV in the room, must have running water."
91
+
92
+ **Why this fails:**
93
+ - Wi-Fi, TVs, and running water are *universal* in modern hotels but are almost never mentioned in hotel descriptions or search results.
94
+ - Since the app matches features against hotel descriptions, requiring these will lower match scores for every hotel.
95
+ - **Fix:** Don't require features that are standard everywhere. Focus on differentiating amenities like "pool," "spa," "free parking," or "beachfront."
96
+
97
+ ### Bad Example 3: No Distinction Between Must-Have and Nice-to-Have
98
+ > "I want a hotel in San Francisco with a pool, gym, restaurant, bar, spa, room service, concierge, valet parking, ocean view, and rooftop terrace."
99
+
100
+ **Why this fails:**
101
+ - Everything is treated as required (since there are no qualifiers like "ideally" or "would be nice").
102
+ - Very few hotels will match ALL of these features, resulting in low match scores across the board.
103
+ - **Fix:** Separate your must-haves from nice-to-haves:
104
+ > "Hotel in San Francisco. Must have a pool and gym. Would be nice to have a spa, ocean view, and rooftop terrace."
105
+
106
+ ### Bad Example 4: Including Travel Agency Preferences
107
+ > "Find me a hotel on Expedia under $100."
108
+
109
+ **Why this fails:**
110
+ - This app specifically avoids travel agencies. It finds hotel websites directly.
111
+ - Mentioning "Expedia" confuses the search.
112
+ - **Fix:** Just state your budget and location directly.
113
+
114
+ ---
115
+
116
+ ## Understanding Match Scores
117
+
118
+ | Score Range | Meaning |
119
+ |-------------|---------|
120
+ | 80–100 | Excellent match — most or all requirements met |
121
+ | 60–79 | Good match — some requirements met, most preferences met |
122
+ | 40–59 | Fair match — base score, few specific features confirmed |
123
+ | 0–39 | Poor match — multiple required features missing |
124
+
125
+ **How scoring works:**
126
+ - Each hotel starts at 50 points (base score).
127
+ - **+10 points** for each matched required feature.
128
+ - **+3 points** for each matched preferred feature.
129
+ - **-5 points** for each missing required feature.
130
+ - **+5 points** if price is within budget.
131
+ - **-10 points** if price exceeds budget.
132
+
133
+ ---
134
+
135
+ ## Tips for Best Results
136
+
137
+ 1. **Be specific about location.** "Downtown Chicago near Millennium Park" beats "somewhere in Illinois."
138
+ 2. **Include dates** when you have them — it helps narrow results.
139
+ 3. **Set a clear budget** — say "under $150/night" or "budget-friendly."
140
+ 4. **Use signal words:**
141
+ - For requirements: "must have," "need," "require," "essential"
142
+ - For preferences: "would be nice," "prefer," "ideally," "bonus if"
143
+ 5. **Limit required features to 2–4.** Too many requirements = fewer results.
144
+ 6. **Don't require universal features.** Skip Wi-Fi, AC, towels — they're everywhere but rarely listed.
145
+
146
+ ---
147
+
148
+ ## Troubleshooting
149
+
150
+ | Problem | Cause | Solution |
151
+ |---------|-------|----------|
152
+ | "No hotels found" | Too many required features or very niche request | Relax requirements; move some to preferences |
153
+ | Low match scores | Features not mentioned in hotel descriptions | Use broader terms (e.g., "pool" not "Olympic-size heated saltwater pool") |
154
+ | No prices shown | Price not available in search results | Check the hotel's website directly via the link |
155
+ | "API key not configured" | Missing environment variables | Set `HF_TOKEN` and `SERPAPI_API_KEY` as environment variables or HF Space secrets |
156
+ | Results seem off | AI misinterpreted the request | Check the "How I Interpreted Your Request" section and rephrase |
157
+
158
+ ---
159
+
160
+ ## Privacy & Data
161
+
162
+ - Your search text is sent to the Hugging Face Inference API for parsing (not stored by this app).
163
+ - Hotel searches are performed via SerpAPI (Google Search).
164
+ - No personal data is collected or stored by this application.
plan.md ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hotel Search App — Architecture Plan
2
+
3
+ ## 1. Overview
4
+
5
+ A Gradio-based web application deployed on Hugging Face Spaces that lets users describe their ideal hotel in free-form natural language. The app parses the user's text to identify **required** ("must-have") and **preferred** ("nice-to-have") features, then searches for matching hotels and returns ranked results with direct links to each hotel's own website — never travel-agency links.
6
+
7
+ ## 2. Key Requirements
8
+
9
+ | # | Requirement | Priority |
10
+ |---|-------------|----------|
11
+ | R1 | Accept free-form text input describing hotel preferences | Must |
12
+ | R2 | Distinguish between **required** features and **preferred** features | Must |
13
+ | R3 | Extract structured fields: dates, location, price range, amenities | Must |
14
+ | R4 | Search for hotels matching the extracted criteria | Must |
15
+ | R5 | Rank results: required features first, then preferred features as tie-breakers | Must |
16
+ | R6 | Return hotel name, description, price estimate, match score, and **direct hotel website link** | Must |
17
+ | R7 | Never show travel-agency links (Expedia, Booking.com, etc.) | Must |
18
+ | R8 | Show a clear message when no hotels match required features | Must |
19
+ | R9 | Provide example queries in the UI | Nice |
20
+ | R10 | Deploy on Hugging Face Spaces via Gradio | Must |
21
+
22
+ ## 3. Architecture
23
+
24
+ ```
25
+ ┌─────────────────────────────────────────────────────┐
26
+ │ Gradio UI (app.py) │
27
+ │ ┌───────────────┐ ┌──────────────────────────┐ │
28
+ │ │ Text Input │ │ Results Display │ │
29
+ │ │ (free-form) │ │ (Markdown table/cards) │ │
30
+ │ └───────┬───────┘ └──────────▲───────────────┘ │
31
+ │ │ │ │
32
+ │ ▼ │ │
33
+ │ ┌───────────────────────────────┴──────┐ │
34
+ │ │ Orchestrator (search.py) │ │
35
+ │ │ │ │
36
+ │ │ 1. parse_request() ──► LLM / NLP │ │
37
+ │ │ 2. search_hotels() ──► SerpAPI / │ │
38
+ │ │ Google Search │ │
39
+ │ │ 3. rank_results() │ │
40
+ │ │ 4. format_output() │ │
41
+ │ └───────────────────────────────────────┘ │
42
+ └─────────────────────────────────────────────────────┘
43
+ ```
44
+
45
+ ### 3.1 Components
46
+
47
+ #### A. `app.py` — Gradio Interface
48
+ - Single-page Gradio app with:
49
+ - **Input**: `gr.Textbox` for free-form hotel request.
50
+ - **Output**: `gr.Markdown` displaying ranked hotel results as formatted cards.
51
+ - **Examples**: Pre-filled example queries demonstrating good usage.
52
+ - Launched with `gr.Blocks` for layout control.
53
+
54
+ #### B. `search.py` — Core Search Orchestrator
55
+ Four-stage pipeline:
56
+
57
+ 1. **`parse_request(text) → dict`**
58
+ Uses an LLM (Mistral-7B-Instruct via the Hugging Face Inference API) to extract structured data from free-form text:
59
+ ```json
60
+ {
61
+ "location": "Miami, FL",
62
+ "check_in": "2026-03-15",
63
+ "check_out": "2026-03-18",
64
+ "required_features": ["beachfront", "under $200/night", "free parking"],
65
+ "preferred_features": ["pool", "spa", "pet-friendly"],
66
+ "max_price": 200,
67
+ "guests": 2
68
+ }
69
+ ```
70
+
71
+ 2. **`search_hotels(parsed) → list[dict]`**
72
+ Builds a targeted Google search query from the parsed fields and uses SerpAPI (or fallback scraping) to retrieve hotel results. Filters out travel-agency domains.
73
+
74
+ 3. **`rank_results(hotels, parsed) → list[dict]`**
75
+ Scores each hotel:
76
+ - +10 points per matched **required** feature
77
+ - +3 points per matched **preferred** feature
78
+ - Penalize if required features are missing
79
+ - Sort descending by score
80
+
81
+ 4. **`format_output(ranked) → str`**
82
+ Produces Markdown output with hotel cards including:
83
+ - Hotel name (linked to hotel's own website)
84
+ - Location
85
+ - Price (if available)
86
+ - Match score breakdown
87
+ - Matched/unmatched features
88
+
89
+ #### C. `utils.py` — Utility Functions
90
+ - Domain filtering (block travel-agency domains)
91
+ - URL validation
92
+ - Price extraction helpers
93
+
94
+ ## 4. Technology Stack
95
+
96
+ | Component | Technology |
97
+ |-----------|-----------|
98
+ | UI Framework | Gradio 4.x |
99
+ | LLM for parsing | Mistral-7B-Instruct via Hugging Face Inference API (`huggingface_hub`) |
100
+ | Hotel search | SerpAPI Google Search API |
101
+ | Deployment | Hugging Face Spaces |
102
+ | Language | Python 3.10+ |
103
+
104
+ ## 5. API Keys Required
105
+
106
+ - `HF_TOKEN` — Hugging Face token for the Inference API (LLM-based text parsing)
107
+ - `SERPAPI_API_KEY` — for Google hotel search results
108
+
109
+ These will be stored as Hugging Face Space secrets (environment variables).
110
+
111
+ ## 6. Blocked Domains (Travel Agencies)
112
+
113
+ The following domains will be filtered from results to ensure only direct hotel links appear:
114
+
115
+ ```
116
+ expedia.com, booking.com, hotels.com, trivago.com,
117
+ kayak.com, priceline.com, orbitz.com, travelocity.com,
118
+ agoda.com, trip.com, hotwire.com, cheaptickets.com,
119
+ tripadvisor.com, google.com/travel, bing.com/travel
120
+ ```
121
+
122
+ ## 7. User Input Parsing Strategy
123
+
124
+ The LLM prompt will instruct the model to:
125
+ 1. Identify the **location** (city, state, country, region).
126
+ 2. Identify **dates** (check-in, check-out or general timeframe).
127
+ 3. Identify **price constraints** (max nightly rate, budget tier).
128
+ 4. Separate amenities/features into **required** vs **preferred**:
129
+ - Words like "must have", "need", "require", "essential" → required
130
+ - Words like "would be nice", "prefer", "ideally", "hope for" → preferred
131
+ - If no qualifier is given, default to **required** for both core constraints (location, dates, price) and amenities (the assignment specifies that the distinction matters and that unqualified features should be treated as requirements).
132
+ 5. Identify number of guests if mentioned.
133
+
134
+ ## 8. Output Format
135
+
136
+ Each result card will look like:
137
+
138
+ ```
139
+ ### 🏨 Hotel Name
140
+ 📍 Location | 💰 ~$XXX/night | ⭐ Match Score: 85/100
141
+
142
+ **Matched Required Features:** beachfront, free parking
143
+ **Matched Preferred Features:** pool, spa
144
+ **Missing Required Features:** —
145
+
146
+ 🔗 [Visit Hotel Website](https://www.hotelname.com)
147
+
148
+ ---
149
+ ```
150
+
151
+ If no hotels match required features, display:
152
+ ```
153
+ ⚠️ No hotels found matching all your required features.
154
+
155
+ Try relaxing some requirements or marking them as preferences instead.
156
+ Your required features were: [list]
157
+ ```
158
+
159
+ ## 9. File Structure
160
+
161
+ ```
162
+ _Assignment_Week6/
163
+ ├── app.py # Gradio app entry point
164
+ ├── search.py # Core search orchestrator
165
+ ├── utils.py # Utility functions
166
+ ├── requirements.txt # Python dependencies
167
+ ├── plan.md # This architecture document
168
+ ├── README.md # HF Spaces readme + user guide
169
+ └── docs/
170
+ └── user_guide.md # Detailed user guide (Synthesizer output)
171
+ ```
172
+
173
+ ## 10. Error Handling
174
+
175
+ - **No API key**: Show friendly message asking user to configure keys.
176
+ - **API rate limit**: Graceful degradation with cached/sample results.
177
+ - **No results**: Clear message explaining why and suggesting modifications.
178
+ - **Ambiguous input**: Ask for clarification or show best-effort results with a note.
179
+
180
+ ## 11. Limitations & Future Enhancements
181
+
182
+ ### Known Limitations
183
+ - Real-time hotel availability/pricing depends on search API accuracy.
184
+ - Very niche features (e.g., "rooftop beekeeping") may not appear in any hotel description.
185
+ - Wi-Fi is so common it's rarely mentioned in descriptions, so requiring it may filter out hotels that actually have it.
186
+
187
+ ### Future Enhancements
188
+ - Add map view of hotel locations.
189
+ - Support multiple languages.
190
+ - Add image previews of hotels.
191
+ - Integrate a booking calendar.
192
+ - Cache frequent searches for speed.
qa_report.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # QA Report — Hotel Search App
2
+
3
+ ## Plan-vs-Build Compliance Matrix
4
+
5
+ | # | Requirement | Plan | Build | Status |
6
+ |---|-------------|------|-------|--------|
7
+ | R1 | Free-form text input | `gr.Textbox` | `gr.Textbox`, 5 lines, placeholder | ✅ Pass |
8
+ | R2 | Distinguish required vs preferred features | LLM parsing with keyword rules | `PARSE_SYSTEM_PROMPT` classifies by qualifier words | ✅ Pass |
9
+ | R3 | Extract dates, location, price, amenities | LLM → JSON schema | `parse_request()` → structured dict | ✅ Pass |
10
+ | R4 | Search for matching hotels | SerpAPI Google Search | `search_hotels()` via SerpAPI | ✅ Pass |
11
+ | R5 | Rank by required first, preferred as tie-breaker | +10 req, +3 pref, -5 missing | `rank_results()` implements exact scoring | ✅ Pass |
12
+ | R6 | Return name, description, price, score, direct link | Markdown cards | `format_output()` produces cards | ✅ Pass |
13
+ | R7 | Never show travel-agency links | Block list in utils.py | `BLOCKED_DOMAINS` set (40+ domains) | ✅ Pass |
14
+ | R8 | Clear message when no results | Warning with suggestions | `format_output()` handles empty list | ✅ Pass |
15
+ | R9 | Example queries in UI | Pre-filled examples | 5 examples in `EXAMPLES` list | ✅ Pass |
16
+ | R10 | Deploy on HF Spaces via Gradio | Gradio Blocks | `gr.Blocks` + HF README metadata | ✅ Pass |
17
+
18
+ ## Architecture Compliance
19
+
20
+ | Component | Plan | Build | Match |
21
+ |-----------|------|-------|-------|
22
+ | `app.py` | Gradio Blocks, Textbox, Markdown output, Examples | Implemented exactly | ✅ |
23
+ | `search.py` | 4-stage pipeline (parse → search → rank → format) | All 4 stages present | ✅ |
24
+ | `utils.py` | Domain filtering, URL validation, price extraction | All implemented | ✅ |
25
+ | `requirements.txt` | gradio, huggingface_hub, requests, python-dotenv | All listed with minimum versions | ✅ |
26
+
27
+ ## Issues Found and Resolved
28
+
29
+ ### Issue 1: Travel Agency Filter Gap (FIXED)
30
+ - **Severity:** Medium
31
+ - **Description:** Hotels from SerpAPI's `hotels_results` block were not filtered through `is_travel_agency()`, only organic results were filtered.
32
+ - **Fix:** Added `is_travel_agency(link)` check to the hotels_results loop in `search_hotels()`.
33
+
34
+ ### Issue 2: Plan-Build Discrepancy on Default Feature Classification (FIXED)
35
+ - **Severity:** Low
36
+ - **Description:** Plan Section 7 said unqualified amenities default to "preferred," but the LLM prompt (correctly) treats them as "required." The assignment description supports the "required" default.
37
+ - **Fix:** Updated plan.md to match the build behavior.
38
+
39
+ ## Code Quality Assessment
40
+
41
+ | Aspect | Rating | Notes |
42
+ |--------|--------|-------|
43
+ | Error handling | Good | API key checks, try/except blocks, graceful fallbacks |
44
+ | Input validation | Good | Empty input check, JSON parse fallback |
45
+ | Separation of concerns | Excellent | Clean split across app.py / search.py / utils.py |
46
+ | User experience | Good | Tips panel, example queries, interpretation display |
47
+ | Security | Good | API keys via env vars, not hardcoded |
48
+ | Code readability | Excellent | Clear docstrings, logical section dividers |
49
+
50
+ ## Edge Cases Considered
51
+
52
+ - ✅ Empty input → friendly prompt message
53
+ - ✅ Missing API keys → clear error with setup instructions
54
+ - ✅ LLM returns invalid JSON → fallback to raw text as location
55
+ - ✅ SerpAPI error → error message displayed
56
+ - ✅ No matching hotels → suggestions for broadening search
57
+ - ✅ Duplicate hotel names → deduplicated
58
+
59
+ ## Recommendations
60
+
61
+ 1. Future versions could add caching for repeated searches (mentioned in plan but not yet implemented).
62
+ 2. Consider adding a loading indicator/progress bar for long searches.
63
+ 3. The keyword-matching heuristic in `_feature_present()` could be enhanced with semantic similarity.
64
+
65
+ ## QA Verdict: ✅ PASS
66
+
67
+ The build faithfully implements the architecture plan. All 10 requirements are met.
68
+ Two minor issues were found and fixed during QA review.
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.0
2
+ huggingface_hub>=0.30.0
3
+ requests>=2.31.0
4
+ python-dotenv>=1.0.0
search.py ADDED
@@ -0,0 +1,520 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Core search orchestrator for the Hotel Search App.
2
+
3
+ Pipeline:
4
+ 1. parse_request() — LLM extracts structured data from free-form text
5
+ 2. search_hotels() — SerpAPI Google Hotels engine retrieves hotel results
6
+ 3. rank_results() — score and rank by required/preferred match
7
+ 4. format_output() — produce Markdown cards for Gradio display
8
+ """
9
+
10
+ import json
11
+ import os
12
+ import re
13
+ from datetime import datetime, timedelta
14
+
15
+ import requests
16
+ from huggingface_hub import InferenceClient
17
+
18
+ from utils import is_travel_agency, extract_price_from_text
19
+
20
+ SERPAPI_URL = "https://serpapi.com/search.json"
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # 1. Parse the user's free-form request with an LLM
24
+ # ---------------------------------------------------------------------------
25
+
26
+ PARSE_SYSTEM_PROMPT = """\
27
+ You are a hotel-search assistant. The user will describe what kind of hotel they want
28
+ in free-form English. Your job is to extract structured information and return ONLY
29
+ valid JSON with the following schema (omit keys whose value would be null):
30
+
31
+ {
32
+ "location": "<city, state/country or region>",
33
+ "check_in": "<YYYY-MM-DD or null>",
34
+ "check_out": "<YYYY-MM-DD or null>",
35
+ "max_price": <number or null>,
36
+ "guests": <number or null>,
37
+ "required_features": ["list", "of", "must-have features"],
38
+ "preferred_features": ["list", "of", "nice-to-have features"]
39
+ }
40
+
41
+ Rules for classifying features:
42
+ - Words/phrases like "must", "need", "require", "essential", "has to have",
43
+ "mandatory", "only if" → put in required_features.
44
+ - Words/phrases like "would be nice", "prefer", "ideally", "hope", "bonus",
45
+ "if possible", "optional" → put in preferred_features.
46
+ - Location, dates, and explicit price caps are always required constraints (put them
47
+ in their own top-level keys, NOT in required_features).
48
+ - If the user gives no qualifier for an amenity, treat it as a **required** feature.
49
+ - Keep feature strings short and descriptive (e.g. "pool", "free parking",
50
+ "beachfront", "pet-friendly").
51
+ - Return ONLY the JSON object. No markdown, no explanation.
52
+ """
53
+
54
+
55
def parse_request(user_text: str) -> dict:
    """Use an LLM to turn free-form text into structured hotel search parameters.

    Args:
        user_text: The user's free-form description of the hotel they want.

    Returns:
        A dict following the schema described in ``PARSE_SYSTEM_PROMPT``.
        ``required_features`` and ``preferred_features`` are always present
        and always hold a list of non-empty strings. When the model's reply
        cannot be read as a JSON object, the whole ``user_text`` is used as
        the ``location`` and both feature lists are empty.

    Raises:
        Any exception raised by the inference client call is propagated;
        ``hotel_search`` catches it and reports it to the user.
    """
    hf_token = os.environ.get("HF_TOKEN", "")
    client = InferenceClient(token=hf_token)
    response = client.chat_completion(
        model="Qwen/Qwen2.5-72B-Instruct",
        messages=[
            {"role": "system", "content": PARSE_SYSTEM_PROMPT},
            {"role": "user", "content": user_text},
        ],
        max_tokens=512,
        temperature=0.1,
    )
    raw = (response.choices[0].message.content or "").strip()
    # Models often wrap the JSON in a Markdown fence despite the instructions.
    raw = re.sub(r"^```(?:json)?\s*", "", raw)
    raw = re.sub(r"\s*```$", "", raw)

    parsed = None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # The reply may contain prose around the object; try the outermost
        # brace-delimited span before giving up.
        embedded = re.search(r"\{.*\}", raw, re.DOTALL)
        if embedded:
            try:
                parsed = json.loads(embedded.group(0))
            except json.JSONDecodeError:
                parsed = None

    # json.loads also succeeds on lists, strings and numbers; only an object
    # is usable by the rest of the pipeline.
    if not isinstance(parsed, dict):
        parsed = {"location": user_text}

    # setdefault() would keep an explicit null, which later breaks
    # ", ".join(...) and iteration, so normalise both lists explicitly.
    for key in ("required_features", "preferred_features"):
        value = parsed.get(key)
        if isinstance(value, str):
            value = [value]
        elif not isinstance(value, (list, tuple)):
            value = []
        parsed[key] = [
            str(item).strip()
            for item in value
            if item is not None and str(item).strip()
        ]
    return parsed
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # 2. Search for hotels using SerpAPI (Google Hotels engine)
86
+ # ---------------------------------------------------------------------------
87
+
88
def _default_dates() -> tuple[str, str]:
    """Build the fallback stay used when the request carries no dates.

    Returns:
        ``(check_in, check_out)`` as ``YYYY-MM-DD`` strings: check-in is
        tomorrow according to the local clock, check-out is three days
        after check-in.
    """
    date_format = "%Y-%m-%d"
    arrival = datetime.now() + timedelta(days=1)
    departure = arrival + timedelta(days=3)
    return arrival.strftime(date_format), departure.strftime(date_format)
94
+
95
+
96
def _search_google_hotels(parsed: dict, api_key: str) -> list[dict]:
    """Query SerpAPI's Google Hotels engine and normalise each property.

    Args:
        parsed: Structured request (see ``parse_request``). Reads
            ``location``, ``check_in``, ``check_out``, ``guests`` and
            ``max_price``; missing dates fall back to ``_default_dates()``.
        api_key: SerpAPI key, sent as the ``api_key`` query parameter.

    Returns:
        One dict per entry in the response's ``properties`` list, with the
        keys ``name``, ``link``, ``thumbnail``, ``address``,
        ``property_token``, ``snippet``, ``price``, ``price_value``,
        ``rating``, ``reviews``, ``amenities`` and ``source``. If the
        response JSON contains an ``error`` key, a single-element list
        ``[{"error": <message>}]`` is returned instead.

    Raises:
        Exceptions from ``requests.get`` or from decoding the response body
        are propagated; ``search_hotels`` converts them into an error entry.
    """
    # `or` (not a .get default) so an explicit null/empty location still
    # produces a usable query string.
    location = parsed.get("location") or "hotels"

    check_in = parsed.get("check_in")
    check_out = parsed.get("check_out")
    if not check_in or not check_out:
        default_in, default_out = _default_dates()
        check_in = check_in or default_in
        check_out = check_out or default_out

    params = {
        "engine": "google_hotels",
        "q": location,
        "check_in_date": check_in,
        "check_out_date": check_out,
        "api_key": api_key,
        "gl": "us",
        "hl": "en",
        "currency": "USD",
    }
    # These filters are optional: a non-numeric value from the LLM should
    # drop the filter, not abort the whole search.
    for source_key, param_key in (("guests", "adults"), ("max_price", "max_price")):
        try:
            number = int(float(parsed.get(source_key) or 0))
        except (TypeError, ValueError, OverflowError):
            continue
        if number > 0:
            params[param_key] = number

    resp = requests.get(SERPAPI_URL, params=params, timeout=30)
    data = resp.json()
    if not isinstance(data, dict):
        return [{"error": "Unexpected response format from the hotel search API."}]

    if "error" in data:
        return [{"error": data["error"]}]

    hotels = []
    for prop in data.get("properties") or []:
        if not isinstance(prop, dict):
            continue

        website = prop.get("website") or ""
        fallback_link = prop.get("link") or ""
        # Prefer the hotel's own site; otherwise use the result's `link`.
        link = website if website and not is_travel_agency(website) else fallback_link

        # Keys may be present with a null value, so `or` supplies the default.
        amenities = [str(item) for item in (prop.get("amenities") or [])]
        rate = prop.get("rate_per_night") or {}

        images = prop.get("images") or []
        first_image = images[0] if images else None
        if isinstance(first_image, dict):
            thumbnail = first_image.get("thumbnail") or first_image.get("original_image") or ""
        elif isinstance(first_image, str):
            thumbnail = first_image
        else:
            thumbnail = ""

        hotels.append({
            "name": prop.get("name") or "Unknown Hotel",
            "link": link,
            "thumbnail": thumbnail,
            "address": prop.get("address") or "",
            "property_token": prop.get("property_token") or "",
            # Without a description, show the first few amenities instead.
            "snippet": prop.get("description") or ", ".join(amenities[:6]),
            "price": rate.get("lowest"),
            "price_value": rate.get("extracted_lowest"),
            "rating": prop.get("overall_rating"),
            "reviews": prop.get("reviews"),
            "amenities": amenities,
            "source": "google_hotels",
        })

    return hotels
164
+
165
+
166
def _fetch_hotel_address(property_token: str, api_key: str, query: str,
                         check_in: str, check_out: str) -> str:
    """Look up one hotel's address through a property-details request.

    Sends a ``google_hotels`` request that includes ``property_token`` and
    reads ``address`` from the response, falling back to
    ``location.address``. This is best-effort: any failure yields ``""``.

    Returns:
        The address found in the response, or an empty string.
    """
    request_params = {
        "engine": "google_hotels",
        "q": query,
        "property_token": property_token,
        "check_in_date": check_in,
        "check_out_date": check_out,
        "api_key": api_key,
        "gl": "us",
        "hl": "en",
        "currency": "USD",
    }
    try:
        payload = requests.get(SERPAPI_URL, params=request_params, timeout=15).json()
        top_level = payload.get("address", "")
        if top_level:
            return top_level
        return payload.get("location", {}).get("address", "")
    except Exception:
        # Address enrichment is optional; never let it break the search.
        return ""
186
+
187
+
188
def _enrich_addresses(hotels: list[dict], parsed: dict, api_key: str) -> None:
    """Fill in missing addresses, in place, for the first ten hotels.

    A hotel is looked up only when it has no ``address`` yet and carries a
    ``property_token``. Dates missing from ``parsed`` are replaced by the
    defaults from ``_default_dates()``.
    """
    search_query = parsed.get("location", "hotels")
    arrival, departure = parsed.get("check_in"), parsed.get("check_out")
    if not (arrival and departure):
        fallback_in, fallback_out = _default_dates()
        arrival = arrival if arrival else fallback_in
        departure = departure if departure else fallback_out

    needing_lookup = (
        entry for entry in hotels[:10]
        if not entry.get("address") and entry.get("property_token")
    )
    for entry in needing_lookup:
        found = _fetch_hotel_address(
            entry["property_token"], api_key, search_query, arrival, departure
        )
        if found:
            entry["address"] = found
206
+
207
+
208
def search_hotels(parsed: dict) -> list[dict]:
    """Run the Google Hotels search and return de-duplicated hotel dicts.

    Reads the SerpAPI key from the ``SERPAPI_KEY`` environment variable.
    Any exception from the search, or a result made up only of error
    entries, is reported as a single-element list ``[{"error": <message>}]``.
    Otherwise hotels are de-duplicated by case-insensitive, stripped name
    (first occurrence wins) and missing addresses are filled in.
    """
    api_key = os.environ.get("SERPAPI_KEY", "")

    try:
        results = _search_google_hotels(parsed, api_key)
    except Exception as exc:
        return [{"error": f"Google Hotels search failed: {exc}"}]

    # Split error entries from real hotel results.
    failures: list[str] = []
    found: list[dict] = []
    for item in results:
        if item.get("error"):
            failures.append(item["error"])
        else:
            found.append(item)

    if failures and not found:
        return [{"error": "; ".join(failures)}]

    # dicts keep insertion order, so setdefault() retains the first hotel
    # seen for each normalised name.
    by_name: dict[str, dict] = {}
    for item in found:
        by_name.setdefault(item["name"].lower().strip(), item)
    distinct = list(by_name.values())

    _enrich_addresses(distinct, parsed, api_key)
    return distinct
237
+
238
+
239
+ # ---------------------------------------------------------------------------
240
+ # 3. Rank the results
241
+ # ---------------------------------------------------------------------------
242
+
243
def _feature_present(feature: str, hotel: dict) -> bool:
    """Heuristic check: is ``feature`` mentioned in the hotel's data?

    The hotel's name, snippet, price text and amenities are searched
    case-insensitively. The feature matches when the whole phrase occurs,
    or when every keyword of it (words longer than two characters, split
    on whitespace, commas, hyphens and slashes) occurs. Requiring all
    keywords stops "free parking" from being satisfied by "Free Wi-Fi".

    Args:
        feature: Short feature description such as ``"pool"``.
        hotel: Hotel dict; missing or ``None`` fields are treated as empty.

    Returns:
        ``True`` if the feature appears to be offered, else ``False``.
    """
    phrase = str(feature or "").lower().strip()
    if not phrase:
        return False

    amenities = hotel.get("amenities") or []
    searchable = " ".join([
        str(hotel.get("name") or ""),
        str(hotel.get("snippet") or ""),
        str(hotel.get("price") or ""),
        " ".join(str(item) for item in amenities),
    ]).lower()

    if phrase in searchable:
        return True

    keywords = [kw for kw in re.split(r"[\s,\-/]+", phrase) if len(kw) > 2]
    # all() is vacuously true for an empty list, hence the bool() guard.
    return bool(keywords) and all(kw in searchable for kw in keywords)
255
+
256
+
257
+ def rank_results(hotels: list[dict], parsed: dict) -> list[dict]:
258
+ """Score and sort hotels by how well they match the parsed requirements."""
259
+ required = parsed.get("required_features", [])
260
+ preferred = parsed.get("preferred_features", [])
261
+ max_price = parsed.get("max_price")
262
+
263
+ scored = []
264
+ for hotel in hotels:
265
+ if hotel.get("error"):
266
+ continue
267
+
268
+ score = 50
269
+ matched_req = []
270
+ missing_req = []
271
+ matched_pref = []
272
+
273
+ for feat in required:
274
+ if _feature_present(feat, hotel):
275
+ score += 10
276
+ matched_req.append(feat)
277
+ else:
278
+ score -= 5
279
+ missing_req.append(feat)
280
+
281
+ for feat in preferred:
282
+ if _feature_present(feat, hotel):
283
+ score += 3
284
+ matched_pref.append(feat)
285
+
286
+ if max_price and hotel.get("price_value"):
287
+ if hotel["price_value"] <= max_price:
288
+ score += 5
289
+ else:
290
+ score -= 10
291
+ elif max_price and hotel.get("price"):
292
+ price_val = extract_price_from_text(str(hotel["price"]))
293
+ if price_val and price_val <= max_price:
294
+ score += 5
295
+ elif price_val and price_val > max_price:
296
+ score -= 10
297
+
298
+ hotel["score"] = max(0, min(100, score))
299
+ hotel["matched_required"] = matched_req
300
+ hotel["missing_required"] = missing_req
301
+ hotel["matched_preferred"] = matched_pref
302
+ scored.append(hotel)
303
+
304
+ scored.sort(key=lambda h: h["score"], reverse=True)
305
+ return scored
306
+
307
+
308
+ # ---------------------------------------------------------------------------
309
+ # 4. Format the output as Markdown
310
+ # ---------------------------------------------------------------------------
311
+
312
def _star_markup(rating) -> str:
    """Render a rating (clamped to 0-5) as a gold star strip, or "" if not numeric."""
    try:
        value = min(max(float(rating), 0.0), 5.0)
        full = int(value)
    except (TypeError, ValueError, OverflowError):
        return ""
    half = 1 if (value - full) >= 0.3 else 0
    empty = 5 - full - half
    stars = ("★" * full) + ("½" if half else "") + ("☆" * empty)
    return f'<span style="color: goldenrod; font-size: 1.1em;">{stars}</span>'


def _hotel_card(position: int, hotel: dict) -> str:
    """Render one ranked hotel as an HTML card with all dynamic text escaped."""
    # Local import keeps this block independent of the file's import list.
    from html import escape

    def text(value) -> str:
        # Hotel data comes from an external API and the feature names from
        # LLM output, so nothing is trusted to be HTML-safe.
        return escape(str(value), quote=True)

    name = text(hotel.get("name") or "Unknown Hotel")
    link = hotel.get("link") or ""
    thumbnail = hotel.get("thumbnail") or ""
    address = hotel.get("address") or ""
    price = hotel.get("price", "N/A")
    rating = hotel.get("rating")
    reviews = hotel.get("reviews")
    score = hotel.get("score", 0)
    snippet = hotel.get("snippet") or ""
    amenities = hotel.get("amenities") or []

    # Hotel name as a clickable link. Only http(s) URLs are linked so that
    # a `javascript:` URL can never end up in an href.
    is_web_link = str(link).lower().startswith(("http://", "https://"))
    if link and is_web_link and not is_travel_agency(link):
        name_html = (
            f'<a href="{text(link)}" target="_blank" rel="noopener noreferrer" '
            f'style="color: #64b5f6; text-decoration: none;">{name}</a>'
        )
    else:
        name_html = name

    rating_line = ""
    if rating:
        rating_line = f'<b>Rating:</b> {text(rating)} {_star_markup(rating)}'
        if reviews:
            rating_line += f' ({text(reviews)} reviews)'

    address_line = f'<b>Address:</b> {text(address)}' if address else ""

    price_line = ""
    if price and price != "N/A":
        price_line = (
            f'<b>Price:</b> <span style="color: white; font-weight: bold;">'
            f'{text(price)}</span>/night'
        )

    score_line = f'<b>Match Score:</b> {text(score)}/100'

    img_html = ""
    if thumbnail:
        img_html = (
            f'<img src="{text(thumbnail)}" alt="{name}"'
            f' style="width: 180px; height: 130px; object-fit: cover;'
            f' border-radius: 8px; flex-shrink: 0;" />'
        )

    # Info block shown to the right of the photo.
    info_html = "<br>".join(
        line for line in (rating_line, price_line, address_line, score_line) if line
    )

    snippet_html = ""
    if snippet:
        # Truncate before escaping so an entity is never cut in half.
        snippet_html = (
            f'<div style="margin-top: 8px; color: #aaa; font-style: italic;">'
            f'{text(str(snippet)[:250])}</div>'
        )

    amenities_html = ""
    if amenities:
        items = " &bull; ".join(text(item) for item in amenities[:10])
        amenities_html = (
            f'<div style="margin-top: 8px; padding: 8px 12px; '
            f'background: #3a3a3a; border-radius: 8px; line-height: 1.5;">'
            f'<b>Amenities:</b><br>{items}</div>'
        )

    def joined(features) -> str:
        return ", ".join(text(item) for item in features)

    matched_req = hotel.get("matched_required") or []
    missing_req = hotel.get("missing_required") or []
    matched_pref = hotel.get("matched_preferred") or []

    features_parts = []
    if matched_req:
        features_parts.append(
            f'<span style="color: green; font-weight: bold;">'
            f'&#10004; Matched Required: {joined(matched_req)}</span>'
        )
    if missing_req:
        features_parts.append(
            f'<span style="color: red; font-weight: bold;">'
            f'&#10008; Missing Required: {joined(missing_req)}</span>'
        )
    if matched_pref:
        features_parts.append(
            f'<span style="color: green;">'
            f'&#10004; Matched Preferences: {joined(matched_pref)}</span>'
        )
    features_html = "<br>".join(features_parts)
    if features_html:
        features_html = f'<div style="margin-top: 8px;">{features_html}</div>'

    # Empty sections are dropped rather than emitted as blank lines, since
    # a blank line ends an HTML block in Markdown.
    sections = [
        '<div style="border: 1px solid #444; border-radius: 12px; padding: 16px; '
        'margin-bottom: 16px; position: relative; background: #2b2b2b; color: #e0e0e0;">',
        '<div style="font-size: 1.3em; font-weight: bold; margin-bottom: 10px;">',
        f'{position}. {name_html}',
        '</div>',
        '<div style="display: flex; gap: 16px; align-items: flex-start;">',
        img_html,
        '<div style="flex: 1; min-width: 0;">',
        info_html,
        '</div>',
        '</div>',
        snippet_html,
        amenities_html,
        features_html,
        '</div>',
    ]
    return "\n" + "\n".join(part for part in sections if part) + "\n"


def format_output(ranked_hotels: list[dict], parsed: dict) -> str:
    """Produce a Markdown/HTML string with one card per hotel.

    Args:
        ranked_hotels: Hotels in display order, as returned by
            ``rank_results``. Only the first ten are rendered.
        parsed: Structured request. ``location`` is used in the heading and
            ``required_features`` in the "no results" message.

    Returns:
        A "No Hotels Found" message with suggestions when ``ranked_hotels``
        is empty; otherwise a heading, a result count and up to ten HTML
        cards. All hotel and feature text inside the cards is HTML-escaped.
    """
    if not ranked_hotels:
        required = parsed.get("required_features") or []
        req_list = ", ".join(str(item) for item in required) or "none specified"
        return (
            "## No Hotels Found\n\n"
            "No hotels were found matching your requirements.\n\n"
            f"**Your required features were:** {req_list}\n\n"
            "**Suggestions:**\n"
            "- Try a broader location (e.g. city name instead of neighborhood).\n"
            "- Move some required features to preferences "
            "(e.g. say 'ideally has a pool' instead of 'must have a pool').\n"
            "- Some features like Wi-Fi are so common they're rarely listed — "
            "try removing them.\n"
        )

    location = parsed.get("location", "your destination")
    lines = [f"## Hotel Results for {location}\n"]
    lines.append(f"Found **{len(ranked_hotels)}** hotel(s). "
                 "Ranked by match to your requirements.\n\n---\n")
    lines.extend(
        _hotel_card(position, hotel)
        for position, hotel in enumerate(ranked_hotels[:10], 1)
    )
    return "\n".join(lines)
460
+
461
+
462
+ # ---------------------------------------------------------------------------
463
+ # Public entry point
464
+ # ---------------------------------------------------------------------------
465
+
466
def hotel_search(user_text: str) -> str:
    """Run the full pipeline for one request: parse, search, rank, format.

    Returns Markdown in every case. Blank input, a missing ``HF_TOKEN`` or
    ``SERPAPI_KEY``, a parsing failure, a search failure, or a result made
    up only of API errors each produce a short message instead of results.
    On success the formatted results are followed by a summary of how the
    request was interpreted.
    """
    if not (user_text and user_text.strip()):
        return "Please enter a description of the hotel you're looking for."

    if not os.environ.get("HF_TOKEN"):
        return ("**Hugging Face token not configured.**\n\n"
                "Please set the `HF_TOKEN` environment variable "
                "(or Hugging Face Space secret).")

    if not os.environ.get("SERPAPI_KEY"):
        return ("**SerpAPI key not configured.**\n\n"
                "Please set the `SERPAPI_KEY` environment variable "
                "(or Hugging Face Space secret).")

    try:
        parsed = parse_request(user_text)
    except Exception as exc:
        return f"**Error parsing your request:** {exc}"

    try:
        raw_hotels = search_hotels(parsed)
    except Exception as exc:
        return f"**Error searching for hotels:** {exc}"

    # Surface API errors to the user instead of silently hiding them.
    api_errors = [entry["error"] for entry in raw_hotels if entry.get("error")]
    if api_errors and all(entry.get("error") for entry in raw_hotels):
        return (
            "## Search Error\n\n"
            f"The hotel search API returned an error:\n\n`{api_errors[0]}`\n\n"
            "Please check that the SERPAPI_KEY is valid and try again."
        )

    results_markdown = format_output(rank_results(raw_hotels, parsed), parsed)

    # Echo the parsed request so the user can see how it was understood.
    summary = [
        "\n\n---\n### How I Interpreted Your Request\n",
        f"- **Location:** {parsed.get('location', 'Not specified')}\n",
    ]
    if parsed.get("check_in"):
        summary.append(f"- **Check-in:** {parsed['check_in']}\n")
    if parsed.get("check_out"):
        summary.append(f"- **Check-out:** {parsed['check_out']}\n")
    if parsed.get("max_price"):
        summary.append(f"- **Max Price:** ${parsed['max_price']}/night\n")
    if parsed.get("guests"):
        summary.append(f"- **Guests:** {parsed['guests']}\n")
    if parsed.get("required_features"):
        summary.append(f"- **Required Features:** "
                       f"{', '.join(parsed['required_features'])}\n")
    if parsed.get("preferred_features"):
        summary.append(f"- **Preferred Features:** "
                       f"{', '.join(parsed['preferred_features'])}\n")

    return results_markdown + "".join(summary)
utils.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for the Hotel Search App."""
2
+
3
+ import re
4
+ from urllib.parse import urlparse
5
+
6
+ BLOCKED_DOMAINS = {
7
+ "expedia.com", "booking.com", "hotels.com", "trivago.com",
8
+ "kayak.com", "priceline.com", "orbitz.com", "travelocity.com",
9
+ "agoda.com", "trip.com", "hotwire.com", "cheaptickets.com",
10
+ "tripadvisor.com", "google.com", "bing.com", "momondo.com",
11
+ "skyscanner.com", "makemytrip.com", "goibibo.com", "yatra.com",
12
+ "cleartrip.com", "lonelyplanet.com", "hostelworld.com",
13
+ "hotels.ng", "hrs.com", "destinia.com",
14
+ "travelzoo.com", "smartertravel.com", "travelpod.com",
15
+ "wotif.com", "lastminute.com", "opodo.com", "edreams.com",
16
+ "loveholidays.com", "secretescapes.com", "hotelscombined.com",
17
+ "travelsupermarket.com", "skyscanner.net", "cheapoair.com",
18
+ "onetravel.com", "getaroom.com", "snaptravel.com",
19
+ }
20
+
21
+
22
+ def is_travel_agency(url: str) -> bool:
23
+ """Return True if the URL belongs to a known travel agency or aggregator."""
24
+ try:
25
+ parsed = urlparse(url)
26
+ domain = parsed.netloc.lower().replace("www.", "")
27
+ for blocked in BLOCKED_DOMAINS:
28
+ if blocked in domain:
29
+ return True
30
+ return False
31
+ except Exception:
32
+ return False
33
+
34
+
35
+ def extract_direct_hotel_url(urls: list[str]) -> str | None:
36
+ """From a list of URLs, return the first one that is NOT a travel agency."""
37
+ for url in urls:
38
+ if url and not is_travel_agency(url):
39
+ return url
40
+ return None
41
+
42
+
43
+ def extract_price_from_text(text: str) -> float | None:
44
+ """Try to extract a dollar price from a text string."""
45
+ patterns = [
46
+ r"\$\s?(\d{1,5}(?:\.\d{2})?)",
47
+ r"(\d{1,5})\s*(?:dollars|usd|per night|/night|a night)",
48
+ ]
49
+ for pattern in patterns:
50
+ match = re.search(pattern, text, re.IGNORECASE)
51
+ if match:
52
+ try:
53
+ return float(match.group(1))
54
+ except ValueError:
55
+ continue
56
+ return None
57
+
58
+
59
def clean_snippet(text: str) -> str:
    """Strip tag-like markup from a snippet and normalise its whitespace.

    Anything of the form ``<...>`` is removed, every run of whitespace
    becomes a single space, and the result is trimmed. Falsy input yields
    an empty string.
    """
    if text:
        without_tags = re.sub(r"<[^>]+>", "", text)
        return re.sub(r"\s+", " ", without_tags).strip()
    return ""