File size: 4,831 Bytes
4336e75
 
1cf9263
4336e75
1cf9263
4336e75
1cf9263
 
 
4336e75
1cf9263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b86b079
1cf9263
 
b86b079
1cf9263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ee28b2
1cf9263
b86b079
1cf9263
 
4336e75
1cf9263
 
 
 
fe7b22f
1cf9263
 
 
 
fe7b22f
b86b079
4336e75
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import pandas as pd
import gradio as gr
import re
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

# -------------------------------
# Load dataset
# -------------------------------
# Read the order data; ratings that cannot be parsed as numbers become NaN.
df = pd.read_csv("food_order_cleaned.csv")
df['rating'] = pd.to_numeric(df['rating'], errors='coerce')

# One "name | cuisine | rating" string per row; this is the text that gets
# embedded for semantic search further down.
df['search_text'] = df['restaurant_name'].astype(str).str.cat(
    [df['cuisine_type'].astype(str), df['rating'].astype(str)],
    sep=" | ",
)

# -------------------------------
# Rule-based functions
# -------------------------------
def find_by_cuisine(cuisine, limit=10):
    """Return up to ``limit`` rows whose cuisine equals ``cuisine``.

    The comparison ignores case and surrounding whitespace on both the
    requested cuisine and the ``cuisine_type`` column. Rows keep their
    original dataframe order.
    """
    wanted = cuisine.strip().lower()
    normalized = df['cuisine_type'].str.strip().str.lower()
    matches = df.loc[
        normalized == wanted,
        ['restaurant_name', 'cuisine_type', 'cost_of_the_order', 'rating'],
    ]
    return matches.head(limit)

def best_rated_by_cuisine(cuisine, top_n=10):
    """Return the ``top_n`` highest-rated rows for ``cuisine``.

    Cuisine matching ignores case and surrounding whitespace. Rows without
    a rating are excluded before ranking.
    """
    wanted = cuisine.strip().lower()
    is_match = df['cuisine_type'].str.strip().str.lower() == wanted
    rated = df[is_match].dropna(subset=['rating'])
    ranked = rated.sort_values('rating', ascending=False)
    return ranked[
        ['restaurant_name', 'cuisine_type', 'cost_of_the_order', 'rating']
    ].head(top_n)

def cheapest_high_rated(max_cost=None, min_rating=4.0, top_n=10):
    """Return the cheapest rows whose rating is at least ``min_rating``.

    Args:
        max_cost: Optional inclusive upper bound on ``cost_of_the_order``.
            ``None`` (the default) disables the cap.
        min_rating: Inclusive lower bound on ``rating``.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with the restaurant name, cuisine, cost and rating
        columns, sorted by ascending cost. Rows without a rating are
        never included.
    """
    subset = df.dropna(subset=['rating'])
    subset = subset[subset['rating'] >= min_rating]
    # Compare against None explicitly: a cap of 0 is a real (if strict)
    # bound and must not be silently treated as "no cap".
    if max_cost is not None:
        subset = subset[subset['cost_of_the_order'] <= max_cost]
    subset = subset.sort_values('cost_of_the_order')
    cols = ['restaurant_name', 'cuisine_type', 'cost_of_the_order', 'rating']
    return subset[cols].head(top_n)

def personalized_recall(customer_id, day):
    """Return the orders placed by ``customer_id`` on ``day``.

    Customer ids are compared as strings; the day comparison is
    case-insensitive.
    """
    same_customer = df['customer_id'].astype(str) == str(customer_id)
    same_day = df['day_of_the_week'].str.lower() == day.lower()
    return df.loc[
        same_customer & same_day,
        [
            'order_id',
            'restaurant_name',
            'cuisine_type',
            'cost_of_the_order',
            'rating',
            'day_of_the_week',
        ],
    ]

# -------------------------------
# Semantic Search
# -------------------------------
# Embed every row's search text once at start-up.
model = SentenceTransformer("all-MiniLM-L6-v2")
corpus_embeddings = model.encode(
    df["search_text"].tolist(),
    show_progress_bar=True,
)

# Cosine-distance neighbour index over the row embeddings.
nn = NearestNeighbors(n_neighbors=10, metric="cosine")
nn.fit(corpus_embeddings)

def semantic_search(query, k=5):
    """Return the ``k`` rows whose search text is closest to ``query``.

    The query is embedded with the same model as the corpus and looked up
    in the cosine-distance neighbour index. The result carries an extra
    ``score`` column equal to ``1 - distance``.
    """
    query_vec = model.encode([query])
    distances, indices = nn.kneighbors(query_vec, n_neighbors=k)
    # kneighbors returns one row per query; there is exactly one query here.
    hits = df.iloc[indices[0]].assign(score=1 - distances[0])
    return hits[
        ['restaurant_name', 'cuisine_type', 'cost_of_the_order', 'rating', 'score']
    ]

# -------------------------------
# Query Router
# -------------------------------
def _match_cuisine(text):
    """Return the first known cuisine mentioned in ``text``, or ``None``.

    ``text`` is expected to be lower-cased already. Known cuisines come from
    the ``cuisine_type`` column, stripped and lower-cased.
    """
    # dropna: a missing cuisine would otherwise surface as a float NaN and
    # make the substring test below raise TypeError.
    known = df["cuisine_type"].str.strip().str.lower().dropna().unique()
    for cuisine in known:
        # Skip blanks: "" is a substring of every string and would match
        # any message.
        if cuisine and cuisine in text:
            return cuisine
    return None


def handle_query(message, customer_id=""):
    """Route a chat message to the matching search and render the result.

    Routing is keyword based and checked in this order:
      1. "find" + "restaurant" -> rows for the cuisine named in the message
      2. "best"                -> top-rated rows for the named cuisine
      3. "cheap" / "value"     -> cheapest rows with a high rating
      4. "what did i order"    -> the customer's orders on the named day
    Anything else, and routes 1/2 when no known cuisine is mentioned,
    falls back to semantic search.

    Args:
        message: Free-text user query.
        customer_id: Customer identifier; only required for route 4.

    Returns:
        An HTML table produced by ``DataFrame.to_html`` or, for route 4, a
        short plain-text message when information is missing or nothing
        matched.
    """
    message = message or ""
    text = message.lower()

    # Routes 1 and 2 share the cuisine lookup; the listing route wins when
    # both sets of keywords are present.
    wants_listing = "find" in text and "restaurant" in text
    if wants_listing or "best" in text:
        cuisine = _match_cuisine(text)
        if cuisine is None:
            return semantic_search(message).to_html(index=False)
        lookup = find_by_cuisine if wants_listing else best_rated_by_cuisine
        return lookup(cuisine).to_html(index=False)

    # 3. cheap places
    if "cheap" in text or "value" in text:
        return cheapest_high_rated().to_html(index=False)

    # 4. personalized recall
    if "what did i order" in text:
        # Treat whitespace-only ids as missing.
        customer_id = str(customer_id or "").strip()
        if not customer_id:
            return "Please enter customer_id."
        # \b keeps words that merely end in "on" (e.g. "lemon chicken")
        # from being mistaken for the preposition before the day.
        day_match = re.search(r"\bon\s+(\w+)", text)
        if day_match is None:
            return "Please specify the day (e.g., Tuesday)"
        orders = personalized_recall(customer_id, day_match.group(1))
        if orders.empty:
            return "No matching records."
        return orders.to_html(index=False)

    return semantic_search(message).to_html(index=False)


# -------------------------------
# CHATBOT FUNCTION (DICTIONARY FORMAT)
# -------------------------------
def chatbot_fn(history, message, customer_id):
    """Handle one chat turn.

    Runs the query, records the user message and a fixed assistant
    acknowledgement in ``history`` (role/content dictionaries, appended in
    place), and returns ``(history, "", reply_html)``: the updated history,
    an empty string for the message box, and the rendered results.
    """
    reply_html = handle_query(message, customer_id)

    history.extend([
        {"role": "user", "content": message},
        {"role": "assistant", "content": "Here are the results 👇"},
    ])

    return history, "", reply_html


# -------------------------------
# INTERFACE
# -------------------------------
# Build the UI. Components are laid out in the order they are created:
# title, chat history, results panel, then a single input row.
with gr.Blocks() as demo:
    gr.Markdown("## 🍽️ Restaurant Guide Chatbot")

    # NOTE(review): no explicit message format is passed here, while
    # chatbot_fn appends role/content dictionaries — confirm the installed
    # Gradio version's default Chatbot format accepts them.
    chat = gr.Chatbot(label="Chat History")   # no type arg
    # Result tables are rendered here rather than inside the chat bubbles.
    html_out = gr.HTML(label="Search Results")

    with gr.Row():
        msg = gr.Textbox(placeholder="Ask me anything…")
        cid = gr.Textbox(label="Customer ID (optional)")
        btn = gr.Button("Send")

    # One click = one chat turn: chatbot_fn receives (history, message,
    # customer id) and returns (history, new message-box text, results HTML).
    btn.click(
        chatbot_fn,
        inputs=[chat, msg, cid],
        outputs=[chat, msg, html_out]
    )

# Runs at module level, so the app starts whenever this file is executed
# or imported.
demo.launch()