Spaces:
Runtime error
Runtime error
Merge branch 'main' of https://huggingface.co/spaces/thinkcol/restaurants into main
Browse files
- app.py +2 -2
- data.py +2 -0
- data/categories.bin +2 -2
- data/dishes.bin +2 -2
- data/restaurants.bin +2 -2
- scripts/create_embeddings.py +53 -1
app.py
CHANGED
|
@@ -68,7 +68,7 @@ async def on_message(message: str, message_id: str):
|
|
| 68 |
|
| 69 |
await chain.text(str(list(results))) # TODO maybe json format would be better?
|
| 70 |
|
| 71 |
-
restaurants = "\n".join(f"- ID: {r.id} |
|
| 72 |
|
| 73 |
final_choices_msg = await chain.llm(
|
| 74 |
"""
|
|
@@ -149,7 +149,7 @@ async def on_message(message: str, message_id: str):
|
|
| 149 |
msg.elements = [
|
| 150 |
# note: image always displays above text
|
| 151 |
cl.Image(name=restaurant.name, url=restaurant.image_url, display='inline', size='small'),
|
| 152 |
-
cl.Text(name=restaurant.name, content=restaurant.
|
| 153 |
cl.Text(name="Example Dishes:", content=dishes_as_string, display='inline'),
|
| 154 |
cl.Text(name="Category:", content=categories_as_string, display='inline'),
|
| 155 |
cl.Text(name="Estimated Average Price (HKD):", content=restaurant.price, display="inline"),
|
|
|
|
| 68 |
|
| 69 |
await chain.text(str(list(results))) # TODO maybe json format would be better?
|
| 70 |
|
| 71 |
+
restaurants = "\n".join(f"- ID: {r.id} | {r.text}" for r in results)
|
| 72 |
|
| 73 |
final_choices_msg = await chain.llm(
|
| 74 |
"""
|
|
|
|
| 149 |
msg.elements = [
|
| 150 |
# note: image always displays above text
|
| 151 |
cl.Image(name=restaurant.name, url=restaurant.image_url, display='inline', size='small'),
|
| 152 |
+
cl.Text(name=restaurant.name, content=restaurant.intro, display='inline'),
|
| 153 |
cl.Text(name="Example Dishes:", content=dishes_as_string, display='inline'),
|
| 154 |
cl.Text(name="Category:", content=categories_as_string, display='inline'),
|
| 155 |
cl.Text(name="Estimated Average Price (HKD):", content=restaurant.price, display="inline"),
|
data.py
CHANGED
|
@@ -45,12 +45,14 @@ class RestaurantDescription(OpenaiEmbeddingDoc):
|
|
| 45 |
id: str = '' # a number string
|
| 46 |
name: str
|
| 47 |
name_alt: str | None
|
|
|
|
| 48 |
categories: list[str]
|
| 49 |
dishes: list[str]
|
| 50 |
rating: float # 0-1
|
| 51 |
price: int # HKD
|
| 52 |
info_url: str
|
| 53 |
image_url: str
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
class Category(OpenaiEmbeddingDoc):
|
|
|
|
| 45 |
id: str = '' # a number string
|
| 46 |
name: str
|
| 47 |
name_alt: str | None
|
| 48 |
+
intro: str
|
| 49 |
categories: list[str]
|
| 50 |
dishes: list[str]
|
| 51 |
rating: float # 0-1
|
| 52 |
price: int # HKD
|
| 53 |
info_url: str
|
| 54 |
image_url: str
|
| 55 |
+
location: list[str]
|
| 56 |
|
| 57 |
|
| 58 |
class Category(OpenaiEmbeddingDoc):
|
data/categories.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dddf959c29d7af260e7eeacd5a6dbd1c249b54fc492b21c6489dcf68f970402
|
| 3 |
+
size 1134750
|
data/dishes.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e8436a53e09bff69275cd04b47a46c55a1e4e6e4f8ae1f856026e02060bffd9
|
| 3 |
+
size 7290051
|
data/restaurants.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d379bf61329a9e615a2cdfe082e99df5fe63f3278876e5addcf18e6e059c4ec
|
| 3 |
+
size 1863120
|
scripts/create_embeddings.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import csv
|
|
|
|
| 2 |
from ast import literal_eval
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import TypeVar
|
|
@@ -35,6 +36,35 @@ def add_to_all(restaurant: RestaurantDescription, keys: list[str], mapping: dict
|
|
| 35 |
v.restaurants.append(restaurant.id)
|
| 36 |
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
restaurants, dish_list, category_list = None, None, None
|
| 39 |
|
| 40 |
|
|
@@ -45,6 +75,7 @@ def main():
|
|
| 45 |
csv_file = Path('restaurants.csv')
|
| 46 |
|
| 47 |
restaurants = DocList[RestaurantDescription]()
|
|
|
|
| 48 |
dishes = {}
|
| 49 |
categories = {}
|
| 50 |
|
|
@@ -58,30 +89,51 @@ def main():
|
|
| 58 |
name = row['name_lang1']
|
| 59 |
name_alt = None
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
ds = literal_eval(row['dishes'])
|
| 62 |
ds = [normalize_dish(d) for d in ds]
|
|
|
|
| 63 |
cs = literal_eval(row['categories'])
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
r = RestaurantDescription(
|
| 66 |
embedding=None, # batch create all embeddings later
|
|
|
|
| 67 |
id=row['id'],
|
| 68 |
name=name,
|
| 69 |
name_alt=name_alt,
|
| 70 |
-
|
| 71 |
price=int(row['price']),
|
| 72 |
rating=calculate_rating(row['score_cry'], row['score_o_k'], row['score_smile']),
|
| 73 |
categories=cs,
|
| 74 |
dishes=ds,
|
| 75 |
info_url=row['poi_url'],
|
| 76 |
image_url=row['door_photos'],
|
|
|
|
| 77 |
)
|
| 78 |
|
| 79 |
restaurants.append(r)
|
|
|
|
| 80 |
add_to_all(r, ds, dishes, Dish)
|
| 81 |
add_to_all(r, cs, categories, Category)
|
| 82 |
dish_list = DocList[Dish](dishes.values())
|
| 83 |
category_list = DocList[Category](categories.values())
|
| 84 |
|
|
|
|
|
|
|
|
|
|
| 85 |
embedding_settings = AzureOpenaiEmbeddings.load_from_env()
|
| 86 |
|
| 87 |
RestaurantDescription.create_embeddings(restaurants, **embedding_settings.to_settings_dict())
|
|
|
|
| 1 |
import csv
|
| 2 |
+
import json
|
| 3 |
from ast import literal_eval
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import TypeVar
|
|
|
|
| 36 |
v.restaurants.append(restaurant.id)
|
| 37 |
|
| 38 |
|
| 39 |
+
def load_districts():
    """Read the district boundary file and build one lookup record per feature.

    Returns a list of dicts, each holding ``"name"`` (the feature's
    ``District`` property) and ``"polygon"`` (a matplotlib path built from the
    first entry of the feature's geometry coordinates).
    """
    boundary_file = Path('data/district_boundary.json')
    with boundary_file.open('r', encoding='utf-8') as handle:
        geojson = json.load(handle)

    # Imported under an alias because ``Path`` already refers to pathlib.Path.
    from matplotlib.path import Path as Polyline

    return [
        {
            "name": feature['properties']['District'],
            # NOTE(review): presumably coordinates[0] is the outer ring of a
            # single-polygon geometry - confirm against the data file.
            "polygon": Polyline(feature['geometry']['coordinates'][0]),
        }
        for feature in geojson['features']
    ]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# Built once when the module is loaded; get_district_name scans this list on every call.
DISTRICTS = load_districts()
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def get_district_name(lat: str, lon: str) -> list[str]:
    """Return the names of all districts whose polygon contains the point.

    ``lat`` and ``lon`` are converted with ``float`` before the lookup. The
    result is a list, which is empty when no district in ``DISTRICTS``
    contains the point.
    """
    latitude = float(lat)
    longitude = float(lon)
    # The polygons are queried with the point in (lon, lat) order.
    point = (longitude, latitude)
    return [
        district['name']
        for district in DISTRICTS
        if district['polygon'].contains_point(point)
    ]
|
| 66 |
+
|
| 67 |
+
|
| 68 |
restaurants, dish_list, category_list = None, None, None
|
| 69 |
|
| 70 |
|
|
|
|
| 75 |
csv_file = Path('restaurants.csv')
|
| 76 |
|
| 77 |
restaurants = DocList[RestaurantDescription]()
|
| 78 |
+
restaurant_names = set()
|
| 79 |
dishes = {}
|
| 80 |
categories = {}
|
| 81 |
|
|
|
|
| 89 |
name = row['name_lang1']
|
| 90 |
name_alt = None
|
| 91 |
|
| 92 |
+
# for this demo, don't add multiple locations of the same restaurant chain
|
| 93 |
+
if name in restaurant_names:
|
| 94 |
+
continue
|
| 95 |
+
|
| 96 |
ds = literal_eval(row['dishes'])
|
| 97 |
ds = [normalize_dish(d) for d in ds]
|
| 98 |
+
ds = list(set(ds)) # unique
|
| 99 |
cs = literal_eval(row['categories'])
|
| 100 |
|
| 101 |
+
location = get_district_name(row['map_latitude'], row['map_longitude'])
|
| 102 |
+
|
| 103 |
+
text = f"""\
|
| 104 |
+
Name: {name}
|
| 105 |
+
Intro: {row['intro']}
|
| 106 |
+
Categories: {", ".join(cs)}
|
| 107 |
+
Dishes: {", ".join(ds)}
|
| 108 |
+
Location: {", ".join(location)}\
|
| 109 |
+
"""
|
| 110 |
+
|
| 111 |
r = RestaurantDescription(
|
| 112 |
embedding=None, # batch create all embeddings later
|
| 113 |
+
text=text,
|
| 114 |
id=row['id'],
|
| 115 |
name=name,
|
| 116 |
name_alt=name_alt,
|
| 117 |
+
intro=row['intro'],
|
| 118 |
price=int(row['price']),
|
| 119 |
rating=calculate_rating(row['score_cry'], row['score_o_k'], row['score_smile']),
|
| 120 |
categories=cs,
|
| 121 |
dishes=ds,
|
| 122 |
info_url=row['poi_url'],
|
| 123 |
image_url=row['door_photos'],
|
| 124 |
+
location=location,
|
| 125 |
)
|
| 126 |
|
| 127 |
restaurants.append(r)
|
| 128 |
+
restaurant_names.add(name)
|
| 129 |
add_to_all(r, ds, dishes, Dish)
|
| 130 |
add_to_all(r, cs, categories, Category)
|
| 131 |
dish_list = DocList[Dish](dishes.values())
|
| 132 |
category_list = DocList[Category](categories.values())
|
| 133 |
|
| 134 |
+
# import IPython
|
| 135 |
+
# IPython.embed()
|
| 136 |
+
|
| 137 |
embedding_settings = AzureOpenaiEmbeddings.load_from_env()
|
| 138 |
|
| 139 |
RestaurantDescription.create_embeddings(restaurants, **embedding_settings.to_settings_dict())
|