jocelynchan committed on
Commit
83e82de
·
2 Parent(s): 6949531 1ef3ac7

Merge branch 'main' of https://huggingface.co/spaces/thinkcol/restaurants into main

Browse files
app.py CHANGED
@@ -68,7 +68,7 @@ async def on_message(message: str, message_id: str):
68
 
69
  await chain.text(str(list(results))) # TODO maybe json format would be better?
70
 
71
- restaurants = "\n".join(f"- ID: {r.id} | Description: {r.text}" for r in results)
72
 
73
  final_choices_msg = await chain.llm(
74
  """
@@ -149,7 +149,7 @@ async def on_message(message: str, message_id: str):
149
  msg.elements = [
150
  # note: image always displays above text
151
  cl.Image(name=restaurant.name, url=restaurant.image_url, display='inline', size='small'),
152
- cl.Text(name=restaurant.name, content=restaurant.text, display='inline'),
153
  cl.Text(name="Example Dishes:", content=dishes_as_string, display='inline'),
154
  cl.Text(name="Category:", content=categories_as_string, display='inline'),
155
  cl.Text(name="Estimated Average Price (HKD):", content=restaurant.price, display="inline"),
 
68
 
69
  await chain.text(str(list(results))) # TODO maybe json format would be better?
70
 
71
+ restaurants = "\n".join(f"- ID: {r.id} | {r.text}" for r in results)
72
 
73
  final_choices_msg = await chain.llm(
74
  """
 
149
  msg.elements = [
150
  # note: image always displays above text
151
  cl.Image(name=restaurant.name, url=restaurant.image_url, display='inline', size='small'),
152
+ cl.Text(name=restaurant.name, content=restaurant.intro, display='inline'),
153
  cl.Text(name="Example Dishes:", content=dishes_as_string, display='inline'),
154
  cl.Text(name="Category:", content=categories_as_string, display='inline'),
155
  cl.Text(name="Estimated Average Price (HKD):", content=restaurant.price, display="inline"),
data.py CHANGED
@@ -45,12 +45,14 @@ class RestaurantDescription(OpenaiEmbeddingDoc):
45
  id: str = '' # a number string
46
  name: str
47
  name_alt: str | None
 
48
  categories: list[str]
49
  dishes: list[str]
50
  rating: float # 0-1
51
  price: int # HKD
52
  info_url: str
53
  image_url: str
 
54
 
55
 
56
  class Category(OpenaiEmbeddingDoc):
 
45
  id: str = '' # a number string
46
  name: str
47
  name_alt: str | None
48
+ intro: str
49
  categories: list[str]
50
  dishes: list[str]
51
  rating: float # 0-1
52
  price: int # HKD
53
  info_url: str
54
  image_url: str
55
+ location: list[str]
56
 
57
 
58
  class Category(OpenaiEmbeddingDoc):
data/categories.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b67b72a7c15a455a0ef466c7cecd32295ecde6bf439bc78eac295f52fb6af35
3
- size 1135560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dddf959c29d7af260e7eeacd5a6dbd1c249b54fc492b21c6489dcf68f970402
3
+ size 1134750
data/dishes.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6a86200c149121da1192572f3c7d4e395d38c91ac324b999d3aa9caecfd92b1
3
- size 7290545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e8436a53e09bff69275cd04b47a46c55a1e4e6e4f8ae1f856026e02060bffd9
3
+ size 7290051
data/restaurants.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a88e0c7ccbb48546152e834d24933fe12e2785ab367cfce1cee8144e8ddfb51a
3
- size 2091841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d379bf61329a9e615a2cdfe082e99df5fe63f3278876e5addcf18e6e059c4ec
3
+ size 1863120
scripts/create_embeddings.py CHANGED
@@ -1,4 +1,5 @@
1
  import csv
 
2
  from ast import literal_eval
3
  from pathlib import Path
4
  from typing import TypeVar
@@ -35,6 +36,35 @@ def add_to_all(restaurant: RestaurantDescription, keys: list[str], mapping: dict
35
  v.restaurants.append(restaurant.id)
36
 
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  restaurants, dish_list, category_list = None, None, None
39
 
40
 
@@ -45,6 +75,7 @@ def main():
45
  csv_file = Path('restaurants.csv')
46
 
47
  restaurants = DocList[RestaurantDescription]()
 
48
  dishes = {}
49
  categories = {}
50
 
@@ -58,30 +89,51 @@ def main():
58
  name = row['name_lang1']
59
  name_alt = None
60
 
 
 
 
 
61
  ds = literal_eval(row['dishes'])
62
  ds = [normalize_dish(d) for d in ds]
 
63
  cs = literal_eval(row['categories'])
64
 
 
 
 
 
 
 
 
 
 
 
65
  r = RestaurantDescription(
66
  embedding=None, # batch create all embeddings later
 
67
  id=row['id'],
68
  name=name,
69
  name_alt=name_alt,
70
- text=row['intro'],
71
  price=int(row['price']),
72
  rating=calculate_rating(row['score_cry'], row['score_o_k'], row['score_smile']),
73
  categories=cs,
74
  dishes=ds,
75
  info_url=row['poi_url'],
76
  image_url=row['door_photos'],
 
77
  )
78
 
79
  restaurants.append(r)
 
80
  add_to_all(r, ds, dishes, Dish)
81
  add_to_all(r, cs, categories, Category)
82
  dish_list = DocList[Dish](dishes.values())
83
  category_list = DocList[Category](categories.values())
84
 
 
 
 
85
  embedding_settings = AzureOpenaiEmbeddings.load_from_env()
86
 
87
  RestaurantDescription.create_embeddings(restaurants, **embedding_settings.to_settings_dict())
 
1
  import csv
2
+ import json
3
  from ast import literal_eval
4
  from pathlib import Path
5
  from typing import TypeVar
 
36
  v.restaurants.append(restaurant.id)
37
 
38
 
39
+ def load_districts():
40
+ with Path('data/district_boundary.json').open('r', encoding='utf-8') as f:
41
+ districts = json.load(f)
42
+ from matplotlib.path import Path as Polyline
43
+ output = []
44
+ for d in districts['features']:
45
+ district = {
46
+ "name": d['properties']['District'],
47
+ "polygon": Polyline(d['geometry']['coordinates'][0])
48
+ }
49
+ output.append(district)
50
+
51
+ return output
52
+
53
+
54
+ DISTRICTS = load_districts()
55
+
56
+
57
+ def get_district_name(lat: str, lon: str):
58
+ lat = float(lat)
59
+ lon = float(lon)
60
+
61
+ matches = []
62
+ for district in DISTRICTS:
63
+ if district['polygon'].contains_point((lon, lat)):
64
+ matches.append(district['name'])
65
+ return matches
66
+
67
+
68
  restaurants, dish_list, category_list = None, None, None
69
 
70
 
 
75
  csv_file = Path('restaurants.csv')
76
 
77
  restaurants = DocList[RestaurantDescription]()
78
+ restaurant_names = set()
79
  dishes = {}
80
  categories = {}
81
 
 
89
  name = row['name_lang1']
90
  name_alt = None
91
 
92
+ # for this demo, don't add multiple locations of the same restaurant chain
93
+ if name in restaurant_names:
94
+ continue
95
+
96
  ds = literal_eval(row['dishes'])
97
  ds = [normalize_dish(d) for d in ds]
98
+ ds = list(set(ds)) # unique
99
  cs = literal_eval(row['categories'])
100
 
101
+ location = get_district_name(row['map_latitude'], row['map_longitude'])
102
+
103
+ text = f"""\
104
+ Name: {name}
105
+ Intro: {row['intro']}
106
+ Categories: {", ".join(cs)}
107
+ Dishes: {", ".join(ds)}
108
+ Location: {", ".join(location)}\
109
+ """
110
+
111
  r = RestaurantDescription(
112
  embedding=None, # batch create all embeddings later
113
+ text=text,
114
  id=row['id'],
115
  name=name,
116
  name_alt=name_alt,
117
+ intro=row['intro'],
118
  price=int(row['price']),
119
  rating=calculate_rating(row['score_cry'], row['score_o_k'], row['score_smile']),
120
  categories=cs,
121
  dishes=ds,
122
  info_url=row['poi_url'],
123
  image_url=row['door_photos'],
124
+ location=location,
125
  )
126
 
127
  restaurants.append(r)
128
+ restaurant_names.add(name)
129
  add_to_all(r, ds, dishes, Dish)
130
  add_to_all(r, cs, categories, Category)
131
  dish_list = DocList[Dish](dishes.values())
132
  category_list = DocList[Category](categories.values())
133
 
134
+ # import IPython
135
+ # IPython.embed()
136
+
137
  embedding_settings = AzureOpenaiEmbeddings.load_from_env()
138
 
139
  RestaurantDescription.create_embeddings(restaurants, **embedding_settings.to_settings_dict())