Spaces:
Runtime error
Runtime error
| import csv | |
| import json | |
| from ast import literal_eval | |
| from pathlib import Path | |
| from typing import TypeVar | |
| from docarray import DocList | |
| from dotenv import load_dotenv | |
| from azure_openai import AzureOpenaiEmbeddings | |
| from data import RestaurantDescription, restaurant_index, Dish, Category, dish_index, category_index | |
def calculate_rating(low: str, medium: str, high: str) -> float:
    """Collapse three vote counts into a single score between 0 and 1.

    Every ``high`` vote counts as 1.0, every ``medium`` vote as 0.7 and every
    ``low`` vote as 0; the weighted sum is divided by the total vote count.

    Args:
        low: Number of negative votes, as an integer string.
        medium: Number of neutral votes, as an integer string.
        high: Number of positive votes, as an integer string.

    Returns:
        The weighted share of favourable votes, or ``0.0`` when there are
        no votes at all.

    Raises:
        ValueError: If a count cannot be parsed as an integer.
    """
    low_votes = int(low)
    medium_votes = int(medium)
    high_votes = int(high)

    total = low_votes + medium_votes + high_votes
    if total == 0:
        # An unrated restaurant must not abort the import with a
        # ZeroDivisionError; treat it as having the lowest score.
        return 0.0
    return (medium_votes * 0.7 + high_votes) / total
def normalize_dish(dish_name: str) -> str:
    """Return *dish_name* in title case with non-breaking spaces removed.

    Non-breaking spaces (U+00A0) are deleted outright, not replaced by a
    regular space, before the title-casing is applied.
    """
    without_nbsp = ''.join(dish_name.split('\xa0'))
    return without_nbsp.title()
T = TypeVar('T')


def add_to_all(restaurant: 'RestaurantDescription', keys: list[str], mapping: dict[str, T], cls: type[T]) -> None:
    """Record *restaurant* under every key in *keys*, creating entries on demand.

    Args:
        restaurant: Restaurant whose ``id`` is appended to each entry's
            ``restaurants`` list.
        keys: Entry identifiers; duplicates are ignored.
        mapping: Accumulator from key to entry. Mutated in place.
        cls: Entry class, called as ``cls(id=key, text=key, restaurants=[])``
            for every key that is not in *mapping* yet.
    """
    # dict.fromkeys drops duplicates just like a set, but keeps first-seen
    # order. Iterating a set of strings depends on hash randomization, which
    # made the insertion order into `mapping` differ from run to run.
    for key in dict.fromkeys(keys):
        entry = mapping.get(key)
        if entry is None:
            entry = mapping[key] = cls(id=key, text=key, restaurants=[])
        entry.restaurants.append(restaurant.id)
def load_districts():
    """Load the district boundaries and build one polygon per district.

    Reads ``data/district_boundary.json`` and, for every entry under its
    ``features`` key, pairs the ``District`` property with a matplotlib
    ``Path`` built from the first element of the geometry's coordinates.

    Returns:
        A list of dicts, each with a ``"name"`` and a ``"polygon"`` key.
    """
    boundary_file = Path('data/district_boundary.json')
    with boundary_file.open('r', encoding='utf-8') as fh:
        boundary_data = json.load(fh)

    # Imported inside the function and aliased so that it does not shadow
    # pathlib.Path, which is used above.
    from matplotlib.path import Path as Polyline

    return [
        {
            "name": feature['properties']['District'],
            "polygon": Polyline(feature['geometry']['coordinates'][0]),
        }
        for feature in boundary_data['features']
    ]
# District polygons, loaded once when the module is imported.
DISTRICTS = load_districts()


def get_district_name(lat: str, lon: str):
    """Return the names of all districts whose polygon contains the point.

    Args:
        lat: Latitude; converted with ``float()``.
        lon: Longitude; converted with ``float()``.

    Returns:
        A list of district names in ``DISTRICTS`` order. The list is empty
        when no polygon contains the point.
    """
    latitude = float(lat)
    longitude = float(lon)
    # The polygons are queried with the point in (lon, lat) order.
    point = (longitude, latitude)
    return [
        entry['name']
        for entry in DISTRICTS
        if entry['polygon'].contains_point(point)
    ]
# Filled in by main(); kept at module level so the loaded documents can be
# inspected after a run.
restaurants, dish_list, category_list = None, None, None


def _price_bucket(price: int) -> str:
    """Map a numeric price to the coarse label used in the embedded text."""
    if price < 100:
        return 'cheap'
    if price < 300:
        return 'moderate'
    return 'expensive'


def main():
    """Build and persist the restaurant, dish and category indexes.

    Reads ``restaurants.csv``, keeps the first row seen for each restaurant
    name, and turns every kept row into a ``RestaurantDescription`` whose
    ``text`` summarises name, intro, dishes, districts and price bucket.
    Dishes and categories are collected into their own documents that list
    the ids of the restaurants referring to them. Embeddings are then
    created for all three collections, which are indexed and persisted.

    The resulting collections are also stored in the module-level names
    ``restaurants``, ``dish_list`` and ``category_list``.
    """
    global restaurants, dish_list, category_list
    load_dotenv()

    csv_file = Path('restaurants.csv')
    restaurants = DocList[RestaurantDescription]()
    restaurant_names = set()
    dishes = {}
    categories = {}

    with csv_file.open(encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            if row['name_lang2']:
                name = row['name_lang2']
                name_alt = row['name_lang1']
            else:
                name = row['name_lang1']
                name_alt = None

            # for this demo, don't add multiple locations of the same restaurant chain
            if name in restaurant_names:
                continue

            # Deduplicate while keeping first-seen order. Going through a set
            # made the dish order, and with it the embedded text, change
            # from run to run.
            ds = list(dict.fromkeys(
                normalize_dish(d) for d in literal_eval(row['dishes'])
            ))
            cs = literal_eval(row['categories'])
            location = get_district_name(row['map_latitude'], row['map_longitude'])

            price = int(row['price'])
            price_bucket = _price_bucket(price)

            extra_data = ' Romantic Dining.' if 'Romantic Dining' in cs else ""

            # The template lines start at column 0 so that no indentation
            # leaks into the text that gets embedded.
            text = f"""\
Name: {name}
Intro: {row['intro']}{extra_data}
Dishes: {", ".join(ds)}
Location: {", ".join(location)}
Price: {price_bucket}\
"""
            r = RestaurantDescription(
                embedding=None,  # batch create all embeddings later
                text=text,
                id=row['id'],
                name=name,
                name_alt=name_alt,
                intro=row['intro'],
                price=price,
                rating=calculate_rating(row['score_cry'], row['score_o_k'], row['score_smile']),
                categories=cs,
                dishes=ds,
                info_url=row['poi_url'],
                image_url=row['door_photos'],
                location=location,
            )
            restaurants.append(r)
            restaurant_names.add(name)
            add_to_all(r, ds, dishes, Dish)
            add_to_all(r, cs, categories, Category)

    dish_list = DocList[Dish](dishes.values())
    category_list = DocList[Category](categories.values())

    # Build the settings once and reuse them for all three collections.
    embedding_kwargs = AzureOpenaiEmbeddings.load_from_env().to_settings_dict()
    RestaurantDescription.create_embeddings(restaurants, **embedding_kwargs)
    Dish.create_embeddings(dish_list, **embedding_kwargs)
    Category.create_embeddings(category_list, **embedding_kwargs)

    restaurant_index.index(restaurants)
    dish_index.index(dish_list)
    category_index.index(category_list)

    restaurant_index.persist()
    dish_index.persist()
    category_index.persist()


if __name__ == '__main__':
    main()