JustscrAPIng committed on
Commit
e06eb17
·
verified ·
1 Parent(s): cd18fe1

add the model and python app

Browse files
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from setfit import SetFitModel
3
+ from sentence_transformers import SentenceTransformer, util
4
+ import re
5
+ import numpy as np
6
+
7
# Load both models once at start-up so that a missing or broken model is
# reported immediately rather than on the first request.
print("Loading Models...")
try:
    # SetFit classifier; the error message below expects it to live in the
    # uploaded 'filter-search-model' folder.
    food_model = SetFitModel.from_pretrained("filter-search-model")
    # Sentence encoder used for the cosine-similarity tag search.
    vector_model = SentenceTransformer('all-MiniLM-L6-v2')
    print("Models Loaded Successfully!")
except Exception as e:
    print(f"Error loading models: {e}")
    print("Did you upload the 'filter-search-model' folder correctly?")
    # Re-raise instead of continuing: the rest of the script uses both
    # models, so swallowing the error would only resurface later as a
    # confusing NameError.
    raise
15
+
16
# Tag vocabulary, grouped by category.
# Layout: {category: {tag name: space-separated keyword phrase}}.
# Insertion order is kept as-is because tag names and their phrases are
# read out of these mappings positionally.
tag_definitions = {
    # Religion tags.
    "religion": {
        "none": "secular non-religious atheism no religion",
        "taoist": "taoist taoism yin yang dao",
        "chinese_folk": "chinese folk religion ancestor worship shenism",
        "buddhist": "buddhist temple buddhism monk zen lotus",
        "christian": "christian church jesus catholic protestant cross bible",
        "hinduism": "hindu temple hinduism shiva vishnu",
        "vietnamese_folk": "vietnamese folk religion mother goddess dao mau thanh mau",
        "khmer": "khmer religion theravada buddhism cambodian style",
        "muslim": "muslim islam mosque halal allah",
    },
    # Architectural style tags.
    "arch_style": {
        "french": "french colonial architecture indochina villa balcony yellow walls shutters",
        "modernism": "modern architecture modernism brutalist concrete glass sleek minimal",
        "east asian": "east asian oriental wooden structure curved roof",
        "contemporary": "contemporary design new recent 21st century current",
        "baroque": "baroque architecture ornate dramatic detailed grandiose european",
        "rococo": "rococo style decorative pastel intricate playful",
        "vietnamese": "traditional vietnamese architecture wooden red tile roof",
        "chinese": "chinese architecture dragon courtyard feng shui",
        "romanesque": "romanesque arches thick walls sturdy rounded",
        "gothic": "gothic architecture pointed arches stained glass spires cathedral",
        "renaissance": "renaissance symmetry domes columns classical proportion",
        "neo-romanesque": "neo-romanesque revival 19th century style",
        "indochine": "indochine style fusion french colonial and vietnamese tropical",
        "neo-baroque": "neo-baroque revival grand opera house style",
        "hindu": "hindu architecture cham style champa towers sandstone",
        "art deco": "art deco retro geometric stylized 1920s 1930s",
        "khmer": "khmer architecture angkor wat style stone temple prasat",
        "islamic": "islamic architecture domes minarets geometric patterns",
        "flamboyant": "flamboyant gothic flame-like intricate stone tracery",
        "beaux arts": "beaux arts style monumental classical grand french school",
        "art nouveau": "art nouveau organic lines flowery decorative curved",
    },
    # Building type tags.
    "building_type": {
        "marketplace": "marketplace market bazaar shopping buy food groceries ben thanh",
        "community_centre": "community center public gathering cultural house hall",
        "courthouse": "courthouse law legal judge justice government",
        "hospital": "hospital clinic doctor medical emergency healthcare",
        "mortuary": "mortuary funeral home dead burial services",
        "place_of_worship": "place of worship temple church pagoda shrine mosque pray spiritual",
        "institution": "institution school university college education academy library",
        "tomb": "tomb grave mausoleum cemetery burial site",
        "heritage": "heritage site historical building landmark monument preservation",
        "zoo": "zoo animals botanical garden nature park",
        "museum": "museum exhibition history art gallery display",
        "commercial": "commercial building shop store mall office business trade",
        "government": "government building administrative town hall committee",
        "residential": "residential house home villa apartment living",
        "library": "library books reading study archive",
        "fountain": "fountain water feature square plaza",
        "theatre": "theatre opera house performance stage cinema arts",
        "pagoda": "pagoda tiered tower buddhist temple asian",
        "park": "park green space garden nature relax trees",
        "reservoir": "reservoir water lake dam utility",
        "restaurant": "restaurant place to eat food dining hungry lunch dinner eatery",
        "cafe": "cafe coffee shop drink sip tea chill work",
    },
}
79
+
80
+ # --- 3. PRE-COMPUTE VECTORS ---
81
# Encode every tag's keyword phrase once at start-up; requests then only
# have to encode the query itself.
print("Pre-computing vectors...")
encoded_db = {}
for category, tags_map in tag_definitions.items():
    # 'names' and 'embeddings' are both taken from the same mapping in
    # iteration order, so index i refers to the same tag in each.
    encoded_db[category] = dict(
        embeddings=vector_model.encode([*tags_map.values()], convert_to_tensor=True),
        names=[*tags_map],
    )
print("Ready.")
89
+
90
+ # --- 4. THE EXTRACTION LOGIC ---
91
def _pick_tags(names, scores, base_threshold, relative_ratio=0.85):
    """Select the tag names whose score is high enough, best first.

    A tag qualifies when its score is strictly above ``base_threshold`` and
    at least ``relative_ratio`` times the best qualifying score.

    Args:
        names: Tag names, index-aligned with ``scores``.
        scores: One similarity score (plain float) per tag name.
        base_threshold: Absolute minimum score a tag must exceed.
        relative_ratio: Fraction of the best score a tag must reach.

    Returns:
        List of tag names sorted by descending score, or ``None`` when no
        tag exceeds ``base_threshold``.
    """
    candidates = sorted(
        ((name, score) for name, score in zip(names, scores) if score > base_threshold),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if not candidates:
        return None
    cutoff = candidates[0][1] * relative_ratio
    return [name for name, score in candidates if score >= cutoff]


def extract_tags(user_text):
    """Extract structured search tags from a free-text query.

    Args:
        user_text: The raw query string. ``None`` is treated as an empty
            string so the call does not fail on a missing value.

    Returns:
        A dict with these keys:
            'age': the first four-digit number from 1700 to 2099 found in
                the text, as a string with any trailing "s" removed
                (e.g. "1920s" -> "1920"), or ``None``.
            'religion', 'arch_style', 'building_type': list of matching tag
                names ordered by descending cosine similarity, or ``None``
                when nothing clears the threshold.
            'food_type': the SetFit prediction for the query as a list, or
                ``None`` when no food-related building type was detected.
    """
    user_text = user_text or ""
    detected_data = {}

    # A. Age extraction: a year or decade such as "1890" or "1920s".
    match = re.search(r'\b(1[7-9]|20)\d{2}s?\b', user_text)
    detected_data['age'] = match.group(0).rstrip('s') if match else None

    # B. Vector search (religion / architecture / building type).
    user_embedding = vector_model.encode(user_text, convert_to_tensor=True)
    base_threshold = 0.28

    for category in ["religion", "arch_style", "building_type"]:
        db_data = encoded_db[category]
        # Convert the similarity row to plain floats once rather than
        # calling .item() on every element.
        scores = util.cos_sim(user_embedding, db_data['embeddings'])[0].tolist()
        detected_data[category] = _pick_tags(db_data['names'], scores, base_threshold)

    # C. Food logic (SetFit): only trusted when the query is about a food venue.
    food_triggers = {'restaurant', 'cafe', 'street_food', 'marketplace'}
    buildings = detected_data.get('building_type') or []

    if any(building in food_triggers for building in buildings):
        # Run the classifier only when its result is actually returned.
        prediction = food_model.predict([user_text])
        # NOTE(review): predict appears to return an array/tensor here, but a
        # plain list is accepted as well so a different return type from the
        # installed SetFit version does not break the response — confirm.
        if hasattr(prediction, 'tolist'):
            detected_data['food_type'] = prediction.tolist()
        else:
            detected_data['food_type'] = list(prediction)
    else:
        detected_data['food_type'] = None

    return detected_data
138
+
139
+ # --- 5. LAUNCH API ---
140
+ # We define the Input/Output types clearly for the API
141
# Input and output widgets are built separately so the Interface call below
# only wires them together.
query_box = gr.Textbox(label="User Search", placeholder="e.g. I want a french villa")
tags_view = gr.JSON(label="Extracted Tags")

iface = gr.Interface(
    fn=extract_tags,
    inputs=query_box,
    outputs=tags_view,
    title="Vietnam Travel AI API",
    description="Send a POST request to /api/predict with {'data': ['your query']} to get JSON tags.",
)

# Start the Gradio app.
iface.launch()
filter-search-model/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
filter-search-model/README.md ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - setfit
4
+ - sentence-transformers
5
+ - text-classification
6
+ - generated_from_setfit_trainer
7
+ widget:
8
+ - text: Show me a university location
9
+ - text: Best vietnam restaurant
10
+ - text: crepe pancake french dessert sweet
11
+ - text: Where can I find pot food?
12
+ - text: fountain water feature square plaza
13
+ metrics:
14
+ - accuracy
15
+ pipeline_tag: text-classification
16
+ library_name: setfit
17
+ inference: true
18
+ base_model: sentence-transformers/all-MiniLM-L6-v2
19
+ ---
20
+
21
+ # SetFit with sentence-transformers/all-MiniLM-L6-v2
22
+
23
+ This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
24
+
25
+ The model has been trained using an efficient few-shot learning technique that involves:
26
+
27
+ 1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
28
+ 2. Training a classification head with features from the fine-tuned Sentence Transformer.
29
+
30
+ ## Model Details
31
+
32
+ ### Model Description
33
+ - **Model Type:** SetFit
34
+ - **Sentence Transformer body:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)
35
+ - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
36
+ - **Maximum Sequence Length:** 256 tokens
37
+ - **Number of Classes:** 90 classes
38
+ <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
39
+ <!-- - **Language:** Unknown -->
40
+ <!-- - **License:** Unknown -->
41
+
42
+ ### Model Sources
43
+
44
+ - **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
45
+ - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
46
+ - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
47
+
48
+ ### Model Labels
49
+ | Label | Examples |
50
+ |:----------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------|
51
+ | mexican | <ul><li>'Best salsa restaurant'</li><li>'Best taco restaurant'</li><li>'Looking for a taco meal'</li></ul> |
52
+ | korean | <ul><li>'Looking for a k-food meal'</li><li>'Serving k-food dishes'</li><li>'I want to eat k-food'</li></ul> |
53
+ | zoo | <ul><li>'Find a nature in the city'</li><li>'Find a zoo in the city'</li><li>'I need to go to a zoo'</li></ul> |
54
+ | lebanese | <ul><li>'Looking for a hummus meal'</li><li>'I need some middle'</li><li>'I want to eat lebanese'</li></ul> |
55
+ | chinese | <ul><li>'Best noodles restaurant'</li><li>'Best wok restaurant'</li><li>'Delicious wok cuisine'</li></ul> |
56
+ | tomb | <ul><li>'Find a tomb in the city'</li><li>'Find a grave in the city'</li><li>'Where is the nearest tomb?'</li></ul> |
57
+ | french | <ul><li>'I am craving fine'</li><li>'Examples of colonial structures'</li><li>'I want to eat fine'</li></ul> |
58
+ | thai | <ul><li>'Best thai restaurant'</li><li>'Serving pad dishes'</li><li>'I need some curry'</li></ul> |
59
+ | steamed fish | <ul><li>'I am craving steamed fish'</li><li>'I need some steamed fish'</li><li>'Looking for a steamed fish meal'</li></ul> |
60
+ | modernism | <ul><li>'Show me sleek architecture'</li><li>'Find me a sleek villa'</li><li>'Show me concrete architecture'</li></ul> |
61
+ | diner | <ul><li>'Delicious comfort cuisine'</li><li>'I need some cheap'</li><li>'Best eatery restaurant'</li></ul> |
62
+ | marketplace | <ul><li>'Show me a marketplace location'</li><li>'I need to go to a groceries'</li><li>'Find a market in the city'</li></ul> |
63
+ | reservoir | <ul><li>'Show me a dam location'</li><li>'reservoir water lake dam utility'</li><li>'Find a reservoir in the city'</li></ul> |
64
+ | coffee | <ul><li>'I am craving coffee'</li><li>'Delicious phe cuisine'</li><li>'Looking for a cappuccino meal'</li></ul> |
65
+ | hue style | <ul><li>'I am craving vietnam'</li><li>'Looking for a noodle meal'</li><li>'Where can I find central food?'</li></ul> |
66
+ | khmer | <ul><li>'Examples of stone structures'</li><li>'Locations for cambodian followers'</li><li>'Find me a temple villa'</li></ul> |
67
+ | commercial | <ul><li>'Where is the nearest business?'</li><li>'Show me a mall location'</li><li>'Where is the nearest commercial?'</li></ul> |
68
+ | japanese | <ul><li>'Delicious sashimi cuisine'</li><li>'Best udon restaurant'</li><li>'Looking for a tempura meal'</li></ul> |
69
+ | vietnamese | <ul><li>'Best cuisine restaurant'</li><li>'Where can I find local food?'</li><li>'Looking for a tam meal'</li></ul> |
70
+ | contemporary | <ul><li>'Looking for recent vibes'</li><li>'Find me a design villa'</li><li>'Looking for 21st vibes'</li></ul> |
71
+ | pizza | <ul><li>'I need some pizza'</li><li>'I am craving italian'</li><li>'Where can I find pie food?'</li></ul> |
72
+ | theatre | <ul><li>'I need to go to a theatre'</li><li>'Show me a theatre location'</li><li>'theatre opera house performance stage cinema arts'</li></ul> |
73
+ | library | <ul><li>'I need to go to a study'</li><li>'Where is the nearest archive?'</li><li>'Find a books in the city'</li></ul> |
74
+ | neo-romanesque | <ul><li>'Find me a 19th villa'</li><li>'Show me 19th architecture'</li><li>'Looking for 19th vibes'</li></ul> |
75
+ | sushi | <ul><li>'Looking for a fish meal'</li><li>'I am craving maki'</li><li>'I am craving nigiri'</li></ul> |
76
+ | nướng (grill) | <ul><li>'I want to eat nướng (grill)'</li><li>'I am craving nướng (grill)'</li><li>'Serving nướng (grill) dishes'</li></ul> |
77
+ | indochine | <ul><li>'Find me a and villa'</li><li>'I want to see a fusion style building'</li><li>'Houses with and design'</li></ul> |
78
+ | mediterranean | <ul><li>'I want to eat greek'</li><li>'Delicious healthy cuisine'</li><li>'Serving greek dishes'</li></ul> |
79
+ | sandwich | <ul><li>'Serving sub dishes'</li><li>'I want to eat panini'</li><li>'Looking for a sub meal'</li></ul> |
80
+ | wings | <ul><li>'I want to eat wings'</li><li>'Serving buffalo dishes'</li><li>'Where can I find buffalo food?'</li></ul> |
81
+ | international | <ul><li>'Looking for a foreign meal'</li><li>'Serving western dishes'</li><li>'I want to eat imported'</li></ul> |
82
+ | mortuary | <ul><li>'Where is the nearest burial?'</li><li>'Show me a mortuary location'</li><li>'Show me a services location'</li></ul> |
83
+ | ice cream | <ul><li>'I need some sweet'</li><li>'Delicious cold cuisine'</li><li>'Serving gelato dishes'</li></ul> |
84
+ | baroque | <ul><li>'Houses with baroque design'</li><li>'Find me a ornate villa'</li><li>'I want to see a european style building'</li></ul> |
85
+ | romanesque | <ul><li>'Show me rounded architecture'</li><li>'Looking for thick vibes'</li><li>'Houses with thick design'</li></ul> |
86
+ | hospital | <ul><li>'I need to go to a doctor'</li><li>'Show me a emergency location'</li><li>'Where is the nearest clinic?'</li></ul> |
87
+ | art nouveau | <ul><li>'I want to see a lines style building'</li><li>'Show me lines architecture'</li><li>'Looking for organic vibes'</li></ul> |
88
+ | curry | <ul><li>'Delicious coconut cuisine'</li><li>'I want to eat coconut'</li><li>'I want to eat stew'</li></ul> |
89
+ | neo-baroque | <ul><li>'Looking for house vibes'</li><li>'Houses with opera design'</li><li>'Show me opera architecture'</li></ul> |
90
+ | argentinian | <ul><li>'Where can I find argentinian food?'</li><li>'Serving american dishes'</li><li>'argentinian steak latin american meat asado'</li></ul> |
91
+ | grill | <ul><li>'I want to eat fire'</li><li>'Best smoke restaurant'</li><li>'Best meat restaurant'</li></ul> |
92
+ | christian | <ul><li>'Locations for church followers'</li><li>'I want to visit a catholic place of worship'</li><li>'Locations for cross followers'</li></ul> |
93
+ | burger | <ul><li>'Delicious burger cuisine'</li><li>'Best fast restaurant'</li><li>'I need some fast'</li></ul> |
94
+ | rococo | <ul><li>'Looking for pastel vibes'</li><li>'Find me a decorative villa'</li><li>'Show me playful architecture'</li></ul> |
95
+ | muslim | <ul><li>'Find a mosque temple'</li><li>'Locations for muslim followers'</li><li>'Find a halal temple'</li></ul> |
96
+ | hinduism | <ul><li>'Find a hindu temple'</li><li>'I want to visit a hindu place of worship'</li><li>'I want to visit a shiva place of worship'</li></ul> |
97
+ | seafood | <ul><li>'Looking for a snail meal'</li><li>'Delicious crab cuisine'</li><li>'I need some shell'</li></ul> |
98
+ | asian | <ul><li>'Looking for a eastern meal'</li><li>'I need some oriental'</li><li>'I want to eat asian'</li></ul> |
99
+ | cafe | <ul><li>'Find a sip in the city'</li><li>'Where is the nearest drink?'</li><li>'I need to go to a drink'</li></ul> |
100
+ | park | <ul><li>'I need to go to a trees'</li><li>'Show me a space location'</li><li>'Find a relax in the city'</li></ul> |
101
+ | local food | <ul><li>'Where can I find specialty food?'</li><li>'Looking for a street meal'</li><li>'Serving specialty dishes'</li></ul> |
102
+ | thai hotpot | <ul><li>'I need some sour'</li><li>'Best sour restaurant'</li><li>'Best thai hotpot restaurant'</li></ul> |
103
+ | flamboyant | <ul><li>'Examples of flame-like structures'</li><li>'Houses with flame-like design'</li><li>'Find me a flame-like villa'</li></ul> |
104
+ | gothic | <ul><li>'Show me pointed architecture'</li><li>'Show me gothic architecture'</li><li>'Examples of gothic structures'</li></ul> |
105
+ | buddhist | <ul><li>'I want to visit a monk place of worship'</li><li>'Locations for buddhism followers'</li><li>'Locations for buddhist followers'</li></ul> |
106
+ | art deco | <ul><li>'I want to see a stylized style building'</li><li>'Houses with retro design'</li><li>'Find me a retro villa'</li></ul> |
107
+ | pagoda | <ul><li>'Where is the nearest asian?'</li><li>'Show me a tiered location'</li><li>'Where is the nearest tower?'</li></ul> |
108
+ | islamic | <ul><li>'I want to see a patterns style building'</li><li>'Examples of minarets structures'</li><li>'Examples of patterns structures'</li></ul> |
109
+ | heritage | <ul><li>'Where is the nearest heritage?'</li><li>'Find a monument in the city'</li><li>'I need to go to a heritage'</li></ul> |
110
+ | government | <ul><li>'Find a committee in the city'</li><li>'government building administrative town hall committee'</li><li>'Show me a town location'</li></ul> |
111
+ | steakhouse | <ul><li>'Delicious steakhouse cuisine'</li><li>'Serving steakhouse dishes'</li><li>'I am craving steakhouse'</li></ul> |
112
+ | hotpot | <ul><li>'I want to eat pot'</li><li>'hotpot lau soup pot shabu shabu sharing'</li><li>'Where can I find hotpot food?'</li></ul> |
113
+ | banh cuon | <ul><li>'Delicious breakfast cuisine'</li><li>'Looking for a rice meal'</li><li>'I am craving light'</li></ul> |
114
+ | noodles | <ul><li>'Where can I find vermicelli food?'</li><li>'I am craving soup'</li><li>'I need some vermicelli'</li></ul> |
115
+ | fine dining | <ul><li>'I am craving fine dining'</li><li>'I am craving upscale'</li><li>'Looking for a luxury meal'</li></ul> |
116
+ | stir-fried vegetables | <ul><li>'Serving vegetarian dishes'</li><li>'I need some vegetarian'</li><li>'Best glory restaurant'</li></ul> |
117
+ | courthouse | <ul><li>'I need to go to a law'</li><li>'Find a legal in the city'</li><li>'I need to go to a justice'</li></ul> |
118
+ | none | <ul><li>'I want to visit a atheism place of worship'</li><li>'Find a atheism temple'</li><li>'Locations for atheism followers'</li></ul> |
119
+ | grilled fish | <ul><li>'Where can I find grilled fish food?'</li><li>'grilled fish ca nuong seafood'</li><li>'Best grilled fish restaurant'</li></ul> |
120
+ | italian | <ul><li>'I want to eat pasta'</li><li>'Delicious rome cuisine'</li><li>'Where can I find spaghetti food?'</li></ul> |
121
+ | beef bowl | <ul><li>'I need some gyudon'</li><li>'Where can I find gyudon food?'</li><li>'Where can I find beef bowl food?'</li></ul> |
122
+ | east asian | <ul><li>'Find me a roof villa'</li><li>'Show me roof architecture'</li><li>'Find me a structure villa'</li></ul> |
123
+ | museum | <ul><li>'I need to go to a gallery'</li><li>'Find a exhibition in the city'</li><li>'Find a art in the city'</li></ul> |
124
+ | place_of_worship | <ul><li>'Show me a pray location'</li><li>'I need to go to a pagoda'</li><li>'Find a church in the city'</li></ul> |
125
+ | barbecue | <ul><li>'Where can I find smokehouse food?'</li><li>'Serving brisket dishes'</li><li>'barbecue bbq ribs brisket smokehouse'</li></ul> |
126
+ | residential | <ul><li>'I need to go to a villa'</li><li>'I need to go to a apartment'</li><li>'Show me a living location'</li></ul> |
127
+ | renaissance | <ul><li>'I want to see a domes style building'</li><li>'Houses with classical design'</li><li>'Find me a symmetry villa'</li></ul> |
128
+ | crepe | <ul><li>'Best crepe restaurant'</li><li>'Serving pancake dishes'</li><li>'Serving crepe dishes'</li></ul> |
129
+ | institution | <ul><li>'Show me a institution location'</li><li>'Show me a education location'</li><li>'Where is the nearest institution?'</li></ul> |
130
+ | indian | <ul><li>'Where can I find naan food?'</li><li>'Serving masala dishes'</li><li>'I am craving naan'</li></ul> |
131
+ | community_centre | <ul><li>'Show me a public location'</li><li>'Where is the nearest house?'</li><li>'Find a center in the city'</li></ul> |
132
+ | fountain | <ul><li>'Show me a water location'</li><li>'I need to go to a water'</li><li>'Find a water in the city'</li></ul> |
133
+ | hindu | <ul><li>'Find me a hindu villa'</li><li>'Houses with hindu design'</li><li>'I want to see a champa style building'</li></ul> |
134
+ | chinese_folk | <ul><li>'Locations for worship followers'</li><li>'Find a ancestor temple'</li><li>'Locations for shenism followers'</li></ul> |
135
+ | restaurant | <ul><li>'I need to go to a dining'</li><li>'Find a lunch in the city'</li><li>'Where is the nearest dinner?'</li></ul> |
136
+ | beaux arts | <ul><li>'Houses with school design'</li><li>'Show me beaux arts architecture'</li><li>'Houses with beaux arts design'</li></ul> |
137
+ | vietnamese_folk | <ul><li>'I want to visit a mother place of worship'</li><li>'Locations for thanh followers'</li><li>'Locations for mau followers'</li></ul> |
138
+ | american | <ul><li>'Serving usa dishes'</li><li>'Delicious usa cuisine'</li><li>'american food usa burger fries steak'</li></ul> |
139
+ | taoist | <ul><li>'I want to visit a yin place of worship'</li><li>'I want to visit a taoism place of worship'</li><li>'Find a taoist temple'</li></ul> |
140
+ | pasta | <ul><li>'Delicious carbonara cuisine'</li><li>'I want to eat carbonara'</li><li>'Where can I find carbonara food?'</li></ul> |
141
+
142
+ ## Uses
143
+
144
+ ### Direct Use for Inference
145
+
146
+ First install the SetFit library:
147
+
148
+ ```bash
149
+ pip install setfit
150
+ ```
151
+
152
+ Then you can load this model and run inference.
153
+
154
+ ```python
155
+ from setfit import SetFitModel
156
+
157
+ # Download from the 🤗 Hub
158
+ model = SetFitModel.from_pretrained("setfit_model_id")
159
+ # Run inference
160
+ preds = model("Best vietnam restaurant")
161
+ ```
162
+
163
+ <!--
164
+ ### Downstream Use
165
+
166
+ *List how someone could finetune this model on their own dataset.*
167
+ -->
168
+
169
+ <!--
170
+ ### Out-of-Scope Use
171
+
172
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
173
+ -->
174
+
175
+ <!--
176
+ ## Bias, Risks and Limitations
177
+
178
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
179
+ -->
180
+
181
+ <!--
182
+ ### Recommendations
183
+
184
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
185
+ -->
186
+
187
+ ## Training Details
188
+
189
+ ### Training Set Metrics
190
+ | Training set | Min | Median | Max |
191
+ |:-------------|:----|:-------|:----|
192
+ | Word count | 3 | 4.8024 | 10 |
193
+
194
+ | Label | Training Sample Count |
195
+ |:----------------------|:----------------------|
196
+ | american | 17 |
197
+ | argentinian | 36 |
198
+ | art deco | 32 |
199
+ | art nouveau | 20 |
200
+ | asian | 24 |
201
+ | banh cuon | 46 |
202
+ | barbecue | 21 |
203
+ | baroque | 34 |
204
+ | beaux arts | 14 |
205
+ | beef bowl | 15 |
206
+ | buddhist | 23 |
207
+ | burger | 23 |
208
+ | cafe | 27 |
209
+ | chinese | 73 |
210
+ | chinese_folk | 17 |
211
+ | christian | 25 |
212
+ | coffee | 36 |
213
+ | commercial | 25 |
214
+ | community_centre | 33 |
215
+ | contemporary | 42 |
216
+ | courthouse | 25 |
217
+ | crepe | 15 |
218
+ | curry | 24 |
219
+ | diner | 46 |
220
+ | east asian | 31 |
221
+ | fine dining | 37 |
222
+ | flamboyant | 15 |
223
+ | fountain | 18 |
224
+ | french | 83 |
225
+ | gothic | 28 |
226
+ | government | 13 |
227
+ | grill | 48 |
228
+ | grilled fish | 16 |
229
+ | heritage | 20 |
230
+ | hindu | 31 |
231
+ | hinduism | 17 |
232
+ | hospital | 22 |
233
+ | hotpot | 28 |
234
+ | hue style | 43 |
235
+ | ice cream | 36 |
236
+ | indian | 30 |
237
+ | indochine | 24 |
238
+ | institution | 26 |
239
+ | international | 34 |
240
+ | islamic | 18 |
241
+ | italian | 20 |
242
+ | japanese | 45 |
243
+ | khmer | 44 |
244
+ | korean | 37 |
245
+ | lebanese | 31 |
246
+ | library | 14 |
247
+ | local food | 37 |
248
+ | marketplace | 28 |
249
+ | mediterranean | 45 |
250
+ | mexican | 29 |
251
+ | modernism | 40 |
252
+ | mortuary | 24 |
253
+ | museum | 23 |
254
+ | muslim | 20 |
255
+ | neo-baroque | 23 |
256
+ | neo-romanesque | 18 |
257
+ | none | 20 |
258
+ | noodles | 14 |
259
+ | nướng (grill) | 16 |
260
+ | pagoda | 15 |
261
+ | park | 16 |
262
+ | pasta | 8 |
263
+ | pizza | 35 |
264
+ | place_of_worship | 28 |
265
+ | renaissance | 30 |
266
+ | reservoir | 16 |
267
+ | residential | 16 |
268
+ | restaurant | 27 |
269
+ | rococo | 29 |
270
+ | romanesque | 27 |
271
+ | sandwich | 23 |
272
+ | seafood | 39 |
273
+ | steakhouse | 14 |
274
+ | steamed fish | 17 |
275
+ | stir-fried vegetables | 43 |
276
+ | sushi | 36 |
277
+ | taoist | 16 |
278
+ | thai | 45 |
279
+ | thai hotpot | 23 |
280
+ | theatre | 23 |
281
+ | tomb | 16 |
282
+ | vietnamese | 77 |
283
+ | vietnamese_folk | 18 |
284
+ | wings | 33 |
285
+ | zoo | 21 |
286
+
287
+ ### Training Hyperparameters
288
+ - batch_size: (16, 16)
289
+ - num_epochs: (1, 16)
290
+ - max_steps: 2000
291
+ - sampling_strategy: oversampling
292
+ - body_learning_rate: (2e-05, 1e-05)
293
+ - head_learning_rate: 0.01
294
+ - loss: CosineSimilarityLoss
295
+ - distance_metric: cosine_distance
296
+ - margin: 0.25
297
+ - end_to_end: False
298
+ - use_amp: False
299
+ - warmup_proportion: 0.1
300
+ - l2_weight: 0.01
301
+ - seed: 42
302
+ - run_name: my_fresh_run_v2
303
+ - evaluation_strategy: steps
304
+ - eval_max_steps: -1
305
+ - load_best_model_at_end: True
306
+
307
+ ### Training Results
308
+ | Epoch | Step | Training Loss | Validation Loss |
309
+ |:------:|:----:|:-------------:|:---------------:|
310
+ | 0.0005 | 1 | 0.2922 | - |
311
+ | 0.025 | 50 | 0.2078 | - |
312
+ | 0.05 | 100 | 0.1794 | - |
313
+ | 0.075 | 150 | 0.1687 | - |
314
+ | 0.1 | 200 | 0.1498 | 0.1183 |
315
+ | 0.125 | 250 | 0.1504 | - |
316
+ | 0.15 | 300 | 0.1304 | - |
317
+ | 0.175 | 350 | 0.1323 | - |
318
+ | 0.2 | 400 | 0.1249 | 0.1001 |
319
+ | 0.225 | 450 | 0.1234 | - |
320
+ | 0.25 | 500 | 0.1223 | - |
321
+ | 0.275 | 550 | 0.1133 | - |
322
+ | 0.3 | 600 | 0.1106 | 0.0899 |
323
+ | 0.325 | 650 | 0.1026 | - |
324
+ | 0.35 | 700 | 0.1069 | - |
325
+ | 0.375 | 750 | 0.0975 | - |
326
+ | 0.4 | 800 | 0.0994 | 0.0772 |
327
+ | 0.425 | 850 | 0.1006 | - |
328
+ | 0.45 | 900 | 0.0979 | - |
329
+ | 0.475 | 950 | 0.0817 | - |
330
+ | 0.5 | 1000 | 0.0966 | 0.0683 |
331
+ | 0.525 | 1050 | 0.0856 | - |
332
+ | 0.55 | 1100 | 0.082 | - |
333
+ | 0.575 | 1150 | 0.0767 | - |
334
+ | 0.6 | 1200 | 0.0788 | 0.0620 |
335
+ | 0.625 | 1250 | 0.0705 | - |
336
+ | 0.65 | 1300 | 0.0713 | - |
337
+ | 0.675 | 1350 | 0.0711 | - |
338
+ | 0.7 | 1400 | 0.0713 | 0.0575 |
339
+ | 0.725 | 1450 | 0.074 | - |
340
+ | 0.75 | 1500 | 0.0618 | - |
341
+ | 0.775 | 1550 | 0.0701 | - |
342
+ | 0.8 | 1600 | 0.0674 | 0.0539 |
343
+ | 0.825 | 1650 | 0.0685 | - |
344
+ | 0.85 | 1700 | 0.0678 | - |
345
+ | 0.875 | 1750 | 0.0671 | - |
346
+ | 0.9 | 1800 | 0.0657 | 0.0517 |
347
+ | 0.925 | 1850 | 0.0593 | - |
348
+ | 0.95 | 1900 | 0.0641 | - |
349
+ | 0.975 | 1950 | 0.0659 | - |
350
+ | 1.0 | 2000 | 0.0629 | 0.0508 |
351
+
352
+ ### Framework Versions
353
+ - Python: 3.12.12
354
+ - SetFit: 1.1.3
355
+ - Sentence Transformers: 5.1.2
356
+ - Transformers: 4.57.2
357
+ - PyTorch: 2.9.0+cu126
358
+ - Datasets: 4.0.0
359
+ - Tokenizers: 0.22.1
360
+
361
+ ## Citation
362
+
363
+ ### BibTeX
364
+ ```bibtex
365
+ @article{https://doi.org/10.48550/arxiv.2209.11055,
366
+ doi = {10.48550/ARXIV.2209.11055},
367
+ url = {https://arxiv.org/abs/2209.11055},
368
+ author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
369
+ keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences, FOS: Computer and information sciences},
370
+ title = {Efficient Few-Shot Learning Without Prompts},
371
+ publisher = {arXiv},
372
+ year = {2022},
373
+ copyright = {Creative Commons Attribution 4.0 International}
374
+ }
375
+ ```
376
+
377
+ <!--
378
+ ## Glossary
379
+
380
+ *Clearly define terms in order to be accessible across audiences.*
381
+ -->
382
+
383
+ <!--
384
+ ## Model Card Authors
385
+
386
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
387
+ -->
388
+
389
+ <!--
390
+ ## Model Card Contact
391
+
392
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
393
+ -->
filter-search-model/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "float32",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 384,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 1536,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "transformers_version": "4.57.2",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
filter-search-model/config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "5.1.2",
4
+ "transformers": "4.57.2",
5
+ "pytorch": "2.9.0+cu126"
6
+ },
7
+ "model_type": "SentenceTransformer",
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
filter-search-model/config_setfit.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels": [
3
+ "american",
4
+ "argentinian",
5
+ "art deco",
6
+ "art nouveau",
7
+ "asian",
8
+ "banh cuon",
9
+ "barbecue",
10
+ "baroque",
11
+ "beaux arts",
12
+ "beef bowl",
13
+ "buddhist",
14
+ "burger",
15
+ "cafe",
16
+ "chinese",
17
+ "chinese_folk",
18
+ "christian",
19
+ "coffee",
20
+ "commercial",
21
+ "community_centre",
22
+ "contemporary",
23
+ "courthouse",
24
+ "crepe",
25
+ "curry",
26
+ "diner",
27
+ "east asian",
28
+ "fine dining",
29
+ "flamboyant",
30
+ "fountain",
31
+ "french",
32
+ "gothic",
33
+ "government",
34
+ "grill",
35
+ "grilled fish",
36
+ "heritage",
37
+ "hindu",
38
+ "hinduism",
39
+ "hospital",
40
+ "hotpot",
41
+ "hue style",
42
+ "ice cream",
43
+ "indian",
44
+ "indochine",
45
+ "institution",
46
+ "international",
47
+ "islamic",
48
+ "italian",
49
+ "japanese",
50
+ "khmer",
51
+ "korean",
52
+ "lebanese",
53
+ "library",
54
+ "local food",
55
+ "marketplace",
56
+ "mediterranean",
57
+ "mexican",
58
+ "modernism",
59
+ "mortuary",
60
+ "museum",
61
+ "muslim",
62
+ "neo-baroque",
63
+ "neo-romanesque",
64
+ "none",
65
+ "noodles",
66
+ "n\u01b0\u1edbng (grill)",
67
+ "pagoda",
68
+ "park",
69
+ "pasta",
70
+ "pizza",
71
+ "place_of_worship",
72
+ "renaissance",
73
+ "reservoir",
74
+ "residential",
75
+ "restaurant",
76
+ "rococo",
77
+ "romanesque",
78
+ "sandwich",
79
+ "seafood",
80
+ "steakhouse",
81
+ "steamed fish",
82
+ "stir-fried vegetables",
83
+ "sushi",
84
+ "taoist",
85
+ "thai",
86
+ "thai hotpot",
87
+ "theatre",
88
+ "tomb",
89
+ "vietnamese",
90
+ "vietnamese_folk",
91
+ "wings",
92
+ "zoo"
93
+ ],
94
+ "normalize_embeddings": false
95
+ }
filter-search-model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45f9966ed1b4481b29786c463dc03ddea3ab6529ebbb4229a0c11ba64ea8ad16
3
+ size 90864192
filter-search-model/model_head.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:220dc87103dba7842ddbc931c391a678dfd3ba39670ca503843bef1886204247
3
+ size 285607
filter-search-model/modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
filter-search-model/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
filter-search-model/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
filter-search-model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
filter-search-model/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
filter-search-model/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ setfit
2
+ sentence-transformers
3
+ scikit-learn
4
+ pandas
5
+ gradio