Spaces:
Sleeping
Sleeping
add the model and python app
Browse files- app.py +149 -0
- filter-search-model/1_Pooling/config.json +10 -0
- filter-search-model/README.md +393 -0
- filter-search-model/config.json +25 -0
- filter-search-model/config_sentence_transformers.json +14 -0
- filter-search-model/config_setfit.json +95 -0
- filter-search-model/model.safetensors +3 -0
- filter-search-model/model_head.pkl +3 -0
- filter-search-model/modules.json +20 -0
- filter-search-model/sentence_bert_config.json +4 -0
- filter-search-model/special_tokens_map.json +37 -0
- filter-search-model/tokenizer.json +0 -0
- filter-search-model/tokenizer_config.json +65 -0
- filter-search-model/vocab.txt +0 -0
- requirements.txt +5 -0
app.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from setfit import SetFitModel
|
| 3 |
+
from sentence_transformers import SentenceTransformer, util
|
| 4 |
+
import re
|
| 5 |
+
import numpy as np
|
| 6 |
+
|
| 7 |
+
# Load both models once at start-up so individual requests do not pay the
# loading cost.
print("Loading Models...")
try:
    # SetFit classifier loaded from the "filter-search-model" identifier
    # (expected to be the folder uploaded next to this script).
    food_model = SetFitModel.from_pretrained("filter-search-model")
    # Sentence encoder used for the tag similarity search.
    vector_model = SentenceTransformer('all-MiniLM-L6-v2')
except Exception as e:
    print(f"Error loading models: {e}")
    print("Did you upload the 'filter-search-model' folder correctly?")
    # Fail fast: the rest of the script needs both models, so continuing
    # would only surface later as an unrelated NameError.
    raise
else:
    print("Models Loaded Successfully!")
|
| 15 |
+
|
| 16 |
+
# Each table maps a tag name to a free-text description. The descriptions are
# what gets embedded; the tag name is what is reported back to the caller.

# --- RELIGION ---
_RELIGION_TAGS = {
    'none': 'secular non-religious atheism no religion',
    'taoist': 'taoist taoism yin yang dao',
    'chinese_folk': 'chinese folk religion ancestor worship shenism',
    'buddhist': 'buddhist temple buddhism monk zen lotus',
    'christian': 'christian church jesus catholic protestant cross bible',
    'hinduism': 'hindu temple hinduism shiva vishnu',
    'vietnamese_folk': 'vietnamese folk religion mother goddess dao mau thanh mau',
    'khmer': 'khmer religion theravada buddhism cambodian style',
    'muslim': 'muslim islam mosque halal allah',
}

# --- ARCHITECTURE STYLE ---
_ARCH_STYLE_TAGS = {
    'french': 'french colonial architecture indochina villa balcony yellow walls shutters',
    'modernism': 'modern architecture modernism brutalist concrete glass sleek minimal',
    'east asian': 'east asian oriental wooden structure curved roof',
    'contemporary': 'contemporary design new recent 21st century current',
    'baroque': 'baroque architecture ornate dramatic detailed grandiose european',
    'rococo': 'rococo style decorative pastel intricate playful',
    'vietnamese': 'traditional vietnamese architecture wooden red tile roof',
    'chinese': 'chinese architecture dragon courtyard feng shui',
    'romanesque': 'romanesque arches thick walls sturdy rounded',
    'gothic': 'gothic architecture pointed arches stained glass spires cathedral',
    'renaissance': 'renaissance symmetry domes columns classical proportion',
    'neo-romanesque': 'neo-romanesque revival 19th century style',
    'indochine': 'indochine style fusion french colonial and vietnamese tropical',
    'neo-baroque': 'neo-baroque revival grand opera house style',
    'hindu': 'hindu architecture cham style champa towers sandstone',
    'art deco': 'art deco retro geometric stylized 1920s 1930s',
    'khmer': 'khmer architecture angkor wat style stone temple prasat',
    'islamic': 'islamic architecture domes minarets geometric patterns',
    'flamboyant': 'flamboyant gothic flame-like intricate stone tracery',
    'beaux arts': 'beaux arts style monumental classical grand french school',
    'art nouveau': 'art nouveau organic lines flowery decorative curved',
}

# --- BUILDING TYPE ---
_BUILDING_TYPE_TAGS = {
    'marketplace': 'marketplace market bazaar shopping buy food groceries ben thanh',
    'community_centre': 'community center public gathering cultural house hall',
    'courthouse': 'courthouse law legal judge justice government',
    'hospital': 'hospital clinic doctor medical emergency healthcare',
    'mortuary': 'mortuary funeral home dead burial services',
    'place_of_worship': 'place of worship temple church pagoda shrine mosque pray spiritual',
    'institution': 'institution school university college education academy library',
    'tomb': 'tomb grave mausoleum cemetery burial site',
    'heritage': 'heritage site historical building landmark monument preservation',
    'zoo': 'zoo animals botanical garden nature park',
    'museum': 'museum exhibition history art gallery display',
    'commercial': 'commercial building shop store mall office business trade',
    'government': 'government building administrative town hall committee',
    'residential': 'residential house home villa apartment living',
    'library': 'library books reading study archive',
    'fountain': 'fountain water feature square plaza',
    'theatre': 'theatre opera house performance stage cinema arts',
    'pagoda': 'pagoda tiered tower buddhist temple asian',
    'park': 'park green space garden nature relax trees',
    'reservoir': 'reservoir water lake dam utility',
    'restaurant': 'restaurant place to eat food dining hungry lunch dinner eatery',
    'cafe': 'cafe coffee shop drink sip tea chill work',
}

# Category name -> {tag name -> description}. Insertion order matters: it
# fixes the order of tag names for anything that iterates these dicts.
tag_definitions = {
    "religion": _RELIGION_TAGS,
    "arch_style": _ARCH_STYLE_TAGS,
    "building_type": _BUILDING_TYPE_TAGS,
}
|
| 79 |
+
|
| 80 |
+
# --- 3. PRE-COMPUTE VECTORS ---
|
| 81 |
+
print("Pre-computing vectors...")
# For every tag category, store the tag names and the embeddings of their
# descriptions. Both are taken from the same dict, so position i in 'names'
# corresponds to row i in 'embeddings'.
encoded_db = {
    group: {
        'embeddings': vector_model.encode(list(descriptions.values()), convert_to_tensor=True),
        'names': list(descriptions.keys()),
    }
    for group, descriptions in tag_definitions.items()
}
print("Ready.")
|
| 89 |
+
|
| 90 |
+
# --- 4. THE EXTRACTION LOGIC ---
|
| 91 |
+
def _select_tags(names, scores, base_threshold=0.28, relative_ratio=0.85):
    """Return the tag names whose similarity score is high enough to keep.

    A tag is kept when its score is strictly greater than ``base_threshold``
    and at least ``relative_ratio`` times the best score among the tags that
    passed that first cut.

    Args:
        names: Tag names, index-aligned with ``scores``.
        scores: One similarity score per tag. Each element must be comparable
            with a float and convertible with ``float()``.
        base_threshold: Absolute minimum score a tag must exceed.
        relative_ratio: Fraction of the best score a tag must reach.

    Returns:
        A list of tag names ordered from best score to worst, or ``None`` when
        no tag exceeds ``base_threshold``.
    """
    candidates = [
        (name, float(score))
        for name, score in zip(names, scores)
        if score > base_threshold
    ]
    if not candidates:
        return None

    candidates.sort(key=lambda pair: pair[1], reverse=True)
    relative_cutoff = candidates[0][1] * relative_ratio
    return [name for name, score in candidates if score >= relative_cutoff]


def extract_tags(user_text):
    """Extract structured search tags from a free-text query.

    Args:
        user_text: The raw query string typed by the user.

    Returns:
        A dict with the keys ``'age'``, ``'religion'``, ``'arch_style'``,
        ``'building_type'`` and ``'food_type'``:

        * ``'age'`` is the first four-digit number in the range 1700-2099
          found in the text (a trailing ``s`` is stripped), or ``None``.
        * ``'religion'``, ``'arch_style'`` and ``'building_type'`` are lists
          of matching tag names (best match first), or ``None`` when nothing
          matched.
        * ``'food_type'`` is the SetFit prediction as a list, but only when
          one of the detected building types is a food venue; otherwise
          ``None``.

        A ``None``, empty or whitespace-only query yields ``None`` for every
        key without invoking any model.
    """
    detected_data = {
        'age': None,
        'religion': None,
        'arch_style': None,
        'building_type': None,
        'food_type': None,
    }

    # Nothing to analyse: skip the regex and both models instead of embedding
    # an empty string (or crashing on None).
    if not user_text or not user_text.strip():
        return detected_data

    # A. Age Extraction
    match = re.search(r'\b(1[7-9]|20)\d{2}s?\b', user_text)
    if match:
        # "1920s" is reported as "1920".
        detected_data['age'] = match.group(0).rstrip('s')

    # B. Vector Search (Arch/Religion/Building)
    user_embedding = vector_model.encode(user_text, convert_to_tensor=True)
    for category in ("religion", "arch_style", "building_type"):
        db_data = encoded_db[category]
        # cos_sim returns a matrix; row 0 holds the scores for the one query.
        scores = util.cos_sim(user_embedding, db_data['embeddings'])[0]
        detected_data[category] = _select_tags(db_data['names'], scores)

    # C. Smart Food Logic (SetFit)
    # NOTE(review): 'street_food' is not among the building_type tags defined
    # in this file, so it can never match today - confirm whether a tag is
    # missing or the trigger is obsolete.
    food_triggers = ('restaurant', 'cafe', 'street_food', 'marketplace')
    buildings = detected_data['building_type'] or []
    is_food_venue = any(b in food_triggers for b in buildings)

    # Trust logic: food tags are only returned when a food venue was found,
    # so the classifier is only run in that case.
    if is_food_venue:
        prediction = food_model.predict([user_text])
        # predict() may return an array-like or a plain list; either way the
        # result has to end up as a JSON-serialisable list.
        if hasattr(prediction, 'tolist'):
            detected_data['food_type'] = prediction.tolist()
        else:
            detected_data['food_type'] = list(prediction)

    return detected_data
|
| 138 |
+
|
| 139 |
+
# --- 5. LAUNCH API ---
|
| 140 |
+
# We define the Input/Output types clearly for the API
|
| 141 |
+
# Build the input and output components first, then wire them to extract_tags.
_query_box = gr.Textbox(label="User Search", placeholder="e.g. I want a french villa")
_tags_view = gr.JSON(label="Extracted Tags")

iface = gr.Interface(
    fn=extract_tags,
    inputs=_query_box,
    outputs=_tags_view,
    title="Vietnam Travel AI API",
    description="Send a POST request to /api/predict with {'data': ['your query']} to get JSON tags.",
)

# Start the Gradio app.
iface.launch()
|
filter-search-model/1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 384,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
filter-search-model/README.md
ADDED
|
@@ -0,0 +1,393 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- setfit
|
| 4 |
+
- sentence-transformers
|
| 5 |
+
- text-classification
|
| 6 |
+
- generated_from_setfit_trainer
|
| 7 |
+
widget:
|
| 8 |
+
- text: Show me a university location
|
| 9 |
+
- text: Best vietnam restaurant
|
| 10 |
+
- text: crepe pancake french dessert sweet
|
| 11 |
+
- text: Where can I find pot food?
|
| 12 |
+
- text: fountain water feature square plaza
|
| 13 |
+
metrics:
|
| 14 |
+
- accuracy
|
| 15 |
+
pipeline_tag: text-classification
|
| 16 |
+
library_name: setfit
|
| 17 |
+
inference: true
|
| 18 |
+
base_model: sentence-transformers/all-MiniLM-L6-v2
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
# SetFit with sentence-transformers/all-MiniLM-L6-v2
|
| 22 |
+
|
| 23 |
+
This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
|
| 24 |
+
|
| 25 |
+
The model has been trained using an efficient few-shot learning technique that involves:
|
| 26 |
+
|
| 27 |
+
1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning.
|
| 28 |
+
2. Training a classification head with features from the fine-tuned Sentence Transformer.
|
| 29 |
+
|
| 30 |
+
## Model Details
|
| 31 |
+
|
| 32 |
+
### Model Description
|
| 33 |
+
- **Model Type:** SetFit
|
| 34 |
+
- **Sentence Transformer body:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)
|
| 35 |
+
- **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
|
| 36 |
+
- **Maximum Sequence Length:** 256 tokens
|
| 37 |
+
- **Number of Classes:** 90 classes
|
| 38 |
+
<!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
|
| 39 |
+
<!-- - **Language:** Unknown -->
|
| 40 |
+
<!-- - **License:** Unknown -->
|
| 41 |
+
|
| 42 |
+
### Model Sources
|
| 43 |
+
|
| 44 |
+
- **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit)
|
| 45 |
+
- **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
|
| 46 |
+
- **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
|
| 47 |
+
|
| 48 |
+
### Model Labels
|
| 49 |
+
| Label | Examples |
|
| 50 |
+
|:----------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 51 |
+
| mexican | <ul><li>'Best salsa restaurant'</li><li>'Best taco restaurant'</li><li>'Looking for a taco meal'</li></ul> |
|
| 52 |
+
| korean | <ul><li>'Looking for a k-food meal'</li><li>'Serving k-food dishes'</li><li>'I want to eat k-food'</li></ul> |
|
| 53 |
+
| zoo | <ul><li>'Find a nature in the city'</li><li>'Find a zoo in the city'</li><li>'I need to go to a zoo'</li></ul> |
|
| 54 |
+
| lebanese | <ul><li>'Looking for a hummus meal'</li><li>'I need some middle'</li><li>'I want to eat lebanese'</li></ul> |
|
| 55 |
+
| chinese | <ul><li>'Best noodles restaurant'</li><li>'Best wok restaurant'</li><li>'Delicious wok cuisine'</li></ul> |
|
| 56 |
+
| tomb | <ul><li>'Find a tomb in the city'</li><li>'Find a grave in the city'</li><li>'Where is the nearest tomb?'</li></ul> |
|
| 57 |
+
| french | <ul><li>'I am craving fine'</li><li>'Examples of colonial structures'</li><li>'I want to eat fine'</li></ul> |
|
| 58 |
+
| thai | <ul><li>'Best thai restaurant'</li><li>'Serving pad dishes'</li><li>'I need some curry'</li></ul> |
|
| 59 |
+
| steamed fish | <ul><li>'I am craving steamed fish'</li><li>'I need some steamed fish'</li><li>'Looking for a steamed fish meal'</li></ul> |
|
| 60 |
+
| modernism | <ul><li>'Show me sleek architecture'</li><li>'Find me a sleek villa'</li><li>'Show me concrete architecture'</li></ul> |
|
| 61 |
+
| diner | <ul><li>'Delicious comfort cuisine'</li><li>'I need some cheap'</li><li>'Best eatery restaurant'</li></ul> |
|
| 62 |
+
| marketplace | <ul><li>'Show me a marketplace location'</li><li>'I need to go to a groceries'</li><li>'Find a market in the city'</li></ul> |
|
| 63 |
+
| reservoir | <ul><li>'Show me a dam location'</li><li>'reservoir water lake dam utility'</li><li>'Find a reservoir in the city'</li></ul> |
|
| 64 |
+
| coffee | <ul><li>'I am craving coffee'</li><li>'Delicious phe cuisine'</li><li>'Looking for a cappuccino meal'</li></ul> |
|
| 65 |
+
| hue style | <ul><li>'I am craving vietnam'</li><li>'Looking for a noodle meal'</li><li>'Where can I find central food?'</li></ul> |
|
| 66 |
+
| khmer | <ul><li>'Examples of stone structures'</li><li>'Locations for cambodian followers'</li><li>'Find me a temple villa'</li></ul> |
|
| 67 |
+
| commercial | <ul><li>'Where is the nearest business?'</li><li>'Show me a mall location'</li><li>'Where is the nearest commercial?'</li></ul> |
|
| 68 |
+
| japanese | <ul><li>'Delicious sashimi cuisine'</li><li>'Best udon restaurant'</li><li>'Looking for a tempura meal'</li></ul> |
|
| 69 |
+
| vietnamese | <ul><li>'Best cuisine restaurant'</li><li>'Where can I find local food?'</li><li>'Looking for a tam meal'</li></ul> |
|
| 70 |
+
| contemporary | <ul><li>'Looking for recent vibes'</li><li>'Find me a design villa'</li><li>'Looking for 21st vibes'</li></ul> |
|
| 71 |
+
| pizza | <ul><li>'I need some pizza'</li><li>'I am craving italian'</li><li>'Where can I find pie food?'</li></ul> |
|
| 72 |
+
| theatre | <ul><li>'I need to go to a theatre'</li><li>'Show me a theatre location'</li><li>'theatre opera house performance stage cinema arts'</li></ul> |
|
| 73 |
+
| library | <ul><li>'I need to go to a study'</li><li>'Where is the nearest archive?'</li><li>'Find a books in the city'</li></ul> |
|
| 74 |
+
| neo-romanesque | <ul><li>'Find me a 19th villa'</li><li>'Show me 19th architecture'</li><li>'Looking for 19th vibes'</li></ul> |
|
| 75 |
+
| sushi | <ul><li>'Looking for a fish meal'</li><li>'I am craving maki'</li><li>'I am craving nigiri'</li></ul> |
|
| 76 |
+
| nướng (grill) | <ul><li>'I want to eat nướng (grill)'</li><li>'I am craving nướng (grill)'</li><li>'Serving nướng (grill) dishes'</li></ul> |
|
| 77 |
+
| indochine | <ul><li>'Find me a and villa'</li><li>'I want to see a fusion style building'</li><li>'Houses with and design'</li></ul> |
|
| 78 |
+
| mediterranean | <ul><li>'I want to eat greek'</li><li>'Delicious healthy cuisine'</li><li>'Serving greek dishes'</li></ul> |
|
| 79 |
+
| sandwich | <ul><li>'Serving sub dishes'</li><li>'I want to eat panini'</li><li>'Looking for a sub meal'</li></ul> |
|
| 80 |
+
| wings | <ul><li>'I want to eat wings'</li><li>'Serving buffalo dishes'</li><li>'Where can I find buffalo food?'</li></ul> |
|
| 81 |
+
| international | <ul><li>'Looking for a foreign meal'</li><li>'Serving western dishes'</li><li>'I want to eat imported'</li></ul> |
|
| 82 |
+
| mortuary | <ul><li>'Where is the nearest burial?'</li><li>'Show me a mortuary location'</li><li>'Show me a services location'</li></ul> |
|
| 83 |
+
| ice cream | <ul><li>'I need some sweet'</li><li>'Delicious cold cuisine'</li><li>'Serving gelato dishes'</li></ul> |
|
| 84 |
+
| baroque | <ul><li>'Houses with baroque design'</li><li>'Find me a ornate villa'</li><li>'I want to see a european style building'</li></ul> |
|
| 85 |
+
| romanesque | <ul><li>'Show me rounded architecture'</li><li>'Looking for thick vibes'</li><li>'Houses with thick design'</li></ul> |
|
| 86 |
+
| hospital | <ul><li>'I need to go to a doctor'</li><li>'Show me a emergency location'</li><li>'Where is the nearest clinic?'</li></ul> |
|
| 87 |
+
| art nouveau | <ul><li>'I want to see a lines style building'</li><li>'Show me lines architecture'</li><li>'Looking for organic vibes'</li></ul> |
|
| 88 |
+
| curry | <ul><li>'Delicious coconut cuisine'</li><li>'I want to eat coconut'</li><li>'I want to eat stew'</li></ul> |
|
| 89 |
+
| neo-baroque | <ul><li>'Looking for house vibes'</li><li>'Houses with opera design'</li><li>'Show me opera architecture'</li></ul> |
|
| 90 |
+
| argentinian | <ul><li>'Where can I find argentinian food?'</li><li>'Serving american dishes'</li><li>'argentinian steak latin american meat asado'</li></ul> |
|
| 91 |
+
| grill | <ul><li>'I want to eat fire'</li><li>'Best smoke restaurant'</li><li>'Best meat restaurant'</li></ul> |
|
| 92 |
+
| christian | <ul><li>'Locations for church followers'</li><li>'I want to visit a catholic place of worship'</li><li>'Locations for cross followers'</li></ul> |
|
| 93 |
+
| burger | <ul><li>'Delicious burger cuisine'</li><li>'Best fast restaurant'</li><li>'I need some fast'</li></ul> |
|
| 94 |
+
| rococo | <ul><li>'Looking for pastel vibes'</li><li>'Find me a decorative villa'</li><li>'Show me playful architecture'</li></ul> |
|
| 95 |
+
| muslim | <ul><li>'Find a mosque temple'</li><li>'Locations for muslim followers'</li><li>'Find a halal temple'</li></ul> |
|
| 96 |
+
| hinduism | <ul><li>'Find a hindu temple'</li><li>'I want to visit a hindu place of worship'</li><li>'I want to visit a shiva place of worship'</li></ul> |
|
| 97 |
+
| seafood | <ul><li>'Looking for a snail meal'</li><li>'Delicious crab cuisine'</li><li>'I need some shell'</li></ul> |
|
| 98 |
+
| asian | <ul><li>'Looking for a eastern meal'</li><li>'I need some oriental'</li><li>'I want to eat asian'</li></ul> |
|
| 99 |
+
| cafe | <ul><li>'Find a sip in the city'</li><li>'Where is the nearest drink?'</li><li>'I need to go to a drink'</li></ul> |
|
| 100 |
+
| park | <ul><li>'I need to go to a trees'</li><li>'Show me a space location'</li><li>'Find a relax in the city'</li></ul> |
|
| 101 |
+
| local food | <ul><li>'Where can I find specialty food?'</li><li>'Looking for a street meal'</li><li>'Serving specialty dishes'</li></ul> |
|
| 102 |
+
| thai hotpot | <ul><li>'I need some sour'</li><li>'Best sour restaurant'</li><li>'Best thai hotpot restaurant'</li></ul> |
|
| 103 |
+
| flamboyant | <ul><li>'Examples of flame-like structures'</li><li>'Houses with flame-like design'</li><li>'Find me a flame-like villa'</li></ul> |
|
| 104 |
+
| gothic | <ul><li>'Show me pointed architecture'</li><li>'Show me gothic architecture'</li><li>'Examples of gothic structures'</li></ul> |
|
| 105 |
+
| buddhist | <ul><li>'I want to visit a monk place of worship'</li><li>'Locations for buddhism followers'</li><li>'Locations for buddhist followers'</li></ul> |
|
| 106 |
+
| art deco | <ul><li>'I want to see a stylized style building'</li><li>'Houses with retro design'</li><li>'Find me a retro villa'</li></ul> |
|
| 107 |
+
| pagoda | <ul><li>'Where is the nearest asian?'</li><li>'Show me a tiered location'</li><li>'Where is the nearest tower?'</li></ul> |
|
| 108 |
+
| islamic | <ul><li>'I want to see a patterns style building'</li><li>'Examples of minarets structures'</li><li>'Examples of patterns structures'</li></ul> |
|
| 109 |
+
| heritage | <ul><li>'Where is the nearest heritage?'</li><li>'Find a monument in the city'</li><li>'I need to go to a heritage'</li></ul> |
|
| 110 |
+
| government | <ul><li>'Find a committee in the city'</li><li>'government building administrative town hall committee'</li><li>'Show me a town location'</li></ul> |
|
| 111 |
+
| steakhouse | <ul><li>'Delicious steakhouse cuisine'</li><li>'Serving steakhouse dishes'</li><li>'I am craving steakhouse'</li></ul> |
|
| 112 |
+
| hotpot | <ul><li>'I want to eat pot'</li><li>'hotpot lau soup pot shabu shabu sharing'</li><li>'Where can I find hotpot food?'</li></ul> |
|
| 113 |
+
| banh cuon | <ul><li>'Delicious breakfast cuisine'</li><li>'Looking for a rice meal'</li><li>'I am craving light'</li></ul> |
|
| 114 |
+
| noodles | <ul><li>'Where can I find vermicelli food?'</li><li>'I am craving soup'</li><li>'I need some vermicelli'</li></ul> |
|
| 115 |
+
| fine dining | <ul><li>'I am craving fine dining'</li><li>'I am craving upscale'</li><li>'Looking for a luxury meal'</li></ul> |
|
| 116 |
+
| stir-fried vegetables | <ul><li>'Serving vegetarian dishes'</li><li>'I need some vegetarian'</li><li>'Best glory restaurant'</li></ul> |
|
| 117 |
+
| courthouse | <ul><li>'I need to go to a law'</li><li>'Find a legal in the city'</li><li>'I need to go to a justice'</li></ul> |
|
| 118 |
+
| none | <ul><li>'I want to visit a atheism place of worship'</li><li>'Find a atheism temple'</li><li>'Locations for atheism followers'</li></ul> |
|
| 119 |
+
| grilled fish | <ul><li>'Where can I find grilled fish food?'</li><li>'grilled fish ca nuong seafood'</li><li>'Best grilled fish restaurant'</li></ul> |
|
| 120 |
+
| italian | <ul><li>'I want to eat pasta'</li><li>'Delicious rome cuisine'</li><li>'Where can I find spaghetti food?'</li></ul> |
|
| 121 |
+
| beef bowl | <ul><li>'I need some gyudon'</li><li>'Where can I find gyudon food?'</li><li>'Where can I find beef bowl food?'</li></ul> |
|
| 122 |
+
| east asian | <ul><li>'Find me a roof villa'</li><li>'Show me roof architecture'</li><li>'Find me a structure villa'</li></ul> |
|
| 123 |
+
| museum | <ul><li>'I need to go to a gallery'</li><li>'Find a exhibition in the city'</li><li>'Find a art in the city'</li></ul> |
|
| 124 |
+
| place_of_worship | <ul><li>'Show me a pray location'</li><li>'I need to go to a pagoda'</li><li>'Find a church in the city'</li></ul> |
|
| 125 |
+
| barbecue | <ul><li>'Where can I find smokehouse food?'</li><li>'Serving brisket dishes'</li><li>'barbecue bbq ribs brisket smokehouse'</li></ul> |
|
| 126 |
+
| residential | <ul><li>'I need to go to a villa'</li><li>'I need to go to a apartment'</li><li>'Show me a living location'</li></ul> |
|
| 127 |
+
| renaissance | <ul><li>'I want to see a domes style building'</li><li>'Houses with classical design'</li><li>'Find me a symmetry villa'</li></ul> |
|
| 128 |
+
| crepe | <ul><li>'Best crepe restaurant'</li><li>'Serving pancake dishes'</li><li>'Serving crepe dishes'</li></ul> |
|
| 129 |
+
| institution | <ul><li>'Show me a institution location'</li><li>'Show me a education location'</li><li>'Where is the nearest institution?'</li></ul> |
|
| 130 |
+
| indian | <ul><li>'Where can I find naan food?'</li><li>'Serving masala dishes'</li><li>'I am craving naan'</li></ul> |
|
| 131 |
+
| community_centre | <ul><li>'Show me a public location'</li><li>'Where is the nearest house?'</li><li>'Find a center in the city'</li></ul> |
|
| 132 |
+
| fountain | <ul><li>'Show me a water location'</li><li>'I need to go to a water'</li><li>'Find a water in the city'</li></ul> |
|
| 133 |
+
| hindu | <ul><li>'Find me a hindu villa'</li><li>'Houses with hindu design'</li><li>'I want to see a champa style building'</li></ul> |
|
| 134 |
+
| chinese_folk | <ul><li>'Locations for worship followers'</li><li>'Find a ancestor temple'</li><li>'Locations for shenism followers'</li></ul> |
|
| 135 |
+
| restaurant | <ul><li>'I need to go to a dining'</li><li>'Find a lunch in the city'</li><li>'Where is the nearest dinner?'</li></ul> |
|
| 136 |
+
| beaux arts | <ul><li>'Houses with school design'</li><li>'Show me beaux arts architecture'</li><li>'Houses with beaux arts design'</li></ul> |
|
| 137 |
+
| vietnamese_folk | <ul><li>'I want to visit a mother place of worship'</li><li>'Locations for thanh followers'</li><li>'Locations for mau followers'</li></ul> |
|
| 138 |
+
| american | <ul><li>'Serving usa dishes'</li><li>'Delicious usa cuisine'</li><li>'american food usa burger fries steak'</li></ul> |
|
| 139 |
+
| taoist | <ul><li>'I want to visit a yin place of worship'</li><li>'I want to visit a taoism place of worship'</li><li>'Find a taoist temple'</li></ul> |
|
| 140 |
+
| pasta | <ul><li>'Delicious carbonara cuisine'</li><li>'I want to eat carbonara'</li><li>'Where can I find carbonara food?'</li></ul> |
|
| 141 |
+
|
| 142 |
+
## Uses
|
| 143 |
+
|
| 144 |
+
### Direct Use for Inference
|
| 145 |
+
|
| 146 |
+
First install the SetFit library:
|
| 147 |
+
|
| 148 |
+
```bash
|
| 149 |
+
pip install setfit
|
| 150 |
+
```
|
| 151 |
+
|
| 152 |
+
Then you can load this model and run inference.
|
| 153 |
+
|
| 154 |
+
```python
|
| 155 |
+
from setfit import SetFitModel
|
| 156 |
+
|
| 157 |
+
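# Load from the 🤗 Hub or a local directory: replace "setfit_model_id" with this model's repo id or path (e.g. "filter-search-model")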
|
| 158 |
+
model = SetFitModel.from_pretrained("setfit_model_id")
|
| 159 |
+
# Run inference
|
| 160 |
+
preds = model("Best vietnam restaurant")
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
<!--
|
| 164 |
+
### Downstream Use
|
| 165 |
+
|
| 166 |
+
*List how someone could finetune this model on their own dataset.*
|
| 167 |
+
-->
|
| 168 |
+
|
| 169 |
+
<!--
|
| 170 |
+
### Out-of-Scope Use
|
| 171 |
+
|
| 172 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 173 |
+
-->
|
| 174 |
+
|
| 175 |
+
<!--
|
| 176 |
+
## Bias, Risks and Limitations
|
| 177 |
+
|
| 178 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 179 |
+
-->
|
| 180 |
+
|
| 181 |
+
<!--
|
| 182 |
+
### Recommendations
|
| 183 |
+
|
| 184 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 185 |
+
-->
|
| 186 |
+
|
| 187 |
+
## Training Details
|
| 188 |
+
|
| 189 |
+
### Training Set Metrics
|
| 190 |
+
| Training set | Min | Median | Max |
|
| 191 |
+
|:-------------|:----|:-------|:----|
|
| 192 |
+
| Word count | 3 | 4.8024 | 10 |
|
| 193 |
+
|
| 194 |
+
| Label | Training Sample Count |
|
| 195 |
+
|:----------------------|:----------------------|
|
| 196 |
+
| american | 17 |
|
| 197 |
+
| argentinian | 36 |
|
| 198 |
+
| art deco | 32 |
|
| 199 |
+
| art nouveau | 20 |
|
| 200 |
+
| asian | 24 |
|
| 201 |
+
| banh cuon | 46 |
|
| 202 |
+
| barbecue | 21 |
|
| 203 |
+
| baroque | 34 |
|
| 204 |
+
| beaux arts | 14 |
|
| 205 |
+
| beef bowl | 15 |
|
| 206 |
+
| buddhist | 23 |
|
| 207 |
+
| burger | 23 |
|
| 208 |
+
| cafe | 27 |
|
| 209 |
+
| chinese | 73 |
|
| 210 |
+
| chinese_folk | 17 |
|
| 211 |
+
| christian | 25 |
|
| 212 |
+
| coffee | 36 |
|
| 213 |
+
| commercial | 25 |
|
| 214 |
+
| community_centre | 33 |
|
| 215 |
+
| contemporary | 42 |
|
| 216 |
+
| courthouse | 25 |
|
| 217 |
+
| crepe | 15 |
|
| 218 |
+
| curry | 24 |
|
| 219 |
+
| diner | 46 |
|
| 220 |
+
| east asian | 31 |
|
| 221 |
+
| fine dining | 37 |
|
| 222 |
+
| flamboyant | 15 |
|
| 223 |
+
| fountain | 18 |
|
| 224 |
+
| french | 83 |
|
| 225 |
+
| gothic | 28 |
|
| 226 |
+
| government | 13 |
|
| 227 |
+
| grill | 48 |
|
| 228 |
+
| grilled fish | 16 |
|
| 229 |
+
| heritage | 20 |
|
| 230 |
+
| hindu | 31 |
|
| 231 |
+
| hinduism | 17 |
|
| 232 |
+
| hospital | 22 |
|
| 233 |
+
| hotpot | 28 |
|
| 234 |
+
| hue style | 43 |
|
| 235 |
+
| ice cream | 36 |
|
| 236 |
+
| indian | 30 |
|
| 237 |
+
| indochine | 24 |
|
| 238 |
+
| institution | 26 |
|
| 239 |
+
| international | 34 |
|
| 240 |
+
| islamic | 18 |
|
| 241 |
+
| italian | 20 |
|
| 242 |
+
| japanese | 45 |
|
| 243 |
+
| khmer | 44 |
|
| 244 |
+
| korean | 37 |
|
| 245 |
+
| lebanese | 31 |
|
| 246 |
+
| library | 14 |
|
| 247 |
+
| local food | 37 |
|
| 248 |
+
| marketplace | 28 |
|
| 249 |
+
| mediterranean | 45 |
|
| 250 |
+
| mexican | 29 |
|
| 251 |
+
| modernism | 40 |
|
| 252 |
+
| mortuary | 24 |
|
| 253 |
+
| museum | 23 |
|
| 254 |
+
| muslim | 20 |
|
| 255 |
+
| neo-baroque | 23 |
|
| 256 |
+
| neo-romanesque | 18 |
|
| 257 |
+
| none | 20 |
|
| 258 |
+
| noodles | 14 |
|
| 259 |
+
| nướng (grill) | 16 |
|
| 260 |
+
| pagoda | 15 |
|
| 261 |
+
| park | 16 |
|
| 262 |
+
| pasta | 8 |
|
| 263 |
+
| pizza | 35 |
|
| 264 |
+
| place_of_worship | 28 |
|
| 265 |
+
| renaissance | 30 |
|
| 266 |
+
| reservoir | 16 |
|
| 267 |
+
| residential | 16 |
|
| 268 |
+
| restaurant | 27 |
|
| 269 |
+
| rococo | 29 |
|
| 270 |
+
| romanesque | 27 |
|
| 271 |
+
| sandwich | 23 |
|
| 272 |
+
| seafood | 39 |
|
| 273 |
+
| steakhouse | 14 |
|
| 274 |
+
| steamed fish | 17 |
|
| 275 |
+
| stir-fried vegetables | 43 |
|
| 276 |
+
| sushi | 36 |
|
| 277 |
+
| taoist | 16 |
|
| 278 |
+
| thai | 45 |
|
| 279 |
+
| thai hotpot | 23 |
|
| 280 |
+
| theatre | 23 |
|
| 281 |
+
| tomb | 16 |
|
| 282 |
+
| vietnamese | 77 |
|
| 283 |
+
| vietnamese_folk | 18 |
|
| 284 |
+
| wings | 33 |
|
| 285 |
+
| zoo | 21 |
|
| 286 |
+
|
| 287 |
+
### Training Hyperparameters
|
| 288 |
+
- batch_size: (16, 16)
|
| 289 |
+
- num_epochs: (1, 16)
|
| 290 |
+
- max_steps: 2000
|
| 291 |
+
- sampling_strategy: oversampling
|
| 292 |
+
- body_learning_rate: (2e-05, 1e-05)
|
| 293 |
+
- head_learning_rate: 0.01
|
| 294 |
+
- loss: CosineSimilarityLoss
|
| 295 |
+
- distance_metric: cosine_distance
|
| 296 |
+
- margin: 0.25
|
| 297 |
+
- end_to_end: False
|
| 298 |
+
- use_amp: False
|
| 299 |
+
- warmup_proportion: 0.1
|
| 300 |
+
- l2_weight: 0.01
|
| 301 |
+
- seed: 42
|
| 302 |
+
- run_name: my_fresh_run_v2
|
| 303 |
+
- evaluation_strategy: steps
|
| 304 |
+
- eval_max_steps: -1
|
| 305 |
+
- load_best_model_at_end: True
|
| 306 |
+
|
| 307 |
+
### Training Results
|
| 308 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
| 309 |
+
|:------:|:----:|:-------------:|:---------------:|
|
| 310 |
+
| 0.0005 | 1 | 0.2922 | - |
|
| 311 |
+
| 0.025 | 50 | 0.2078 | - |
|
| 312 |
+
| 0.05 | 100 | 0.1794 | - |
|
| 313 |
+
| 0.075 | 150 | 0.1687 | - |
|
| 314 |
+
| 0.1 | 200 | 0.1498 | 0.1183 |
|
| 315 |
+
| 0.125 | 250 | 0.1504 | - |
|
| 316 |
+
| 0.15 | 300 | 0.1304 | - |
|
| 317 |
+
| 0.175 | 350 | 0.1323 | - |
|
| 318 |
+
| 0.2 | 400 | 0.1249 | 0.1001 |
|
| 319 |
+
| 0.225 | 450 | 0.1234 | - |
|
| 320 |
+
| 0.25 | 500 | 0.1223 | - |
|
| 321 |
+
| 0.275 | 550 | 0.1133 | - |
|
| 322 |
+
| 0.3 | 600 | 0.1106 | 0.0899 |
|
| 323 |
+
| 0.325 | 650 | 0.1026 | - |
|
| 324 |
+
| 0.35 | 700 | 0.1069 | - |
|
| 325 |
+
| 0.375 | 750 | 0.0975 | - |
|
| 326 |
+
| 0.4 | 800 | 0.0994 | 0.0772 |
|
| 327 |
+
| 0.425 | 850 | 0.1006 | - |
|
| 328 |
+
| 0.45 | 900 | 0.0979 | - |
|
| 329 |
+
| 0.475 | 950 | 0.0817 | - |
|
| 330 |
+
| 0.5 | 1000 | 0.0966 | 0.0683 |
|
| 331 |
+
| 0.525 | 1050 | 0.0856 | - |
|
| 332 |
+
| 0.55 | 1100 | 0.082 | - |
|
| 333 |
+
| 0.575 | 1150 | 0.0767 | - |
|
| 334 |
+
| 0.6 | 1200 | 0.0788 | 0.0620 |
|
| 335 |
+
| 0.625 | 1250 | 0.0705 | - |
|
| 336 |
+
| 0.65 | 1300 | 0.0713 | - |
|
| 337 |
+
| 0.675 | 1350 | 0.0711 | - |
|
| 338 |
+
| 0.7 | 1400 | 0.0713 | 0.0575 |
|
| 339 |
+
| 0.725 | 1450 | 0.074 | - |
|
| 340 |
+
| 0.75 | 1500 | 0.0618 | - |
|
| 341 |
+
| 0.775 | 1550 | 0.0701 | - |
|
| 342 |
+
| 0.8 | 1600 | 0.0674 | 0.0539 |
|
| 343 |
+
| 0.825 | 1650 | 0.0685 | - |
|
| 344 |
+
| 0.85 | 1700 | 0.0678 | - |
|
| 345 |
+
| 0.875 | 1750 | 0.0671 | - |
|
| 346 |
+
| 0.9 | 1800 | 0.0657 | 0.0517 |
|
| 347 |
+
| 0.925 | 1850 | 0.0593 | - |
|
| 348 |
+
| 0.95 | 1900 | 0.0641 | - |
|
| 349 |
+
| 0.975 | 1950 | 0.0659 | - |
|
| 350 |
+
| 1.0 | 2000 | 0.0629 | 0.0508 |
|
| 351 |
+
|
| 352 |
+
### Framework Versions
|
| 353 |
+
- Python: 3.12.12
|
| 354 |
+
- SetFit: 1.1.3
|
| 355 |
+
- Sentence Transformers: 5.1.2
|
| 356 |
+
- Transformers: 4.57.2
|
| 357 |
+
- PyTorch: 2.9.0+cu126
|
| 358 |
+
- Datasets: 4.0.0
|
| 359 |
+
- Tokenizers: 0.22.1
|
| 360 |
+
|
| 361 |
+
## Citation
|
| 362 |
+
|
| 363 |
+
### BibTeX
|
| 364 |
+
```bibtex
|
| 365 |
+
@article{https://doi.org/10.48550/arxiv.2209.11055,
|
| 366 |
+
doi = {10.48550/ARXIV.2209.11055},
|
| 367 |
+
url = {https://arxiv.org/abs/2209.11055},
|
| 368 |
+
author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren},
|
| 369 |
+
keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences},
|
| 370 |
+
title = {Efficient Few-Shot Learning Without Prompts},
|
| 371 |
+
publisher = {arXiv},
|
| 372 |
+
year = {2022},
|
| 373 |
+
copyright = {Creative Commons Attribution 4.0 International}
|
| 374 |
+
}
|
| 375 |
+
```
|
| 376 |
+
|
| 377 |
+
<!--
|
| 378 |
+
## Glossary
|
| 379 |
+
|
| 380 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 381 |
+
-->
|
| 382 |
+
|
| 383 |
+
<!--
|
| 384 |
+
## Model Card Authors
|
| 385 |
+
|
| 386 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 387 |
+
-->
|
| 388 |
+
|
| 389 |
+
<!--
|
| 390 |
+
## Model Card Contact
|
| 391 |
+
|
| 392 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 393 |
+
-->
|
filter-search-model/config.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"gradient_checkpointing": false,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 384,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 1536,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"max_position_embeddings": 512,
|
| 16 |
+
"model_type": "bert",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 6,
|
| 19 |
+
"pad_token_id": 0,
|
| 20 |
+
"position_embedding_type": "absolute",
|
| 21 |
+
"transformers_version": "4.57.2",
|
| 22 |
+
"type_vocab_size": 2,
|
| 23 |
+
"use_cache": true,
|
| 24 |
+
"vocab_size": 30522
|
| 25 |
+
}
|
filter-search-model/config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.1.2",
|
| 4 |
+
"transformers": "4.57.2",
|
| 5 |
+
"pytorch": "2.9.0+cu126"
|
| 6 |
+
},
|
| 7 |
+
"model_type": "SentenceTransformer",
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
filter-search-model/config_setfit.json
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"labels": [
|
| 3 |
+
"american",
|
| 4 |
+
"argentinian",
|
| 5 |
+
"art deco",
|
| 6 |
+
"art nouveau",
|
| 7 |
+
"asian",
|
| 8 |
+
"banh cuon",
|
| 9 |
+
"barbecue",
|
| 10 |
+
"baroque",
|
| 11 |
+
"beaux arts",
|
| 12 |
+
"beef bowl",
|
| 13 |
+
"buddhist",
|
| 14 |
+
"burger",
|
| 15 |
+
"cafe",
|
| 16 |
+
"chinese",
|
| 17 |
+
"chinese_folk",
|
| 18 |
+
"christian",
|
| 19 |
+
"coffee",
|
| 20 |
+
"commercial",
|
| 21 |
+
"community_centre",
|
| 22 |
+
"contemporary",
|
| 23 |
+
"courthouse",
|
| 24 |
+
"crepe",
|
| 25 |
+
"curry",
|
| 26 |
+
"diner",
|
| 27 |
+
"east asian",
|
| 28 |
+
"fine dining",
|
| 29 |
+
"flamboyant",
|
| 30 |
+
"fountain",
|
| 31 |
+
"french",
|
| 32 |
+
"gothic",
|
| 33 |
+
"government",
|
| 34 |
+
"grill",
|
| 35 |
+
"grilled fish",
|
| 36 |
+
"heritage",
|
| 37 |
+
"hindu",
|
| 38 |
+
"hinduism",
|
| 39 |
+
"hospital",
|
| 40 |
+
"hotpot",
|
| 41 |
+
"hue style",
|
| 42 |
+
"ice cream",
|
| 43 |
+
"indian",
|
| 44 |
+
"indochine",
|
| 45 |
+
"institution",
|
| 46 |
+
"international",
|
| 47 |
+
"islamic",
|
| 48 |
+
"italian",
|
| 49 |
+
"japanese",
|
| 50 |
+
"khmer",
|
| 51 |
+
"korean",
|
| 52 |
+
"lebanese",
|
| 53 |
+
"library",
|
| 54 |
+
"local food",
|
| 55 |
+
"marketplace",
|
| 56 |
+
"mediterranean",
|
| 57 |
+
"mexican",
|
| 58 |
+
"modernism",
|
| 59 |
+
"mortuary",
|
| 60 |
+
"museum",
|
| 61 |
+
"muslim",
|
| 62 |
+
"neo-baroque",
|
| 63 |
+
"neo-romanesque",
|
| 64 |
+
"none",
|
| 65 |
+
"noodles",
|
| 66 |
+
"n\u01b0\u1edbng (grill)",
|
| 67 |
+
"pagoda",
|
| 68 |
+
"park",
|
| 69 |
+
"pasta",
|
| 70 |
+
"pizza",
|
| 71 |
+
"place_of_worship",
|
| 72 |
+
"renaissance",
|
| 73 |
+
"reservoir",
|
| 74 |
+
"residential",
|
| 75 |
+
"restaurant",
|
| 76 |
+
"rococo",
|
| 77 |
+
"romanesque",
|
| 78 |
+
"sandwich",
|
| 79 |
+
"seafood",
|
| 80 |
+
"steakhouse",
|
| 81 |
+
"steamed fish",
|
| 82 |
+
"stir-fried vegetables",
|
| 83 |
+
"sushi",
|
| 84 |
+
"taoist",
|
| 85 |
+
"thai",
|
| 86 |
+
"thai hotpot",
|
| 87 |
+
"theatre",
|
| 88 |
+
"tomb",
|
| 89 |
+
"vietnamese",
|
| 90 |
+
"vietnamese_folk",
|
| 91 |
+
"wings",
|
| 92 |
+
"zoo"
|
| 93 |
+
],
|
| 94 |
+
"normalize_embeddings": false
|
| 95 |
+
}
|
filter-search-model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45f9966ed1b4481b29786c463dc03ddea3ab6529ebbb4229a0c11ba64ea8ad16
|
| 3 |
+
size 90864192
|
filter-search-model/model_head.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:220dc87103dba7842ddbc931c391a678dfd3ba39670ca503843bef1886204247
|
| 3 |
+
size 285607
|
filter-search-model/modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
filter-search-model/sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 256,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
filter-search-model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|
filter-search-model/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
filter-search-model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": true,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "[MASK]",
|
| 50 |
+
"max_length": 128,
|
| 51 |
+
"model_max_length": 256,
|
| 52 |
+
"never_split": null,
|
| 53 |
+
"pad_to_multiple_of": null,
|
| 54 |
+
"pad_token": "[PAD]",
|
| 55 |
+
"pad_token_type_id": 0,
|
| 56 |
+
"padding_side": "right",
|
| 57 |
+
"sep_token": "[SEP]",
|
| 58 |
+
"stride": 0,
|
| 59 |
+
"strip_accents": null,
|
| 60 |
+
"tokenize_chinese_chars": true,
|
| 61 |
+
"tokenizer_class": "BertTokenizer",
|
| 62 |
+
"truncation_side": "right",
|
| 63 |
+
"truncation_strategy": "longest_first",
|
| 64 |
+
"unk_token": "[UNK]"
|
| 65 |
+
}
|
filter-search-model/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
setfit
|
| 2 |
+
sentence-transformers
|
| 3 |
+
scikit-learn
|
| 4 |
+
pandas
|
| 5 |
+
gradio
|