import requests
import gradio as gr
import pandas as pd
import time
import json
import re
import os
import torch
from sentence_transformers import SentenceTransformer, util
# ---------------- Cache cleanup ----------------
# Remove the Hugging Face and torch cache directories before anything is loaded.
os.system("rm -rf /home/user/.cache/huggingface /home/user/.cache/torch")

# Endpoint queried by fetch_yata, and the in-memory copy of its last response.
API_URL = "https://yata.yt/api/v1/travel/export/"
_cache = dict(data=None, timestamp=0, last_update="Unknown")

# ---------------- Load category map ----------------
with open("items.json", "r", encoding="utf-8") as f:
    items_data = json.load(f)["items"]

# Item name -> lower-cased item type; records missing either field are skipped.
ITEM_TO_TYPE = {
    record["name"]: record["type"].lower()
    for record in items_data.values()
    if "name" in record and "type" in record
}
ALL_ITEMS = list(ITEM_TO_TYPE)
# Lower-cased item name -> name as spelled in items.json.
ALL_ITEMS_LOWER = {item_name.lower(): item_name for item_name in ALL_ITEMS}
ALL_CATEGORIES = sorted({*ITEM_TO_TYPE.values()})
# Recorded so the on-disk embedding cache can tell which items.json built it.
ITEM_FILE_MTIME = os.path.getmtime("items.json")
# ---------------- Semantic model ----------------
print("🧠 Loading semantic model (MiniLM)...")
embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L3-v2")
print("✅ Semantic model ready")

# Item name -> embedding tensor, consumed by semantic_match. All names are
# encoded in one batched call (one row per name) rather than one model call
# per item; the guard keeps an empty catalog from reaching the encoder.
ITEM_EMBEDS = (
    dict(zip(ITEM_TO_TYPE, embedder.encode(list(ITEM_TO_TYPE), convert_to_tensor=True)))
    if ITEM_TO_TYPE
    else {}
)
# ---------------- Auto-generate + cache category embeddings ----------------
# Directory (created on import if absent) holding the three cache artefacts:
# the alias lists, the embedding tensors and the metadata stamp.
CACHE_DIR = "cache"
os.makedirs(CACHE_DIR, exist_ok=True)
ALIASES_FILE, EMB_FILE, META_FILE = (
    os.path.join(CACHE_DIR, filename)
    for filename in ("category_aliases.json", "category_embeds.pt", "meta.json")
)
def load_cached_embeddings():
    """Load the category aliases and embeddings written by a previous run.

    Returns:
        ``(aliases, embeds)`` as read from ``ALIASES_FILE`` and ``EMB_FILE``,
        or ``(None, None)`` when any cache file is missing, when the
        ``items_mtime`` recorded in ``META_FILE`` differs from
        ``ITEM_FILE_MTIME``, or when reading the cache fails.
    """
    cache_files = (ALIASES_FILE, EMB_FILE, META_FILE)
    if not all(os.path.exists(path) for path in cache_files):
        return None, None
    try:
        with open(META_FILE, "r", encoding="utf-8") as f:
            meta = json.load(f)
        # The cache is only valid for the items.json it was built from.
        if meta.get("items_mtime") != ITEM_FILE_MTIME:
            return None, None
        with open(ALIASES_FILE, "r", encoding="utf-8") as f:
            aliases = json.load(f)
        # NOTE(review): torch.load unpickles the file. That is acceptable only
        # because this cache is produced locally by save_cached_embeddings;
        # never point EMB_FILE at data from an untrusted source.
        embeds = torch.load(EMB_FILE)
    except Exception as e:
        # An unreadable cache is not fatal, but say why it was discarded
        # instead of failing silently.
        print(f"⚠️ Cache load failed, rebuilding: {e}")
        return None, None
    print("✅ Loaded cached category embeddings.")
    return aliases, embeds
def save_cached_embeddings(aliases, embeds):
    """Persist category aliases and embeddings for reuse on the next start.

    Writes *aliases* as JSON to ``ALIASES_FILE``, *embeds* via ``torch.save``
    to ``EMB_FILE``, and a metadata stamp (``items_mtime`` plus the current
    time) to ``META_FILE``. Saving is best-effort: any failure is printed and
    not raised.
    """
    try:
        with open(ALIASES_FILE, "w", encoding="utf-8") as f:
            json.dump(aliases, f, indent=2)
        torch.save(embeds, EMB_FILE)
        # The stamp goes last: load_cached_embeddings requires it and compares
        # its items_mtime against ITEM_FILE_MTIME before trusting the cache.
        with open(META_FILE, "w", encoding="utf-8") as f:
            json.dump({"items_mtime": ITEM_FILE_MTIME, "time": time.time()}, f)
    except Exception as e:
        print(f"⚠️ Cache save failed: {e}")
def auto_alias_categories(embedder, all_categories, all_item_names, top_k=6, threshold=0.38):
    """Build, for every category, a list of alias strings drawn from item names.

    Each category name is compared (cosine similarity of embeddings) with
    every item name; of the *top_k* most similar items, those scoring above
    *threshold* become aliases of the category.

    Args:
        embedder: Object whose ``encode(list_of_str, convert_to_tensor=True)``
            returns one embedding row per input string.
        all_categories: Category names to build aliases for.
        all_item_names: Candidate item names.
        top_k: How many of the best-scoring items to consider per category.
        threshold: Minimum similarity (exclusive) for an item to be kept.

    Returns:
        Dict mapping each category to a list that starts with the category
        itself, followed by its related items in descending similarity.
    """
    print("🤖 Building category aliases dynamically...")
    # De-duplicate while keeping first-seen order.
    categories = list(dict.fromkeys(all_categories))
    item_names = list(dict.fromkeys(all_item_names))
    if not categories:
        return {}
    if not item_names:
        # Nothing to relate a category to: each one is its only alias.
        return {cat: [cat] for cat in categories}

    # Encode each list in one batched call and score every pair at once;
    # row i of the matrix holds category i against every item.
    cat_embs = embedder.encode(categories, convert_to_tensor=True)
    item_embs = embedder.encode(item_names, convert_to_tensor=True)
    sim_rows = util.cos_sim(cat_embs, item_embs).tolist()

    aliases = {}
    for cat, row in zip(categories, sim_rows):
        ranked = sorted(zip(item_names, row), key=lambda pair: pair[1], reverse=True)[:top_k]
        related = [name for name, score in ranked if score > threshold]
        # dict.fromkeys drops a repeated name but, unlike set(), keeps the
        # order stable so the cached JSON is reproducible between runs.
        aliases[cat] = list(dict.fromkeys([cat, *related]))
    return aliases
# Reuse the on-disk cache when it is present and valid; otherwise rebuild the
# aliases and embeddings from the model and write them back.
CATEGORY_ALIASES, CATEGORY_EMBEDS = load_cached_embeddings()
if not CATEGORY_ALIASES or not CATEGORY_EMBEDS:
    CATEGORY_ALIASES = auto_alias_categories(embedder, ALL_CATEGORIES, list(ITEM_TO_TYPE.keys()))
    # A category's embedding is the mean of the embeddings of its aliases,
    # taken over one batched encode per category.
    CATEGORY_EMBEDS = {
        cat: embedder.encode(list(aliases), convert_to_tensor=True).mean(dim=0)
        for cat, aliases in CATEGORY_ALIASES.items()
    }
    save_cached_embeddings(CATEGORY_ALIASES, CATEGORY_EMBEDS)
else:
    print("✅ Using cached dynamic category embeddings.")
# ---------------- Country mapping ----------------
# Three-letter country code -> display name.
COUNTRY_NAMES = {
    "ARG": "Argentina",
    "MEX": "Mexico",
    "CAN": "Canada",
    "UNI": "United Kingdom",
    "JAP": "Japan",
    "SOU": "South Africa",
    "SWI": "Switzerland",
    "UAE": "United Arab Emirates",
    "CHI": "China",
    "HAW": "Hawaii",
    "CAY": "Cayman Islands",
}

# Lower-cased spelling a user may type -> country code, grouped by code.
COUNTRY_ALIASES = {
    alias: code
    for code, spellings in (
        ("UNI", ("uk", "england", "united kingdom")),
        ("UAE", ("uae", "united arab emirates")),
        ("SOU", ("south africa",)),
        ("SWI", ("switzerland",)),
        ("CAY", ("cayman", "cayman islands")),
        ("ARG", ("argentina",)),
        ("MEX", ("mexico",)),
        ("CAN", ("canada",)),
        ("JAP", ("japan",)),
        ("CHI", ("china",)),
        ("HAW", ("hawaii",)),
    )
    for alias in spellings
}
def normalize_country_query(q: str) -> str | None:
q = (q or "").strip().lower()
if not q:
return None
if q in COUNTRY_ALIASES:
return COUNTRY_ALIASES[q]
if len(q) == 3 and q.upper() in COUNTRY_NAMES:
return q.upper()
return None
# ---------------- Helpers ----------------
def parse_freeform_query(text: str):
    """Split a free-form search into ``(item_term, country_term)``.

    The text is trimmed and lower-cased first. ``"<item> in <country>"`` is
    split at the first `` in ``. A two-word query is split when one of the
    words is a recognised country. Anything else is returned whole as the
    item term with an empty country term.
    """
    if not text:
        return "", ""
    cleaned = text.strip().lower()

    in_split = re.match(r"(.+?)\s+in\s+(.+)", cleaned, flags=re.IGNORECASE)
    if in_split is not None:
        item_part, country_part = in_split.groups()
        return item_part.strip(), country_part.strip()

    words = cleaned.split()
    if len(words) == 2:
        # Try the first word as the country, then the second.
        for country_idx in (0, 1):
            if normalize_country_query(words[country_idx]):
                return words[1 - country_idx], words[country_idx]
    return cleaned, ""
def semantic_match(query, top_k=15):
    """Find the items and the category that best match a free-text query.

    The trimmed, lower-cased query is embedded and compared (cosine
    similarity) with every entry of ``ITEM_EMBEDS`` and ``CATEGORY_EMBEDS``.

    Returns:
        ``{"category": ..., "items": [...]}``. ``items`` holds the names
        among the *top_k* best item matches scoring above 0.35, plus every
        item of the best-scoring category when that category also scores
        above 0.35. ``category`` is that category, or ``None`` when it
        contributed no items. An empty query yields no category and no items.
    """
    if not query:
        return {"category": None, "items": []}
    query_emb = embedder.encode(query.strip().lower(), convert_to_tensor=True)

    # Direct item hits, best first.
    scored_items = [
        (name, float(util.cos_sim(query_emb, emb))) for name, emb in ITEM_EMBEDS.items()
    ]
    scored_items.sort(key=lambda pair: pair[1], reverse=True)
    item_hits = [name for name, score in scored_items[:top_k] if score > 0.35]

    # Best category, if any.
    scored_cats = [
        (cat, float(util.cos_sim(query_emb, emb))) for cat, emb in CATEGORY_EMBEDS.items()
    ]
    scored_cats.sort(key=lambda pair: pair[1], reverse=True)
    if scored_cats:
        top_cat, cat_score = scored_cats[0]
    else:
        top_cat, cat_score = None, 0.0

    related_items = []
    if top_cat and cat_score > 0.35:
        related_items = [name for name, kind in ITEM_TO_TYPE.items() if kind == top_cat]

    return {
        "category": top_cat if related_items else None,
        "items": list(set(item_hits + related_items)),
    }
# ---------------- Fetch YATA ----------------
def fetch_yata(force_refresh=False):
    """Return the foreign-stock export and a label describing its freshness.

    A response cached less than 300 seconds ago is reused unless
    *force_refresh* is true. Otherwise ``API_URL`` is requested and, on
    success, stored in ``_cache`` together with a UTC ISO timestamp.

    Returns:
        ``(data, last_update)``. When the request fails, the previously
        cached data is returned with its timestamp marked as stale; with no
        cached data, ``({"stocks": {}}, "Fetch failed")`` is returned.
    """
    if not force_refresh and _cache["data"] and (time.time() - _cache["timestamp"] < 300):
        return _cache["data"], _cache["last_update"]
    try:
        resp = requests.get(API_URL, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        # Callers use data.get("stocks", ...), so anything but a JSON object
        # is treated as a failed fetch.
        if not isinstance(data, dict):
            raise ValueError(f"unexpected payload type: {type(data).__name__}")
    except Exception as e:
        print(f"❌ Fetch error: {e}")
        if _cache["data"]:
            # Older data is more useful than an empty table; label it as such.
            return _cache["data"], f"{_cache['last_update']} (stale, fetch failed)"
        return {"stocks": {}}, "Fetch failed"
    _cache.update({
        "data": data,
        "timestamp": time.time(),
        "last_update": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),  # UTC ISO
    })
    return data, _cache["last_update"]
def get_live_categories(data):
    """Return the sorted categories of the items present in *data*.

    *data* is read as ``{"stocks": {country: {"stocks": [item, ...]}}}``.
    Each item's ``name`` is looked up in ``ITEM_TO_TYPE``; items without a
    known category are skipped.
    """
    live_cats = {
        mapped.lower()
        for country_data in data.get("stocks", {}).values()
        for stock in country_data.get("stocks", [])
        if (mapped := ITEM_TO_TYPE.get(stock.get("name")))
    }
    return sorted(live_cats)
# ---------------- Core logic: single query ----------------
def query_inventory(query_text="", category="", country_name="", capacity=10, refresh=False):
    """Run one search against the stock data and return a results table.

    Args:
        query_text: Free-form search, e.g. ``"flowers in england"``; split
            into an item term and a country term by ``parse_freeform_query``.
        category: Exact item category; only applied when there is no item term.
        country_name: Country filter. When blank, the country parsed from
            *query_text* is used instead.
        capacity: Number of items carried per trip, used for the
            "Max Capacity Cost" column. ``None`` is treated as 10.
        refresh: Passed to ``fetch_yata`` as ``force_refresh``.

    Returns:
        ``(DataFrame, status_text)``. The frame has one row per matching
        stock item, sorted by country then item, or a single "Result" row
        when nothing matched.
    """
    data, last_update = fetch_yata(force_refresh=refresh)
    status = f"Last update: {last_update}"
    # A cleared gr.Number arrives as None; fall back to the default so the
    # cost multiplication below cannot raise.
    if capacity is None:
        capacity = 10

    # Parse freeform if present. A blank or whitespace-only country box
    # defers to the country found in the query text.
    parsed_item, parsed_country = parse_freeform_query(query_text)
    country_name = (country_name or "").strip() or parsed_country
    item_term = parsed_item

    # Detect if user meant an exact item (e.g., "xanax"); only fall back to
    # semantic matching when the term is not a known item name.
    item_lower = (item_term or "").lower()
    exact_item_name = ALL_ITEMS_LOWER.get(item_lower)
    if item_term and not exact_item_name:
        sem = semantic_match(item_term)
    else:
        sem = {"category": None, "items": []}
    semantic_items = set(sem["items"])
    semantic_category = (sem["category"] or "").lower()
    category_lower = (category or "").lower()

    # Country gating (strict): a recognised country must match by code; any
    # other text is matched as a substring of the display name.
    user_code = normalize_country_query(country_name)
    country_lower = country_name.lower()

    rows = []
    for code_raw, cdata in data.get("stocks", {}).items():
        code = code_raw.upper()
        cname = COUNTRY_NAMES.get(code, code)
        if country_name:
            if user_code:
                if code != user_code:
                    continue
            elif country_lower not in cname.lower():
                continue

        update_ts = cdata.get("update")
        update_str = (
            time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(update_ts)) if update_ts else "Unknown"
        )

        for item in cdata.get("stocks", []):
            iname = item.get("name", "")
            iname_lower = iname.lower()
            itype = ITEM_TO_TYPE.get(iname, "").lower()
            qty = item.get("quantity", 0)
            cost = item.get("cost", 0)

            # Strict item filtering
            if item_term:
                if exact_item_name:
                    item_ok = iname_lower == item_lower  # exact item only
                else:
                    item_ok = (
                        item_lower in iname_lower
                        or (semantic_category and itype == semantic_category)
                        or iname in semantic_items
                    )
            elif category:
                item_ok = category_lower == itype
            else:
                item_ok = True

            if item_ok:
                rows.append({
                    "Country": cname,
                    "Item": iname,
                    "Category": itype.title(),
                    "Quantity": qty,
                    "Cost": cost,
                    # `or 0` guards against a null cost in the payload.
                    "Max Capacity Cost": (cost or 0) * capacity,
                    "Updated": update_str,
                })

    if not rows:
        return pd.DataFrame([{"Result": "No inventory found for that query."}]), status

    df = pd.DataFrame(rows).sort_values(by=["Country", "Item"])
    # Render numbers with thousands separators; non-numeric values pass through.
    for col in ["Quantity", "Cost", "Max Capacity Cost"]:
        df[col] = df[col].apply(lambda x: f"{x:,.0f}" if isinstance(x, (int, float)) else x)
    return df, status
# ---------------- Multi-query (convenience buttons) ----------------
def _compile_term(term):
    """Turn one non-empty, lower-cased item term into a matcher spec."""
    if ALL_ITEMS_LOWER.get(term):
        return {"mode": "exact", "value": term}
    sem = semantic_match(term)
    return {
        "mode": "fuzzy",
        "value": term,
        "category": (sem["category"] or "").lower(),
        "items": set(sem["items"]),
    }


def _term_matches(spec, iname, itype):
    """Return True when stock item *iname* of type *itype* satisfies *spec*."""
    iname_lower = iname.lower()
    if spec["mode"] == "exact":
        return iname_lower == spec["value"]
    return bool(
        spec["value"] in iname_lower
        or (spec["category"] and itype == spec["category"])
        or iname in spec["items"]
    )


def run_multi(phrases, capacity):
    """
    Execute multiple 'item in country' phrases with strict per-country filtering and no duplicates.

    Args:
        phrases: Iterable of free-form phrases, each split by
            ``parse_freeform_query`` into an item term and a country term.
        capacity: Number of items carried per trip, used for the
            "Max Capacity Cost" column. ``None`` is treated as 10.

    A phrase whose country is recognised applies only to that country code.
    A phrase whose country is not recognised is matched against country
    display names by substring, and a phrase with no country applies to
    every country (previously both kinds were silently dropped). Phrases
    with an empty item term are ignored.

    Returns:
        ``(DataFrame, status_text)``, with a single "Result" row when
        nothing matched.
    """
    data, last_update = fetch_yata(False)
    status = f"Last update: {last_update}"
    # A cleared gr.Number arrives as None; fall back to the UI default.
    if capacity is None:
        capacity = 10

    # Group requested item terms by normalized country code; phrases without
    # a recognised country are kept with their raw country text.
    terms_by_code = {}  # code -> [item_term, ...]
    loose_terms = []    # [(country_fragment, item_term), ...]
    for phrase in phrases:
        item_term, country_term = parse_freeform_query(phrase)
        term = (item_term or "").strip().lower()
        if not term:
            continue
        code = normalize_country_query(country_term)
        if code:
            terms_by_code.setdefault(code, []).append(term)
        else:
            loose_terms.append(((country_term or "").strip().lower(), term))

    # term -> matcher spec, so a term shared by several countries is embedded
    # only once per call.
    compiled = {}

    rows = []
    for code_raw, cdata in data.get("stocks", {}).items():
        code = code_raw.upper()
        cname = COUNTRY_NAMES.get(code, code)
        cname_lower = cname.lower()

        # An empty fragment is a substring of every name, i.e. "all countries".
        terms = list(terms_by_code.get(code, ()))
        terms += [term for fragment, term in loose_terms if fragment in cname_lower]
        if not terms:
            continue

        specs = []
        for term in dict.fromkeys(terms):
            if term not in compiled:
                compiled[term] = _compile_term(term)
            specs.append(compiled[term])

        update_ts = cdata.get("update")
        update_str = (
            time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(update_ts)) if update_ts else "Unknown"
        )

        # Scan this country's items once
        for item in cdata.get("stocks", []):
            iname = item.get("name", "")
            itype = ITEM_TO_TYPE.get(iname, "").lower()
            qty = item.get("quantity", 0)
            cost = item.get("cost", 0)
            if any(_term_matches(spec, iname, itype) for spec in specs):
                rows.append({
                    "Country": cname,
                    "Item": iname,
                    "Category": itype.title(),
                    "Quantity": qty,
                    "Cost": cost,
                    # `or 0` guards against a null cost in the payload.
                    "Max Capacity Cost": (cost or 0) * capacity,
                    "Updated": update_str,
                })

    if not rows:
        return pd.DataFrame([{"Result": "No results for that set."}]), status

    # Deduplicate rows by (Country, Item, Updated)
    df = (
        pd.DataFrame(rows)
        .drop_duplicates(subset=["Country", "Item", "Updated"])
        .sort_values(by=["Country", "Item"])
    )
    # Render numbers with thousands separators; non-numeric values pass through.
    for col in ["Quantity", "Cost", "Max Capacity Cost"]:
        df[col] = df[col].apply(lambda x: f"{x:,.0f}" if isinstance(x, (int, float)) else x)
    return df, status
# ---------------- Wrappers ----------------
def run_query(query_text, category, country, capacity, refresh):
    """Gradio handler for the search button.

    Returns:
        ``(results_frame, status_text, dropdown_update)`` where the dropdown
        update lists a blank entry followed by the categories currently
        present in the stock data.
    """
    # query_inventory performs the fetch and honours `refresh`; asking for a
    # forced refresh here as well would hit the API twice for one click.
    df, ts = query_inventory(query_text, category, country, capacity, refresh)
    data, _ = fetch_yata(force_refresh=False)
    live_categories = get_live_categories(data)
    return df, ts, gr.update(choices=[""] + live_categories)
# ---------------- Gradio UI ----------------
def _flushie_phrases(countries):
    """Return "flowers in X" for every country, then "plushies in X" for every country."""
    return [f"{kind} in {country}" for kind in ("flowers", "plushies") for country in countries]


# NOTE(review): the emoji in the title and button labels were restored from
# mis-encoded text. The glyphs on the "long haul", "Xanax" and "Search"
# buttons could not be recovered exactly and are a best guess; confirm.
with gr.Blocks(title="🧳 Torn Foreign Stocks") as iface:
    gr.Markdown("## 🧳 Torn Foreign Stocks")
    gr.Markdown("_Search YATA's Foreign Stocks_")

    # Convenience buttons
    with gr.Row():
        btn_short = gr.Button("🌸 Flushies (short haul)")
        btn_medium = gr.Button("🧸 Flushies (medium haul)")
        btn_long = gr.Button("🌍 Flushies (long haul)")
        btn_xanax = gr.Button("💊 Xanax (SA)")
        btn_temps = gr.Button("🧨 Temps")

    query_box = gr.Textbox(label="Search (semantic, e.g. 'flowers in England')")
    category_drop = gr.Dropdown(label="Category (optional exact match)", choices=[""] + ALL_CATEGORIES)
    country_box = gr.Textbox(label="Country (optional, e.g. UK, Cayman, Japan)")
    capacity_slider = gr.Number(label="Travel Capacity", value=10, minimum=5, maximum=88, precision=0)
    refresh_check = gr.Checkbox(label="Force refresh (ignore cache)", value=False)
    result_df = gr.Dataframe(label="Results")
    meta_box = gr.Textbox(label="Metadata / Last Update")
    run_btn = gr.Button("🔍 Search / Refresh")

    run_btn.click(
        run_query,
        inputs=[query_box, category_drop, country_box, capacity_slider, refresh_check],
        outputs=[result_df, meta_box, category_drop],
    )

    # Convenience button bindings (use run_multi with per-country grouping)
    btn_short.click(
        lambda c: run_multi(_flushie_phrases(["mexico", "cayman islands", "canada"]), c),
        inputs=[capacity_slider], outputs=[result_df, meta_box])
    btn_medium.click(
        lambda c: run_multi(
            _flushie_phrases(["hawaii", "united kingdom", "argentina", "switzerland", "japan"]), c),
        inputs=[capacity_slider], outputs=[result_df, meta_box])
    btn_long.click(
        lambda c: run_multi(_flushie_phrases(["uae", "china", "south africa"]), c),
        inputs=[capacity_slider], outputs=[result_df, meta_box])
    btn_xanax.click(
        lambda c: run_multi(["xanax in south africa"], c),
        inputs=[capacity_slider], outputs=[result_df, meta_box])
    btn_temps.click(
        lambda c: run_multi(
            ["tear gas in argentina", "smoke grenade in south africa", "flash grenade in switzerland"], c),
        inputs=[capacity_slider], outputs=[result_df, meta_box])

    # NOTE(review): both HTML blocks below contain only a newline in this
    # file, so no script is actually injected; confirm whether the markup
    # described by the two comments was lost.
    # --- JS: global error banner (captures JS errors & unhandled promise rejections) ---
    gr.HTML("""
""")
    # --- JS: convert all UTC ISO timestamps to browser's local time ---
    gr.HTML("""
""")

# Print the traceback instead of letting a launch failure propagate.
try:
    iface.launch()
except Exception:
    import traceback
    traceback.print_exc()