Spaces:
Sleeping
Sleeping
# app.py - Unit Converter AI - Hugging Face Spaces Edition
#
# Architecture:
#   - Gradio UI
#   - AI layer 1 (Jan-v3-4B via HF Inference Providers): intent parsing -> JSON
#   - Python/UCUM engine: deterministic unit math
#   - AI layer 2 (Jan-v3-4B via HF Inference Providers): engineering explanation
#
# Deployment: upload this file + ucum_units.json + requirements.txt to a
# Hugging Face Space (Gradio SDK). Set HF_TOKEN as a Space Secret.
| import os, json, re, math | |
| import gradio as gr | |
| from openai import OpenAI | |
| import unicodedata as _ud | |
| # ββ HF Inference Provider configuration βββββββββββββββββββββββββββββββββββββββ | |
| # HF_TOKEN is injected automatically from the Space's Secrets β never hardcoded. | |
# Hugging Face Inference Provider settings.
# The token is read from the environment (empty string when unset) and is
# never hardcoded in this file.
HF_TOKEN = os.getenv("HF_TOKEN", "")
HF_API_BASE = "https://router.huggingface.co/v1"
HF_MODEL = "janhq/Jan-v3-4B-base-instruct:featherless-ai"
| # ββ Load UCUM data ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # ucum_units.json lives next to this file in the Space repository. | |
# Load the UCUM table from ucum_units.json, which sits next to this file.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
_UCUM_PATH = os.path.join(_SCRIPT_DIR, "ucum_units.json")
with open(_UCUM_PATH, encoding="utf-8") as _f:
    _data = json.loads(_f.read())
_prefixes = _data["prefixes"]
_units = _data["units"]
# Index the unit records by UCUM code; if a code repeats, the later record wins.
_by_code = dict((u["ucum_code"], u) for u in _units)
# Display names for the base dimensions used in dimension vectors.
BASE_NAMES = {
    "m": "meter",
    "s": "second",
    "g": "gram",
    "rad": "radian",
    "K": "kelvin",
    "C": "coulomb",
    "cd": "candela",
}
# Base dimension symbols, in the order they are iterated and formatted.
BASE_DIMS = list(BASE_NAMES)
| # ββ AI client ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _client():
    """Return a new OpenAI client configured with HF_API_BASE and HF_TOKEN."""
    client = OpenAI(api_key=HF_TOKEN, base_url=HF_API_BASE)
    return client
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # LOAD UCUM DATA | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Base dimension symbols mapped to their display names (same values as the
# definition earlier in this file).
BASE_NAMES = dict(
    m="meter",
    s="second",
    g="gram",
    rad="radian",
    K="kelvin",
    C="coulomb",
    cd="candela",
)
# Iteration order of the base dimensions.
BASE_DIMS = [symbol for symbol in BASE_NAMES]
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # EXPRESSION NORMALIZER | |
| # Converts user-typed notation to UCUM dot notation before parsing. | |
| # kg/(m*s^2) -> kg/(m.s2) W/m^2 -> W/m2 N*m -> N.m mΒ² -> m2 | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| import unicodedata as _ud | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # EXPRESSION NORMALIZER β converts UCUM notation to canonical form | |
| # kg/(m*s^2) β kg/m.s2 W/m^2 β W/m2 N*m β N.m mΒ² β m2 | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def normalize_expr(expr: str) -> str:
    """Rewrite user-typed unit notation into UCUM dot notation.

    Superscript digits and the superscript minus become plain ASCII, the
    multiplication glyphs ``*``, U+00D7 and U+00B7 become ``.``, carets are
    dropped, and whitespace around ``.`` and ``/`` is removed.

    Examples: ``kg/(m*s^2)`` -> ``kg/(m.s2)``, ``W/m^2`` -> ``W/m2``,
    ``N*m`` -> ``N.m``.

    Args:
        expr: The unit expression as typed. Falsy input (``""``/``None``)
            is returned unchanged.

    Returns:
        The normalised expression.
    """
    if not expr:
        return expr
    # The non-ASCII characters are spelled as escapes so a mis-decoded copy of
    # this file cannot turn the table into unequal-length strings, which
    # str.maketrans rejects with ValueError.
    superscripts = str.maketrans(
        "\u2070\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u207b",
        "0123456789-",
    )
    expr = expr.strip().translate(superscripts)
    # '*', multiplication sign and middle dot all mean "times".
    expr = re.sub("[*\u00d7\u00b7]", ".", expr)
    expr = expr.replace("^", "")
    expr = re.sub(r"\s*\.\s*", ".", expr)
    expr = re.sub(r"\s*/\s*", "/", expr)
    return expr.strip()
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# ENGLISH-TO-UCUM TOKENIZER
# Converts any natural-language unit expression to valid UCUM dot/slash form.
#
# Design:
#   1. Build a comprehensive name->code alias table from the UCUM JSON data,
#      including programmatically generated prefix+name combinations that cover
#      every prefixable unit (millisecond, kilonewton, megajoule, nanohenry...).
#   2. Normalise input: map "per"->"/", expand parentheses, strip superscripts.
#   3. Split on "/" boundaries to separate numerator from denominator segments.
#   4. Within each segment, tokenise on whitespace and multiplication glyphs,
#      handle "cubic"/"square"/"squared" adjectives, and hyphen-split compound
#      words (e.g. "foot-pound" -> ["foot","pound"]).
#   5. Resolve each token/phrase longest-first: check _by_code FIRST (prevents
#      alias collisions where e.g. alias['s']='[S]' Svedberg would override
#      's'=second), then _EN_ALIAS, then prefix-code stripping.
#   6. Reassemble as UCUM: numerator atoms joined with ".", over denominator.
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _asc(s: str) -> str: | |
| """Strip accents and normalise to lowercase ASCII.""" | |
| return "".join( | |
| c for c in _ud.normalize("NFD", s) if _ud.category(c) != "Mn" | |
| ).lower().replace("\xa0", " ").strip() | |
# Prefix codes ordered longest-first so multi-character prefixes are tried
# before shorter prefixes that they begin with.
_PREFIX_SORTED = sorted(_prefixes, key=len, reverse=True)
def _build_prefix_name_table() -> dict:
    """Map every spelled-out prefix+unit name to its prefixed UCUM code.

    For each prefix in ``_prefixes`` and each unit in ``_by_code`` whose code
    is not bracketed and contains none of ``. ' % * ^``, the accent-stripped
    prefix name is joined to the accent-stripped unit name (for example
    milli+second -> ``ms``). Extra spellings are generated for:

    * ``metre`` / ``litre`` alongside ``meter`` / ``liter``;
    * vowel elision when the prefix ends and the unit starts with a vowel
      (kilo+ohm -> ``kilohm``);
    * a plural form with a trailing ``s`` unless the name already ends in
      ``s`` or ``z``.

    Returns:
        dict of lowercase name -> UCUM code. Later writes replace earlier ones.
    """
    vowels = "aeiou"
    names: dict = {}

    def register(key: str, code: str) -> None:
        # Record the singular form and, where sensible, a naive plural.
        names[key] = code
        if not key.endswith(("s", "z")):
            names[key + "s"] = code

    # Work out the spellings of each prefixable unit once, keeping _by_code order.
    spellings = []
    for ucode, unit in _by_code.items():
        if ucode.startswith("[") or any(ch in ucode for ch in ".'%*^"):
            continue
        base_name = _asc(unit["name"])
        forms = [base_name]
        if base_name == "meter":
            forms.append("metre")
        if base_name == "liter":
            forms.append("litre")
        spellings.append((ucode, forms))

    for pcode, pinfo in _prefixes.items():
        pname = _asc(pinfo["name"])
        prefix_ends_in_vowel = bool(pname) and pname[-1] in vowels
        for ucode, forms in spellings:
            prefixed_code = pcode + ucode
            for form in forms:
                register(pname + form, prefixed_code)
                # Vowel elision: kilo+ohm -> kilohm, mega+ohm -> megohm
                if prefix_ends_in_vowel and form and form[0] in vowels:
                    register(pname[:-1] + form, prefixed_code)
    return names


_PREFIX_NAME_TABLE = _build_prefix_name_table()
def _build_en_alias() -> dict:
    """Build the English-name -> UCUM-code alias table.

    Three layers are merged, each later layer overwriting the earlier ones:

    1. names taken from the UCUM JSON records (plus bracket-stripped codes);
    2. the generated prefix+name table ``_PREFIX_NAME_TABLE``;
    3. the hand-written overrides below, which therefore always win.

    Returns:
        dict mapping a name, abbreviation or shorthand to a UCUM code or to a
        compound UCUM string such as ``kW.h`` or ``[mi_i]/h``.
    """
    # Layer 1: JSON unit names. Iterating in reverse means that, for a
    # duplicated name, the record appearing first in the file is written last
    # and so is the one kept.
    from_json: dict = {}
    for unit in reversed(_units):
        ucum_code = unit["ucum_code"]
        plain_name = _asc(unit["name"])
        if plain_name:
            from_json[plain_name] = ucum_code
        # Bracket-stripped variant: '[mi_i]' is also reachable as 'mi_i'.
        if ucum_code.startswith("[") and ucum_code.endswith("]"):
            from_json[ucum_code[1:-1].lower()] = ucum_code

    # Layer 3: manual overrides.
    manual = {
        # -- Length --
        "meter": "m", "meters": "m", "metre": "m", "metres": "m",
        "kilometer": "km", "kilometers": "km", "kilometre": "km", "kilometres": "km",
        "centimeter": "cm", "centimeters": "cm", "centimetre": "cm", "centimetres": "cm",
        "millimeter": "mm", "millimeters": "mm", "millimetre": "mm", "millimetres": "mm",
        "micrometer": "um", "micrometers": "um", "micrometre": "um",
        "nanometer": "nm", "nanometers": "nm", "nanometre": "nm",
        "foot": "[ft_i]", "feet": "[ft_i]", "ft": "[ft_i]",
        "inch": "[in_i]", "inches": "[in_i]", "in": "[in_i]",
        "yard": "[yd_i]", "yards": "[yd_i]", "yd": "[yd_i]",
        "mile": "[mi_i]", "miles": "[mi_i]", "mi": "[mi_i]",
        "nautical mile": "[nmi_i]", "nautical miles": "[nmi_i]", "nmi": "[nmi_i]",
        "fathom": "[fth_i]", "fathoms": "[fth_i]",
        "angstrom": "Ao", "angstroms": "Ao", "angstroem": "Ao",
        "parsec": "pc", "parsecs": "pc",
        "light-year": "[ly]", "light year": "[ly]", "light years": "[ly]",
        "astronomical unit": "AU", "au": "AU",
        "hand": "[hd_i]", "mil": "[mil_i]",
        # -- Area --
        "acre": "[acr_us]", "acres": "[acr_us]",
        "square meter": "m2", "square meters": "m2", "square metre": "m2", "square metres": "m2",
        "square kilometer": "km2", "square kilometre": "km2",
        "square foot": "[sft_i]", "square feet": "[sft_i]", "sq ft": "[sft_i]", "sq. ft": "[sft_i]",
        "square inch": "[sin_i]", "square inches": "[sin_i]", "sq in": "[sin_i]",
        "square yard": "[syd_i]", "square yards": "[syd_i]",
        "square mile": "[smi_us]", "square miles": "[smi_us]", "sq mi": "[smi_us]",
        # -- Volume --
        "liter": "l", "liters": "l", "litre": "l", "litres": "l",
        "milliliter": "ml", "milliliters": "ml", "millilitre": "ml", "millilitres": "ml",
        "microliter": "ul", "microliters": "ul", "microlitre": "ul",
        "deciliter": "dl", "deciliters": "dl", "decilitre": "dl", "decilitres": "dl",
        "dl": "dl", "dL": "dl",
        "cubic meter": "m3", "cubic meters": "m3", "cubic metre": "m3",
        "cubic centimeter": "cm3", "cubic centimeters": "cm3", "cubic centimetre": "cm3",
        "cubic millimeter": "mm3",
        "cubic foot": "[cft_i]", "cubic feet": "[cft_i]", "cu ft": "[cft_i]",
        "cubic inch": "[cin_i]", "cubic inches": "[cin_i]", "cu in": "[cin_i]",
        "cubic yard": "[cyd_i]", "cubic yards": "[cyd_i]",
        "gallon": "[gal_us]", "gallons": "[gal_us]", "gal": "[gal_us]",
        "quart": "[qt_us]", "quarts": "[qt_us]", "qt": "[qt_us]",
        "pint": "[pt_us]", "pints": "[pt_us]", "pt": "[pt_us]",
        "cup": "[cup_us]", "cups": "[cup_us]",
        "tablespoon": "[tbs_us]", "tablespoons": "[tbs_us]", "tbsp": "[tbs_us]",
        "teaspoon": "[tsp_us]", "teaspoons": "[tsp_us]", "tsp": "[tsp_us]",
        "fluid ounce": "[foz_us]", "fluid ounces": "[foz_us]", "fl oz": "[foz_us]",
        "barrel": "[bbl_us]", "barrels": "[bbl_us]", "bbl": "[bbl_us]",
        "bushel": "[bu_us]", "bushels": "[bu_us]",
        # -- Mass --
        "gram": "g", "grams": "g",
        "kilogram": "kg", "kilograms": "kg",
        "milligram": "mg", "milligrams": "mg",
        "microgram": "ug", "micrograms": "ug",
        "nanogram": "ng", "nanograms": "ng",
        "tonne": "t", "tonnes": "t", "metric ton": "t", "metric tons": "t",
        "pound": "[lb_av]", "pounds": "[lb_av]", "lb": "[lb_av]", "lbm": "[lb_av]",
        "ounce": "[oz_av]", "ounces": "[oz_av]", "oz": "[oz_av]",
        "grain": "[gr]", "grains": "[gr]", "gr": "[gr]",
        "stone": "[stone_av]",
        "short ton": "[ston_av]", "short tons": "[ston_av]",
        "long ton": "[lton_av]", "long tons": "[lton_av]",
        "dram": "[dr_av]", "drams": "[dr_av]",
        # -- Force --
        "newton": "N", "newtons": "N",
        "kilonewton": "kN", "kilonewtons": "kN",
        "meganewton": "MN", "meganewtons": "MN",
        "millinewton": "mN", "micronewton": "uN",
        "nanonewton": "nN", "nanonewtons": "nN",
        "dyne": "dyn", "dynes": "dyn",
        "pound-force": "[lbf_av]", "pounds-force": "[lbf_av]",
        "pound force": "[lbf_av]", "pounds force": "[lbf_av]",
        "lbf": "[lbf_av]",
        "gram-force": "gf", "gram force": "gf",
        "kilogram-force": "kgf", "kilogram force": "kgf", "kgf": "kgf",
        # -- Pressure --
        "pascal": "Pa", "pascals": "Pa",
        "kilopascal": "kPa", "kilopascals": "kPa",
        "megapascal": "MPa", "megapascals": "MPa",
        "gigapascal": "GPa", "gigapascals": "GPa",
        "bar": "bar", "bars": "bar",
        "millibar": "mbar", "millibars": "mbar",
        "atmosphere": "atm", "atmospheres": "atm", "atm": "atm",
        "psi": "[psi]", "technical atmosphere": "att",
        # -- Energy --
        "joule": "J", "joules": "J",
        "kilojoule": "kJ", "kilojoules": "kJ",
        "megajoule": "MJ", "megajoules": "MJ",
        "gigajoule": "GJ", "gigajoules": "GJ",
        # IMPORTANT: 'cal' must map to the thermochemical calorie, NOT [Cal]
        # (nutrition). The bracket-stripped entry from layer 1 would put
        # [Cal] under 'cal', so it is overridden here.
        "cal": "cal", "calorie": "cal", "calories": "cal",
        "kilocalorie": "kcal", "kilocalories": "kcal", "kcal": "kcal",
        "btu": "[Btu_IT]", "btus": "[Btu_IT]",
        "british thermal unit": "[Btu_IT]", "british thermal units": "[Btu_IT]",
        "electronvolt": "eV", "electronvolts": "eV", "ev": "eV",
        "erg": "erg", "ergs": "erg",
        # kWh and friends: stored as compound UCUM strings for direct passthrough.
        "kilowatt-hour": "kW.h", "kilowatt-hours": "kW.h",
        "kilowatt hour": "kW.h", "kilowatt hours": "kW.h",
        "kwh": "kW.h", "kWh": "kW.h", "KWh": "kW.h",
        "watt-hour": "W.h", "watt hour": "W.h", "wh": "W.h", "Wh": "W.h",
        "megawatt-hour": "MW.h", "megawatt hour": "MW.h", "mwh": "MW.h", "MWh": "MW.h",
        # -- Power --
        "watt": "W", "watts": "W",
        "kilowatt": "kW", "kilowatts": "kW",
        "megawatt": "MW", "megawatts": "MW",
        "gigawatt": "GW", "gigawatts": "GW",
        "terawatt": "TW", "terawatts": "TW",
        "milliwatt": "mW", "milliwatts": "mW",
        "microwatt": "uW", "microwatts": "uW",
        "horsepower": "[HP]", "hp": "[HP]",
        # -- Temperature --
        "kelvin": "K", "kelvins": "K",
        "celsius": "Cel", "centigrade": "Cel",
        "fahrenheit": "[degF]", "rankine": "[degR]", "reaumur": "[degRe]",
        "degree celsius": "Cel", "degrees celsius": "Cel",
        "degree centigrade": "Cel", "degrees centigrade": "Cel",
        "degree fahrenheit": "[degF]", "degrees fahrenheit": "[degF]",
        "degree kelvin": "K", "degrees kelvin": "K",
        "degree rankine": "[degR]", "degrees rankine": "[degR]",
        "deg c": "Cel", "deg f": "[degF]", "degc": "Cel", "degf": "[degF]",
        # Two-word temperature shortcuts, matched as phrases before the
        # single-letter token is considered on its own.
        "degree f": "[degF]", "degrees f": "[degF]",
        "degree c": "Cel", "degrees c": "Cel",
        "degree k": "K", "degrees k": "K",
        "degree r": "[degR]", "degrees r": "[degR]",
        # -- Time --
        "second": "s", "seconds": "s", "sec": "s", "secs": "s",
        "millisecond": "ms", "milliseconds": "ms",
        "microsecond": "us", "microseconds": "us",
        "nanosecond": "ns", "nanoseconds": "ns",
        "picosecond": "ps", "picoseconds": "ps",
        "femtosecond": "fs", "femtoseconds": "fs",
        "minute": "min", "minutes": "min",
        "hour": "h", "hours": "h", "hr": "h", "hrs": "h",
        "day": "d", "days": "d",
        "week": "wk", "weeks": "wk",
        "month": "mo", "months": "mo",
        "year": "a", "years": "a",
        # -- Frequency --
        "hertz": "Hz", "kilohertz": "kHz", "megahertz": "MHz",
        "gigahertz": "GHz", "terahertz": "THz",
        "rpm": "min-1", "rps": "s-1",
        # -- Electrical --
        "ampere": "A", "amperes": "A", "amp": "A", "amps": "A",
        "milliampere": "mA", "milliamperes": "mA",
        "microampere": "uA", "microamperes": "uA",
        "nanoampere": "nA", "kiloampere": "kA",
        "volt": "V", "volts": "V",
        "kilovolt": "kV", "millivolt": "mV", "millivolts": "mV", "microvolt": "uV",
        "ohm": "Ohm", "ohms": "Ohm",
        "kilohm": "kOhm", "kilohms": "kOhm",
        "megohm": "MOhm", "megohms": "MOhm",
        "megaohm": "MOhm", "megaohms": "MOhm",
        "milliohm": "mOhm", "milliohms": "mOhm",
        "farad": "F", "farads": "F",
        "microfarad": "uF", "picofarad": "pF", "nanofarad": "nF",
        "henry": "H", "henries": "H",
        "millihenry": "mH", "microhenry": "uH", "nanohenry": "nH",
        "siemens": "S", "mho": "mho",
        "weber": "Wb", "webers": "Wb",
        "tesla": "T", "teslas": "T", "millitesla": "mT", "microtesla": "uT",
        "gauss": "G", "gausses": "G",
        "coulomb": "C", "coulombs": "C", "microcoulomb": "uC", "nanocoulomb": "nC",
        # -- Radiation / light --
        "candela": "cd", "candelas": "cd",
        "lumen": "lm", "lumens": "lm", "lux": "lx",
        "becquerel": "Bq", "becquerels": "Bq",
        "kilobecquerel": "kBq", "megabecquerel": "MBq",
        "gray": "Gy", "grays": "Gy", "milligray": "mGy",
        "sievert": "Sv", "sieverts": "Sv",
        "millisievert": "mSv", "microsievert": "uSv",
        "roentgen": "R", "roentgens": "R",
        "curie": "Ci", "curies": "Ci", "millicurie": "mCi", "millicuries": "mCi", "microcurie": "uCi",
        # -- Amount / concentration --
        "mole": "mol", "moles": "mol",
        "millimole": "mmol", "millimoles": "mmol",
        "micromole": "umol", "micromoles": "umol",
        "nanomole": "nmol", "nanomoles": "nmol",
        "equivalent": "eq", "equivalents": "eq", "osmole": "osm",
        # -- Angle --
        "radian": "rad", "radians": "rad",
        "degree": "deg", "degrees": "deg",
        "steradian": "sr", "steradians": "sr", "gon": "gon",
        # -- Data --
        "bit": "bit", "bits": "bit", "byte": "By", "bytes": "By",
        "kilobyte": "kBy", "kilobytes": "kBy", "megabyte": "MBy", "megabytes": "MBy",
        "gigabyte": "GBy", "gigabytes": "GBy", "terabyte": "TBy", "terabytes": "TBy",
        "baud": "Bd",
        # -- Velocity shortcuts --
        "knot": "[kn_i]", "knots": "[kn_i]",
        "mph": "[mi_i]/h", "kph": "km/h",
        "ft/s": "[ft_i]/s", "ft/min": "[ft_i]/min",
        # -- Viscosity --
        "poise": "P", "centipoise": "cP", "stokes": "St", "centistokes": "cSt",
        # -- Misc --
        "percent": "%", "katal": "kat",
        "diopter": "[diop]", "dioptre": "[diop]",
        "tex": "tex", "denier": "[den]", "smoot": "[smoot]",
        "neper": "Np", "bel": "B", "decibel": "dB",
        # -- Torque two-word phrases --
        # Listed as phrases so that they are matched before the single-token
        # fallback resolves "pound" to [lb_av] (mass) instead of [lbf_av] (force).
        "foot pound": "[ft_i].[lbf_av]", "foot pounds": "[ft_i].[lbf_av]",
        "ft lb": "[ft_i].[lbf_av]", "ft lbf": "[ft_i].[lbf_av]",
        "lbf ft": "[lbf_av].[ft_i]",
        "pound foot": "[lbf_av].[ft_i]", "pound feet": "[lbf_av].[ft_i]",
        "pound ft": "[lbf_av].[ft_i]",
        # -- Common density / compound shorthands the LLM produces --
        "lb/ft3": "[lb_av]/[ft_i]3", "lb/ft^3": "[lb_av]/[ft_i]3",
        "lbm/ft3": "[lb_av]/[ft_i]3", "lbm/ft^3": "[lb_av]/[ft_i]3",
        "g/cm3": "g/cm3", "g/ml": "g/ml",
        "kg/m3": "kg/m3", "kg/m^3": "kg/m3",
    }

    # Merge in precedence order: JSON names, then generated prefix names
    # (layer 2), then the manual overrides.
    return {**from_json, **_PREFIX_NAME_TABLE, **manual}


_EN_ALIAS = _build_en_alias()
| _CUBIC_WORDS = {"cubic", "cu"} | |
| _SQ_WORDS = {"square", "sq"} | |
| # ββ Tokenizer internals βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _tok_strip_exp(token: str) -> tuple: | |
| """Split trailing integer exponent: 'm3'β('m',3), 's-2'β('s',-2).""" | |
| m = re.match(r"^(.*?)(-?\d+)$", token) | |
| if m and m.group(1): | |
| return m.group(1), int(m.group(2)) | |
| return token, 1 | |
def _tok_resolve_single(token: str) -> str | None:
    """Resolve one unit word or abbreviation to a UCUM code or compound string.

    Lookup order:
      1. exact UCUM code in ``_by_code`` (so ``'s'`` stays "second" even if an
         alias with the same spelling exists);
      2. ``_EN_ALIAS`` by original spelling, lowercase, then accent-stripped;
      3. case-sensitive prefix code + UCUM code (``'km'``, ``'MPa'``);
      4. case-insensitive prefix code + alias or UCUM code.

    Returns:
        The resolved string, or ``None`` for an empty or unknown token.
    """
    if not token:
        return None
    if token in _by_code:
        return token

    lowered = token.lower()
    for candidate in (token, lowered, _asc(token)):
        if candidate in _EN_ALIAS:
            return _EN_ALIAS[candidate]

    # Case-sensitive prefix stripping against known UCUM codes.
    for prefix in _PREFIX_SORTED:
        if not token.startswith(prefix):
            continue
        remainder = token[len(prefix):]
        if remainder and remainder in _by_code:
            return prefix + remainder

    # Case-insensitive prefix, with the remainder looked up as an alias
    # (also with trailing 's' characters stripped) and then as a UCUM code.
    for prefix in _PREFIX_SORTED:
        folded = prefix.lower()
        if not lowered.startswith(folded) or len(lowered) <= len(folded):
            continue
        rest = lowered[len(folded):]
        for alias_key in (rest, rest.rstrip("s")):
            if alias_key not in _EN_ALIAS:
                continue
            target = _EN_ALIAS[alias_key]
            # Compound alias values cannot take a prefix.
            if not ("." in target or "/" in target):
                return prefix + target
        if rest in _by_code:
            return prefix + rest
    return None
def _tok_phrase_match(words: list, start: int) -> tuple | None:
    """Find the longest known phrase beginning at ``words[start]``.

    Phrases of up to six words are tried, longest first. For a single word an
    exact ``_by_code`` hit takes priority over ``_EN_ALIAS`` so that a UCUM
    code is never shadowed by an alias with the same spelling; for longer
    phrases the alias table is consulted first.

    Returns:
        ``(ucum_code_or_compound, words_consumed)``, or ``None`` if nothing
        starting at ``start`` is recognised.
    """
    longest = min(start + 6, len(words)) - start
    for span in range(longest, 0, -1):
        phrase = " ".join(words[start:start + span])
        is_code = phrase in _by_code
        if span == 1 and is_code:
            return phrase, 1
        for key in (phrase, phrase.lower(), _asc(phrase)):
            if key in _EN_ALIAS:
                return _EN_ALIAS[key], span
        # Only reachable with span > 1: the single-word case returned above.
        if is_code:
            return phrase, span
    return None
def _tok_segment(text: str) -> list | None:
    """Tokenise one multiplication segment into UCUM atom strings.

    The segment is the text on one side of a ``/``. Superscripts are turned
    into ASCII digits, parentheses and multiplication glyphs become spaces,
    ``cubic``/``square``/``squared`` adjectives are folded into exponents,
    hyphenated words are split, and every resulting token or phrase is
    resolved through ``_tok_phrase_match`` / ``_tok_resolve_single``.

    Args:
        text: One numerator or denominator segment.

    Returns:
        A list of UCUM atoms, or ``None`` if any token cannot be resolved or
        the segment yields no atoms.
    """
    # Non-ASCII characters are written as escapes: a mis-decoded literal makes
    # the two maketrans arguments differ in length, which raises ValueError.
    superscripts = str.maketrans(
        "\u2070\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u207b",
        "0123456789-",
    )
    text = text.translate(superscripts)
    text = text.replace("(", " ").replace(")", " ")
    # '*', multiplication sign, middle dot and bullet all separate factors.
    text = re.sub("[*\u00d7\u00b7\u2022]", " ", text)
    text = text.replace("^", "")
    # Dots between alphanumeric/bracket tokens become spaces.
    text = re.sub(r"(?<=[A-Za-z0-9\]_])\.(?=[A-Za-z0-9\[\(])", " ", text)
    raw = text.split()

    # Expand adjectives and split hyphenated words.
    expanded: list = []
    i = 0
    while i < len(raw):
        part = raw[i]
        lowered = part.lower()
        # "cubic X" -> "X3"
        if lowered in _CUBIC_WORDS and i + 1 < len(raw):
            expanded.append(raw[i + 1] + "3")
            i += 2
            continue
        # "square X" / "sq X" -> "X2"
        if lowered in _SQ_WORDS and i + 1 < len(raw):
            expanded.append(raw[i + 1] + "2")
            i += 2
            continue
        # Trailing "squared" doubles the exponent of the previous token.
        if lowered == "squared" and expanded:
            base, exp = _tok_strip_exp(expanded[-1])
            expanded[-1] = f"{base}{2 * exp}"
            i += 1
            continue
        if "-" in part and not part.startswith("["):
            if re.match(r"^[A-Za-z_\[\]]+\-\d+$", part):
                # A negative exponent such as "s-2": keep the token whole.
                expanded.append(part)
            else:
                expanded.extend(piece for piece in part.split("-") if piece)
        else:
            expanded.append(part)
        i += 1

    # Resolve phrases first, then single tokens with an optional exponent.
    result: list = []
    i = 0
    while i < len(expanded):
        match = _tok_phrase_match(expanded, i)
        if match:
            code, consumed = match
            i += consumed
        else:
            base, exp = _tok_strip_exp(expanded[i])
            code = _tok_resolve_single(base)
            if code is None:
                return None
            # An exponent is only appended to a simple (non-compound) code.
            if exp != 1 and "." not in code and "/" not in code:
                code = f"{code}{exp}"
            i += 1
        # Dot-only compound alias values (e.g. "kW.h") are split into atoms;
        # values containing "/" are passed through whole for the caller.
        if "." in code and "/" not in code:
            result.extend(code.split("."))
        else:
            result.append(code)
    return result if result else None
def english_to_ucum(expr: str) -> str | None:
    """Convert an English unit expression to UCUM dot/slash notation.

    Examples (intended results):
        "kilograms per cubic meter"   -> "kg/m3"
        "pounds per cubic foot"       -> "[lb_av]/[ft_i]3"
        "foot-pounds"                 -> "[ft_i].[lbf_av]"
        "kilowatt hours"              -> "kW.h"
        "BTU per pound per degree F"  -> "[Btu_IT]/[lb_av].[degF]"
        "joules per kilogram kelvin"  -> "J/kg.K"
        "pascal seconds"              -> "Pa.s"
        "meters per second squared"   -> "m/s2"
        "kg/m^3"                      -> "kg/m3"  (already-UCUM passthrough)

    Args:
        expr: Natural-language or UCUM-like unit expression.

    Returns:
        The UCUM string, or ``None`` when the input is empty or any token
        cannot be resolved. Every ``/``-separated segment after the first is
        treated as part of the denominator.
    """
    if not expr:
        return None
    stripped = expr.strip()

    # Fast path: the whole expression is itself an alias (kWh, mph, ft/s, ...).
    for key in (stripped, stripped.lower(), _asc(stripped)):
        if key in _EN_ALIAS:
            return _EN_ALIAS[key]

    # Non-ASCII characters are written as escapes: a mis-decoded literal makes
    # the two maketrans arguments differ in length, which raises ValueError.
    superscripts = str.maketrans(
        "\u2070\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079\u207b",
        "0123456789-",
    )
    text = stripped.translate(superscripts)
    # "per" / "divided by" / "over" -> "/"
    text = re.sub(r"\b(per|divided\s+by|over)\b", "/", text, flags=re.IGNORECASE)
    text = re.sub(r"\s*/\s*", "/", text).strip()

    # Strip one pair of outer parentheses, but only when the opening "(" is
    # actually closed by the final ")". For "(a)/(b)" the first group closes
    # early, so nothing must be stripped.
    if text.startswith("(") and text.endswith(")"):
        depth = 0
        for ci, ch in enumerate(text):
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
                if depth == 0:
                    if ci == len(text) - 1:
                        text = text[1:-1].strip()
                    break
    # Drop innermost parenthesised groups: J/(kg.K) -> J/kg.K
    text = re.sub(r"\(([^()]+)\)", r"\1", text)

    segments = text.split("/")
    num_atoms = _tok_segment(segments[0].strip())
    if num_atoms is None:
        return None
    den_atoms: list = []
    for den_text in (seg.strip() for seg in segments[1:]):
        if not den_text:
            continue
        atoms = _tok_segment(den_text)
        if atoms is None:
            return None
        den_atoms.extend(atoms)

    def split_atom(atom: str) -> tuple:
        # Split an atom such as "[mi_i]/h" into (top factors, bottom factors).
        top, _, bottom = atom.partition("/")
        return ([p for p in top.split(".") if p],
                [p for p in bottom.split(".") if p])

    # Flatten compound atoms that contain "/" (e.g. "[mi_i]/h" from "mph").
    clean_num: list = []
    extra_den: list = []
    for atom in num_atoms:
        if "/" in atom:
            top, bottom = split_atom(atom)
            clean_num.extend(top)
            extra_den.extend(bottom)
        else:
            clean_num.append(atom)
    clean_den: list = []
    for atom in den_atoms:
        if "/" in atom:
            # Dividing by a/b multiplies by b/a: a goes below, b goes on top.
            top, bottom = split_atom(atom)
            clean_den.extend(top)
            clean_num.extend(bottom)
        else:
            clean_den.append(atom)

    final_den = clean_den + extra_den
    ucum_num = ".".join(clean_num)
    if not ucum_num:
        return None
    if not final_den:
        return ucum_num
    return f"{ucum_num}/{'.'.join(final_den)}"
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # UNIT RESOLVER β looks up a single UCUM code (possibly prefixed) | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def resolve_unit(code: str):
    """Look up a single (possibly prefixed) unit.

    The code is normalised with ``normalize_expr`` and then tried as an exact
    UCUM code, as an alias for a simple UCUM code, and finally as a prefix
    code followed by a UCUM code.

    Returns:
        ``(dim_vector, factor, name, property, is_special)``, or ``None`` when
        the code is empty or unknown, or when the prefixed unit has no
        ``factor_to_base``. Prefixed results always report ``is_special`` as
        ``False``.
    """

    def describe(unit):
        # Result tuple for an unprefixed unit record.
        return (
            unit["dim_vector"],
            unit["factor_to_base"],
            unit["name"],
            unit["property"],
            unit.get("is_special", False),
        )

    code = normalize_expr(code)
    if not code:
        return None
    if code in _by_code:
        return describe(_by_code[code])

    # Alias lookup: only aliases that point at a simple (non-compound) code.
    aliased = _EN_ALIAS.get(code) or _EN_ALIAS.get(code.lower())
    is_simple_alias = (
        aliased
        and isinstance(aliased, str)
        and "." not in aliased
        and "/" not in aliased
    )
    if is_simple_alias and aliased in _by_code:
        return describe(_by_code[aliased])

    # Prefix stripping, longest prefix first.
    for prefix in _PREFIX_SORTED:
        if not code.startswith(prefix) or len(code) <= len(prefix):
            continue
        rest = code[len(prefix):]
        if rest not in _by_code:
            continue
        unit = _by_code[rest]
        base_factor = unit["factor_to_base"]
        if base_factor is None:
            return None
        prefix_info = _prefixes[prefix]
        return (
            unit["dim_vector"],
            prefix_info["value"] * base_factor,
            prefix_info["name"] + unit["name"],
            unit["property"],
            False,
        )
    return None
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # COMPOUND UNIT PARSER β handles kg.m/s2, N.m, Pa.s, W/m2, J/(kg.K), kW.h | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _zero_dims():
    """Return a fresh dimension vector with exponent 0 for every base dimension."""
    return dict.fromkeys(BASE_DIMS, 0)
def _add_dims(a, b, sign=1):
    """Return ``a + sign * b`` per base dimension; missing entries count as 0."""
    combined = {}
    for dim in BASE_DIMS:
        combined[dim] = a.get(dim, 0) + sign * b.get(dim, 0)
    return combined
def _scale_dims(dv, exp):
    """Return ``dv`` with every base-dimension exponent multiplied by ``exp``."""
    scaled = {}
    for dim in BASE_DIMS:
        scaled[dim] = dv.get(dim, 0) * exp
    return scaled
def _parse_segment(s: str):
    """Parse a dot-separated product of UCUM atoms, e.g. ``kg.m2`` or ``(m.s2)``.

    Leading/trailing parentheses are stripped, each atom is split into a unit
    and an optional trailing integer exponent, and the dimension vectors and
    conversion factors are accumulated.

    Returns:
        ``(dim_vector, factor, display_name)``, or ``(None, None, None)`` when
        an atom cannot be resolved or has no conversion factor.
    """
    s = s.strip().strip("()")
    total_dv = _zero_dims()
    total_fac = 1.0
    names = []
    for atom in s.split("."):
        atom = atom.strip()
        if not atom:
            continue
        # Same "unit + trailing integer" rule as the tokenizer, so reuse it
        # instead of repeating the regular expression here.
        base_tok, exp = _tok_strip_exp(atom)
        resolved = resolve_unit(base_tok)
        if resolved is None:
            return None, None, None
        dv, fac, name, _, _ = resolved
        if fac is None:
            return None, None, None
        total_dv = _add_dims(total_dv, _scale_dims(dv, exp))
        total_fac *= fac ** exp
        names.append(f"{name}^{exp}" if exp != 1 else name)
    # Middle dot (U+00B7) as the product sign; written as an escape so it
    # survives a mis-decoded copy of the file.
    return total_dv, total_fac, " \u00b7 ".join(names)
def parse_compound(expr: str):
    """Parse a compound expression of the form ``numerator[/denominator]``.

    Only the first ``/`` splits the expression; each side is handed to
    ``_parse_segment``. An empty numerator counts as dimensionless with
    factor 1.

    Returns:
        ``(dim_vector, factor, display)``, or ``(None, None, None)`` if either
        side fails to parse. ``factor`` is ``None`` when the denominator
        factor is falsy.
    """
    failure = (None, None, None)
    numerator, _, denominator = normalize_expr(expr).partition("/")
    numerator = numerator.strip()
    denominator = denominator.strip()

    if numerator:
        num_dv, num_fac, num_name = _parse_segment(numerator)
    else:
        num_dv, num_fac, num_name = _zero_dims(), 1.0, ""
    if num_dv is None:
        return failure
    if not denominator:
        return num_dv, num_fac, num_name

    den_dv, den_fac, den_name = _parse_segment(denominator)
    if den_dv is None:
        return failure
    combined_dv = _add_dims(num_dv, _scale_dims(den_dv, -1))
    if den_fac:
        combined_fac = num_fac / den_fac
    else:
        combined_fac = None
    if den_name:
        display = f"{num_name} / {den_name}"
    else:
        display = num_name
    return combined_dv, combined_fac, display
def is_compound(expr: str) -> bool:
    """Tell whether *expr* combines several units with ``.`` or ``/``.

    The expression is normalised first. An operator only counts when it sits
    outside square brackets, because bracketed UCUM codes are single units
    even if they contain such characters. Previously only ``/`` was checked
    this way, so a code with a dot inside brackets (UCUM lists ``B[10.nV]``)
    was classified as compound.

    Examples: ``[lb_av]/[ft_i]3`` and ``kW.h`` are compound;
    ``[in_i'H2O]`` is a single unit.
    """
    expr = normalize_expr(expr)
    depth = 0
    for ch in expr:
        if ch == "[":
            depth += 1
        elif ch == "]":
            depth -= 1
        elif depth == 0 and ch in "./":
            return True
    return False
def resolve_any(expr: str):
    """Resolve a UCUM code, a compound UCUM expression, or an English unit string.

    Strategy:
      1. If the normalised input is a known single UCUM code, use it directly.
      2. If it looks compound and parses as UCUM, use that result.
      3. Otherwise translate with ``english_to_ucum`` and resolve the result,
         falling back to the normalised input when translation yields nothing
         new.

    Returns:
        ``(dv, fac, name, prop, special)``; all five are ``None`` on failure.
        Compound results use ``"compound quantity"`` as the property and
        ``False`` for ``special``.
    """
    unresolved = (None, None, None, None, None)
    stripped = expr.strip()
    norm = normalize_expr(stripped)

    # Fast path: already a direct UCUM code.
    if norm in _by_code:
        unit = _by_code[norm]
        return (
            unit["dim_vector"],
            unit["factor_to_base"],
            unit["name"],
            unit["property"],
            unit.get("is_special", False),
        )

    # Fast path: already a valid compound UCUM expression.
    if is_compound(norm):
        dv, fac, display = parse_compound(norm)
        if dv is not None:
            return dv, fac, display, "compound quantity", False

    # English tokeniser: convert natural language to UCUM.
    translated = english_to_ucum(stripped)
    use_translation = bool(translated) and translated != stripped
    candidate = normalize_expr(translated if use_translation else norm)

    if not is_compound(candidate):
        single = resolve_unit(candidate)
        return unresolved if single is None else single

    dv, fac, display = parse_compound(candidate)
    if dv is None:
        return unresolved
    return dv, fac, display, "compound quantity", False
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # FORMATTING | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _fmt_exp(exp: int) -> str:
    """
    Render an exponent with Unicode superscript characters.

    The minus sign and the digits 0-9 are mapped to their superscript
    forms; any other character of str(exp) is passed through unchanged.
    The table is written with explicit escapes so that it cannot be
    damaged by a wrong source-file encoding.
    """
    superscripts = str.maketrans(
        "-0123456789",
        "\u207b\u2070\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079",
    )
    return str(exp).translate(superscripts)
def format_dims(dv: dict) -> str:
    """
    Format a dimension vector as a human-readable product of base units.

    Dimensions are listed in BASE_DIMS order, zero exponents are skipped,
    and an exponent of 1 is left implicit. Terms are joined with a middle
    dot; a vector with no non-zero exponent yields "dimensionless".
    """
    terms = []
    for dim in BASE_DIMS:
        power = dv.get(dim, 0)
        if power == 0:
            continue
        label = BASE_NAMES[dim]
        terms.append(label if power == 1 else f"{label}{_fmt_exp(power)}")
    # U+00B7 MIDDLE DOT, written as an escape so the separator survives
    # any re-encoding of the source file.
    return " \u00b7 ".join(terms) if terms else "dimensionless"
def sig_figs(val: float, n: int = 8) -> str:
    """
    Format *val* with at most *n* significant figures.

    Zero is rendered as "0". Magnitudes in [1e-4, 1e12) are rounded to
    *n* significant figures and printed in plain positional notation with
    no trailing ".0". Anything outside that range uses the compact
    "{:.{n}g}" form, which may be exponent notation.
    """
    if val == 0:
        return "0"
    if 0.0001 <= abs(val) < 1e12:
        rounded = float(f"{val:.{n}g}")
        # repr() gives the shortest positional form in this range; a second
        # ":g" pass would silently cap the output at 6 significant digits
        # and switch to exponent notation from 1e6 upwards.
        text = repr(rounded)
        return text[:-2] if text.endswith(".0") else text
    return f"{val:.{n}g}"
# ─────────────────────────────────────────────────────────────────────────────
# NONLINEAR TEMPERATURE CONVERSIONS
# ─────────────────────────────────────────────────────────────────────────────
# Direct conversion formulas keyed by (from_code, to_code) UCUM code pairs.
# Each value takes a number in the source unit and returns the number in the
# target unit. Only the pairs listed here are available as direct lookups;
# kelvin ("K") appears on one side of every pair except Cel <-> [degF].
_NONLINEAR = {
    # Celsius <-> kelvin: pure offset.
    ("Cel", "K"): lambda x: x + 273.15,
    ("K", "Cel"): lambda x: x - 273.15,
    # Fahrenheit <-> kelvin: offset to an absolute scale, then 5/9 scaling.
    ("[degF]", "K"): lambda x: (x + 459.67) * 5 / 9,
    ("K", "[degF]"): lambda x: x * 9 / 5 - 459.67,
    # Celsius <-> Fahrenheit.
    ("Cel", "[degF]"): lambda x: x * 9 / 5 + 32,
    ("[degF]", "Cel"): lambda x: (x - 32) * 5 / 9,
    # [degR] <-> kelvin: scale factor only, no offset.
    ("[degR]", "K"): lambda x: x * 5 / 9,
    ("K", "[degR]"): lambda x: x * 9 / 5,
    # [degRe] <-> kelvin: 5/4 scaling plus the 273.15 offset.
    ("[degRe]","K"): lambda x: x * 5 / 4 + 273.15,
    ("K", "[degRe]"): lambda x: (x - 273.15) * 4 / 5,
}
def _nl_key(from_code: str, to_code: str):
    """
    Find the _NONLINEAR key for a (from, to) pair of unit codes.

    The pair is tried verbatim first. If that misses, each code is mapped
    through _EN_ALIAS (looked up by its lower-cased form, falling back to
    the code itself) and the aliased pair is tried. Returns the matching
    key tuple, or None when neither form is in the table.
    """
    pair = (from_code, to_code)
    if pair in _NONLINEAR:
        return pair
    canonical = (
        _EN_ALIAS.get(from_code.lower(), from_code),
        _EN_ALIAS.get(to_code.lower(), to_code),
    )
    if canonical in _NONLINEAR:
        return canonical
    return None
# ─────────────────────────────────────────────────────────────────────────────
# CONVERSION ENGINE
# ─────────────────────────────────────────────────────────────────────────────
def _nonlinear_converter(from_expr: str, to_expr: str):
    """
    Return a callable converting a value between two special units, or None.

    Preference order: a direct _NONLINEAR entry, the identity when both
    expressions are the same, then a two-step route through kelvin when
    both legs (from -> K and K -> to) are in the table.
    """
    direct = _nl_key(from_expr, to_expr)
    if direct:
        return _NONLINEAR[direct]
    if from_expr == to_expr:
        return lambda x: x
    to_kelvin = _nl_key(from_expr, "K")
    from_kelvin = _nl_key("K", to_expr)
    if to_kelvin and from_kelvin:
        up, down = _NONLINEAR[to_kelvin], _NONLINEAR[from_kelvin]
        return lambda x: down(up(x))
    return None


def do_conversion(value: float, from_expr: str, to_expr: str) -> dict:
    """
    Convert *value* from one unit expression to another.

    Both expressions are normalised and resolved with resolve_any. The
    returned dict is one of:
      - {"error": ...}            a unit is unrecognised, or no formula
                                  is available for the pair;
      - {"op": "incompatible"}    the dimension vectors differ;
      - {"op": "convert_nonlinear"}  at least one unit is flagged special
                                  and a formula from _NONLINEAR is used
                                  (directly, or chained through kelvin);
      - {"op": "convert"}         plain multiplication by fac_a / fac_b.
    """
    # Normalize both before anything else
    from_expr = normalize_expr(from_expr)
    to_expr = normalize_expr(to_expr)
    dv_a, fac_a, name_a, prop_a, special_a = resolve_any(from_expr)
    dv_b, fac_b, name_b, prop_b, special_b = resolve_any(to_expr)

    if dv_a is None:
        return {"error": f"Unrecognised unit or expression: '{from_expr}'\n"
                         f"Tip: use dot notation for compound units, e.g. kg.m/s2"}
    if dv_b is None:
        return {"error": f"Unrecognised unit or expression: '{to_expr}'\n"
                         f"Tip: use dot notation for compound units, e.g. kg.m/s2"}

    if dv_a != dv_b:
        return {
            "op": "incompatible",
            "from_expr": from_expr, "from_name": name_a,
            "from_dims": format_dims(dv_a), "from_prop": prop_a or "compound",
            "to_expr": to_expr, "to_name": name_b,
            "to_dims": format_dims(dv_b), "to_prop": prop_b or "compound",
        }

    if special_a or special_b:
        # Special units need an offset formula; a bare factor would be wrong.
        convert = _nonlinear_converter(from_expr, to_expr)
        if convert is None:
            return {"error": f"Non-linear conversion between '{from_expr}' and '{to_expr}' "
                             f"is not implemented for this pair."}
        return {
            "op": "convert_nonlinear",
            "value": value,
            "from_expr": from_expr, "from_name": name_a,
            "to_expr": to_expr, "to_name": name_b,
            "dim_string": format_dims(dv_a),
            "property": prop_a or "temperature",
            "result": convert(value),
        }

    if fac_a is None or fac_b is None:
        return {"error": "One or both units require an offset formula rather than a "
                         "simple multiplication factor."}

    factor = fac_a / fac_b
    return {
        "op": "convert",
        "value": value,
        "from_expr": from_expr, "from_name": name_a,
        "to_expr": to_expr, "to_name": name_b,
        "dim_string": format_dims(dv_a),
        "property": prop_a or "compound quantity",
        "fac_a": fac_a, "fac_b": fac_b,
        "factor": factor, "result": value * factor,
    }
def do_compatibility(expr_a: str, expr_b: str) -> dict:
    """
    Report whether two unit expressions share the same dimension vector.

    Both expressions are normalised and resolved first. Returns
    {"error": ...} naming the first expression that cannot be resolved,
    otherwise a dict with "op" == "compatibility", the boolean
    "compatible", and the name, formatted dimensions and property of
    each side.
    """
    expr_a, expr_b = normalize_expr(expr_a), normalize_expr(expr_b)
    dims_a, _, label_a, quantity_a, _ = resolve_any(expr_a)
    dims_b, _, label_b, quantity_b, _ = resolve_any(expr_b)

    # Report the first unresolved side, checking A before B.
    for dims, text in ((dims_a, expr_a), (dims_b, expr_b)):
        if dims is None:
            return {"error": f"Unrecognised unit: '{text}'"}

    return {
        "op": "compatibility",
        "compatible": dims_a == dims_b,
        "expr_a": expr_a, "name_a": label_a,
        "dims_a": format_dims(dims_a), "prop_a": quantity_a or "compound",
        "expr_b": expr_b, "name_b": label_b,
        "dims_b": format_dims(dims_b), "prop_b": quantity_b or "compound",
    }
def do_breakdown(expr: str) -> dict:
    """
    Decompose a unit expression into its base-unit exponents.

    Returns {"error": ...} when the expression cannot be resolved,
    otherwise a dict with "op" == "breakdown", the resolved name and
    property, the formatted dimension string, the list of
    (base unit name, exponent) pairs with non-zero exponent, the factor
    to base units and the special-unit flag.
    """
    expr = normalize_expr(expr)
    dv, fac, name, prop, is_special = resolve_any(expr)
    if dv is None:
        return {"error": f"Unrecognised unit or expression: '{expr}'"}

    # Walk BASE_DIMS rather than the dict itself so the component rows
    # appear in the same order as format_dims() prints them, and so a
    # key outside the known base dimensions cannot raise KeyError.
    non_zero = [(BASE_NAMES[dim], dv[dim])
                for dim in BASE_DIMS if dv.get(dim, 0) != 0]
    return {
        "op": "breakdown",
        "expr": expr, "name": name,
        "property": prop or "compound quantity",
        "dim_string": format_dims(dv),
        "components": non_zero,
        "factor": fac, "is_special": is_special,
    }
# ─────────────────────────────────────────────────────────────────────────────
# FORMAT RESULT AS MARKDOWN
# ─────────────────────────────────────────────────────────────────────────────
def format_result(r: dict) -> str:
    """
    Render an engine result dict as Markdown.

    A dict carrying an "error" key becomes a one-line error message.
    Otherwise the "op" key selects the layout: "incompatible",
    "compatibility", "breakdown", "convert_nonlinear" or "convert".
    Any other value yields "Unexpected result format.".
    """
    if "error" in r:
        return f"**Error:** {r['error']}"

    op = r.get("op", "")

    if op == "incompatible":
        return (
            f"**{r['from_expr']}** ({r['from_name']}) and "
            f"**{r['to_expr']}** ({r['to_name']}) cannot be converted — "
            f"they measure different physical quantities.\n\n"
            f"| Expression | Dimensions | Quantity |\n"
            f"|:-----------|:-----------|:---------|\n"
            f"| `{r['from_expr']}` | {r['from_dims']} | {r['from_prop']} |\n"
            f"| `{r['to_expr']}` | {r['to_dims']} | {r['to_prop']} |"
        )

    if op == "compatibility":
        status = "**compatible**" if r["compatible"] else "**not compatible**"
        verdict = ("Same dimension vector — conversion is valid."
                   if r["compatible"] else
                   "Different dimension vectors — these units measure different physical quantities.")
        return (
            f"`{r['expr_a']}` ({r['name_a']}) and `{r['expr_b']}` ({r['name_b']}) "
            f"are {status}.\n\n"
            f"| Expression | Dimensions | Quantity |\n"
            f"|:-----------|:-----------|:---------|\n"
            f"| `{r['expr_a']}` | {r['dims_a']} | {r['prop_a']} |\n"
            f"| `{r['expr_b']}` | {r['dims_b']} | {r['prop_b']} |\n\n"
            f"{verdict}"
        )

    if op == "breakdown":
        # One table row per non-zero base dimension; a placeholder row when
        # the unit is dimensionless.
        rows = "\n".join(
            f"| {bname} | {exp} |"
            for bname, exp in r["components"]
        ) if r["components"] else "| (dimensionless) | 0 |"
        fac_str = sig_figs(r["factor"]) if r["factor"] is not None else "non-linear"
        return (
            f"**`{r['expr']}`** — {r['name']}\n\n"
            f"Quantity: {r['property']}\n\n"
            f"**Dimensional formula:** {r['dim_string']}\n\n"
            f"| Base Unit | Exponent |\n"
            f"|:----------|:---------|\n"
            f"{rows}\n\n"
            f"**UCUM base factor:** {fac_str}"
        )

    if op == "convert_nonlinear":
        return (
            f"**{sig_figs(r['value'])} {r['from_expr']}** ({r['from_name']}) "
            f"→ **{r['to_expr']}** ({r['to_name']})\n\n"
            f"Both measure **{r['property']}** — {r['dim_string']}\n\n"
            f"Non-linear conversion (offset formula — not a simple multiplication):\n\n"
            f"**Result: {sig_figs(r['result'])} {r['to_expr']}**"
        )

    if op == "convert":
        return (
            f"**{sig_figs(r['value'])} {r['from_expr']}** ({r['from_name']}) "
            f"→ **{r['to_expr']}** ({r['to_name']})\n\n"
            f"| Property | Value |\n"
            f"|:--|:--|\n"
            f"| Quantity | {r['property']} |\n"
            f"| Dimensions | {r['dim_string']} |\n"
            f"| 1 `{r['from_expr']}` in base units | {sig_figs(r['fac_a'])} |\n"
            f"| 1 `{r['to_expr']}` in base units | {sig_figs(r['fac_b'])} |\n"
            f"| Conversion factor | {sig_figs(r['fac_a'])} ÷ {sig_figs(r['fac_b'])} "
            f"= **{sig_figs(r['factor'])}** |\n\n"
            f"**{sig_figs(r['value'])} × {sig_figs(r['factor'])} "
            f"= {sig_figs(r['result'])}**\n\n"
            f"**Result: {sig_figs(r['value'])} {r['from_expr']} "
            f"= {sig_figs(r['result'])} {r['to_expr']}**"
        )

    return "Unexpected result format."
# ─────────────────────────────────────────────────────────────────────────────
# AI SYSTEM PROMPTS — built from UCUM data
# ─────────────────────────────────────────────────────────────────────────────
def _build_intent_prompt():
    """
    Build the system prompt for the intent-parsing model call.

    The loaded units are grouped by their "property" field, and for a
    fixed list of properties the first few UCUM codes of each group are
    listed in the prompt as examples. The rest of the prompt is static:
    the four JSON output formats and the unit-naming rules.
    """
    by_prop = {}
    for u in _units:
        by_prop.setdefault(u["property"], []).append(u["ucum_code"])

    # (property, number of example codes to show) — order is prompt order.
    useful = [
        ("length", 5), ("mass", 4), ("time", 4), ("temperature", 4),
        ("pressure", 4), ("energy", 4), ("force", 3), ("volume", 3),
        ("fluid volume", 3), ("power", 2), ("frequency", 2), ("plane angle", 3),
    ]
    unit_lines = "".join(
        f" {prop}: " + ", ".join(by_prop[prop][:limit]) + "\n"
        for prop, limit in useful
        if prop in by_prop
    )

    return f"""You are a unit conversion intent parser. Output ONLY a raw JSON object.
Formats:
Convert: {{"intent":"convert","value":<number>,"from_unit":"<expr>","to_unit":"<expr>"}}
Compatible?: {{"intent":"compatibility","unit_a":"<expr>","unit_b":"<expr>"}}
Break down: {{"intent":"breakdown","unit":"<expr>"}}
Unclear: {{"intent":"unknown","message":"<clarification request>"}}
Unit codes:
{unit_lines} compound: dots=multiply, slash=divide, integer=exponent — kg.m/s2 N.m Pa.s W/m2 J/(kg.K)
Rules:
- Temperature: K, Cel, [degF]
- Imperial: [mi_i] [ft_i] [in_i] [lb_av] [oz_av] [gal_us] [psi]
- Prefixed: km, kg, cm, mm, MHz, kPa, ml, kJ, MW
- Common: miles=[mi_i], feet=[ft_i], pounds=[lb_av], gallons=[gal_us]
- Pound-force (lbf): use [lbf_av]
- Torque "foot-pound" or "ft-lb": use [ft_i].[lbf_av] (force × distance, NOT mass × distance)
- Energy "kilowatt-hour" / "kWh": use kWh (engine resolves to kW.h automatically)
- Density "kg per cubic meter": use kg/m3 (slash notation, integer exponent, no spaces)
- Density "pounds per cubic foot" / "lb/ft3": use lb/ft3
- Density "g per cm3" or "g/mL": use g/cm3 or g/ml
- For "can X convert to Y" or "are X and Y compatible" → use compatibility intent
- For "can X be converted to Y" where they are clearly different types → use compatibility
- Output ONLY the JSON. No markdown, no explanation."""


# Built once at import time from the loaded UCUM data.
INTENT_SYSTEM = _build_intent_prompt()
# System prompt for the second model call, which turns an already computed
# result into a short engineering commentary.
EXPLAIN_SYSTEM = """You are a precise engineering unit conversion assistant. Write a short, engineering-focused analysis in exactly 2-3 sentences.
Structure your response as follows:
1. First sentence: State what the conversion result means concretely — describe what the input and output values represent physically (e.g., forces, pressures, energy quantities). Use the exact numbers from the result.
2. Second sentence: Explain the engineering significance of the units themselves — what physical phenomenon or system they describe, what discipline or context uses them (mechanical, thermal, fluid, electrical, etc.), and what the dimensional relationship means for engineers working with these quantities.
3. Optional third sentence: Add a practical engineering insight — typical ranges in real systems, why one unit is preferred over another in certain contexts, or how the quantity relates to a standard engineering formula or material property.
Rules:
- Never recalculate — use only numbers already shown in the result
- Never repeat the conversion factor or the dimensional formula — those are already displayed
- Do NOT use consumer analogies (e.g., "like a car", "like a football field") — keep the context engineering and technical
- For incompatible units: explain dimensionally why the conversion is impossible (different physical quantities)
- For compound units (Pa.s, N.m, W/m²): emphasize what physical law or equation they appear in
- For temperature conversions: note the offset vs. scale difference and any engineering relevance
- Maximum 3 sentences total — be concise and precise"""
# ─────────────────────────────────────────────────────────────────────────────
# AI LAYER
# ─────────────────────────────────────────────────────────────────────────────
def _first_json_object(text: str):
    """Return the first JSON object (dict) embedded in *text*, or None."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        start = text.find("{", start + 1)
    return None


def parse_intent(user_message: str) -> dict:
    """
    Ask the model to classify *user_message* and return the parsed intent.

    Always returns a dict. On success it is the JSON object produced by
    the model. If the call itself fails the result is
    {"intent": "error", "message": <exception text>}; if the reply holds
    no JSON object it is {"intent": "unknown", "message": ...}.
    """
    try:
        resp = _client().chat.completions.create(
            model=HF_MODEL,
            messages=[
                {"role": "system", "content": INTENT_SYSTEM},
                {"role": "user", "content": user_message},
            ],
            temperature=0.0,
            max_tokens=150,
        )
        # content may be None (e.g. an empty completion); treat it as "".
        raw = (resp.choices[0].message.content or "").strip()
    except Exception as e:
        # Boundary with the remote service: report instead of crashing the UI.
        return {"intent": "error", "message": str(e)}

    # Scanning for the first decodable object tolerates code fences,
    # leading chatter and trailing text around the JSON.
    parsed = _first_json_object(raw)
    if parsed is None:
        return {"intent": "unknown", "message": "Could not parse your request. Please rephrase."}
    return parsed
def explain_result(result_text: str, question: str) -> str:
    """
    Ask the model for a short commentary on an already computed result.

    The user's question and the formatted result are sent together with
    EXPLAIN_SYSTEM. Returns the stripped reply, or a string starting with
    "Analysis unavailable:" when the call fails or the reply is empty.
    """
    try:
        resp = _client().chat.completions.create(
            model=HF_MODEL,
            messages=[
                {"role": "system", "content": EXPLAIN_SYSTEM},
                {"role": "user",
                 "content": (
                     f"User asked: {question}\n\n"
                     f"Computed result:\n{result_text}\n\n"
                     f"Write your analysis:"
                 )},
            ],
            temperature=0.2,
            max_tokens=200,
        )
        content = resp.choices[0].message.content
    except Exception as e:
        # Boundary with the remote service: degrade to a message.
        return f"Analysis unavailable: {e}"

    # content may be None; say so plainly instead of leaking an
    # AttributeError message to the user.
    if not content or not content.strip():
        return "Analysis unavailable: the model returned an empty response."
    return content.strip()
# ─────────────────────────────────────────────────────────────────────────────
# PIPELINE
# ─────────────────────────────────────────────────────────────────────────────
def _to_ucum_expr(raw) -> str:
    """Normalise one unit string from the intent, translating English wording when possible."""
    text = str(raw).strip()
    return normalize_expr(english_to_ucum(text) or text)


def process(user_input: str) -> tuple[str, str]:
    """
    Run the full pipeline for one user query.

    Steps: parse the intent with the model, run the matching engine
    operation (convert / compatibility / breakdown), format the result as
    Markdown, then ask the model for a commentary.

    Returns (result_markdown, explanation). The explanation is "" for
    empty input and for every error or clarification outcome.
    """
    if not user_input or not user_input.strip():
        return "", ""

    intent = parse_intent(user_input)
    if not isinstance(intent, dict):
        # Defensive: anything but a JSON object is treated as unparseable.
        intent = {"intent": "unknown"}
    op = intent.get("intent")

    if op == "error":
        return (
            "**Connection error — Hugging Face Inference Provider not reachable.**\n\n"
            f"{intent.get('message', '')}\n\n"
            "Ensure the **HF_TOKEN** Space secret is set and that the model "
            "provider is currently available.",
            ""
        )
    if op == "unknown":
        msg = intent.get("message", "Please rephrase your question.")
        return f"**Clarification needed:** {msg}", ""

    if op == "convert":
        try:
            value = float(intent["value"])
            from_u = _to_ucum_expr(intent["from_unit"])
            to_u = _to_ucum_expr(intent["to_unit"])
        except (KeyError, ValueError, TypeError):
            return "**Error:** Could not extract value and units. Try: *Convert 5 km to miles*", ""
        r = do_conversion(value, from_u, to_u)
    elif op == "compatibility":
        r = do_compatibility(
            _to_ucum_expr(intent.get("unit_a") or ""),
            _to_ucum_expr(intent.get("unit_b") or ""),
        )
    elif op == "breakdown":
        r = do_breakdown(_to_ucum_expr(intent.get("unit") or ""))
    else:
        return "**Unrecognised request.** Try: *Convert 5 km to miles*", ""

    result_text = format_result(r)
    if "error" in r:
        # Nothing was computed, so there is nothing for the model to
        # analyse; skip the second call like the other error paths do.
        return result_text, ""
    explanation = explain_result(result_text, user_input)
    return result_text, explanation
# ─────────────────────────────────────────────────────────────────────────────
# GRADIO UI
# ─────────────────────────────────────────────────────────────────────────────
# Sample natural-language queries: plain conversions, compound-unit
# conversions, compatibility questions and base-unit breakdowns.
# NOTE(review): presumably fed to the Gradio examples widget — confirm in build_ui.
EXAMPLES = [
    "Convert 5 km to miles",
    "How many pounds is 10 kg?",
    "Convert 100 Celsius to Fahrenheit",
    "Convert 1 atmosphere to PSI",
    "Convert 1 Newton to dynes",
    "Convert 50 kW.h to J",
    "Convert 1 Pa.s to kg/(m.s)",
    "What is 9.81 N.m in J?",
    "Are Pa and kg/(m*s^2) compatible?",
    "Can N.m and J be converted?",
    "Can km be converted to hours?",
    "Break down a Newton into base units",
    "What are the dimensions of Pa.s?",
    "Break down W/m^2 into base units",
]
# Custom stylesheet for the app: dark palette variables, layout fixes and
# per-component styling (result card, AI card, inputs, buttons, examples,
# pagination, Markdown tables).
# NOTE(review): how this string is handed to Gradio is not visible here — confirm in build_ui / launch.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700&family=JetBrains+Mono:wght@400;500&family=Inter:wght@300;400;500&display=swap');
* { box-sizing: border-box; }
:root {
    --bg: #09090b;
    --surface: #111115;
    --card: #18181d;
    --border: #27272f;
    --accent: #6366f1;
    --accent-lo: rgba(99,102,241,0.10);
    --accent-hi: #818cf8;
    --text: #f0f0f4;
    --muted: #71717a;
    --dim: #3f3f46;
    --mono: 'JetBrains Mono', monospace;
    --sans: 'Inter', sans-serif;
    --display: 'Syne', sans-serif;
    --r: 8px;
}
/* ── Issue 1: Uniform background — force #09090b on every layer ── */
/* Target the body/html outside the container for the sidebar areas */
html { background: #09090b !important; }
body { background: #09090b !important; background-color: #09090b !important; }
/* Target the Gradio container and all its internal wrappers */
.gradio-container,
.gradio-container > .main,
.gradio-container > .main > .wrap,
.app, .contain, .wrap, .main,
.app > .contain,
.app > .contain > .wrap {
    background: #09090b !important;
    background-color: #09090b !important;
    color: var(--text) !important;
}
/* Issue 1: Scale — slightly reduced to avoid scrolling */
.gradio-container {
    max-width: 1280px !important;
    width: 100% !important;
    margin: 0 auto !important;
    padding: 0 32px !important;
}
/* ── Issue 4: Layout alignment — consistent gaps and column spacing ── */
/* Gradio wraps each component in a .block div; ensure no extra margins */
.gradio-container .block {
    margin: 0 !important;
}
/* The main row containing left+right columns */
.gradio-container > .main .wrap > .row,
.gradio-container .row {
    gap: 32px !important;
    align-items: flex-start !important;
}
/* gr.HTML label blocks should have no extra padding from Gradio's default .block */
.block.svelte-90oupt,
.block.svelte-1p9xokt {
    padding: 0 !important;
}
/* ── Issue 3: Pagination — target the "Pages:" row that Gradio renders ── */
/* The pagination row sits inside the dataset/examples component */
/* Target all buttons that are direct siblings to the sample table */
.gr-examples button,
[data-testid="dataset"] button,
.dataset button {
    /* Only target page-number buttons (they have short numeric content) */
    min-height: unset !important;
}
/* Style the "Pages: N" label text */
.gr-examples span[class*="page"],
.gr-examples .pages,
[data-testid="dataset"] span[class*="page"] {
    font-family: var(--mono) !important;
    font-size: 11px !important;
    color: var(--muted) !important;
}
footer, .show-api, .built-with { display: none !important; }
/* ── Remove Gradio default container chrome ── */
.gap, .contain, .form, .prose,
.block, .block.padded {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
    padding: 0 !important;
}
/* ── Hide per-component loading bars on output markdown elements ── */
#result-md .generating,
#result-md .eta-bar,
#result-md .progress-bar,
#ai-md .generating,
#ai-md .eta-bar,
#ai-md .progress-bar { display: none !important; }
/* ── Result card ── */
#result-md {
    background: var(--card) !important;
    border: 1px solid var(--border) !important;
    border-radius: var(--r) !important;
    padding: 22px 24px !important;
    min-height: 140px !important;
    margin-bottom: 16px !important;
}
/* ── AI Analysis card ── */
#ai-md {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-left: 4px solid var(--accent) !important;
    border-radius: var(--r) !important;
    padding: 20px 24px !important;
    min-height: 80px !important;
    margin-bottom: 16px !important;
}
/* ── Status bar ── */
#status-bar {
    margin-top: 8px !important;
    padding: 0 !important;
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: var(--r) !important;
}
#status-bar textarea {
    font-family: var(--mono) !important;
    font-size: 12px !important;
    color: var(--muted) !important;
    background: transparent !important;
    border: none !important;
    padding: 10px 18px !important;
    resize: none !important;
    box-shadow: none !important;
}
#status-bar textarea:focus { box-shadow: none !important; outline: none !important; }
.eta-bar, .progress-bar { margin-bottom: 8px !important; }
/* ── Inputs ── */
label > span {
    font-family: var(--mono) !important;
    font-size: 11px !important;
    font-weight: 500 !important;
    letter-spacing: 0.1em !important;
    text-transform: uppercase !important;
    color: var(--muted) !important;
    margin-bottom: 10px !important;
    display: block !important;
}
textarea, input[type="text"] {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: var(--r) !important;
    color: var(--text) !important;
    font-family: var(--sans) !important;
    font-size: 15px !important;
    padding: 12px 14px !important;
    resize: none !important;
    transition: border-color 0.15s !important;
}
textarea:focus, input[type="text"]:focus {
    border-color: var(--accent) !important;
    outline: none !important;
    box-shadow: 0 0 0 3px var(--accent-lo) !important;
}
/* ── Buttons ── */
button.lg {
    border-radius: var(--r) !important;
    font-family: var(--mono) !important;
    font-size: 11px !important;
    font-weight: 500 !important;
    letter-spacing: 0.1em !important;
    text-transform: uppercase !important;
    height: 42px !important;
    transition: all 0.15s !important;
}
button.lg.primary {
    background: var(--accent) !important;
    border: none !important;
    color: #fff !important;
}
button.lg.primary:hover { background: var(--accent-hi) !important; }
button.lg.secondary {
    background: transparent !important;
    border: 1px solid var(--border) !important;
    color: var(--muted) !important;
}
button.lg.secondary:hover {
    border-color: var(--accent) !important;
    color: var(--text) !important;
}
/* ── Example pills ── */
.gr-samples-table td, .gr-examples td {
    background: transparent !important;
    border: none !important;
    padding: 3px 4px !important;
}
.gr-samples-table button, .gr-examples button {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: 4px !important;
    color: var(--muted) !important;
    font-family: var(--mono) !important;
    font-size: 12px !important;
    padding: 6px 12px !important;
    margin: 2px !important;
    cursor: pointer !important;
    transition: all 0.12s !important;
    white-space: nowrap !important;
}
.gr-samples-table button:hover, .gr-examples button:hover {
    border-color: var(--accent) !important;
    color: var(--text) !important;
    background: var(--accent-lo) !important;
}
/* ── Issue 4: Pagination buttons — clearly outlined with obvious interactivity ── */
/* Targets the "Pages: 1 2" row that Gradio renders for examples_per_page */
.gr-examples .paginate,
.gr-examples nav,
.gr-examples [aria-label="pagination"] {
    display: flex !important;
    gap: 6px !important;
    margin-top: 12px !important;
    align-items: center !important;
}
/* Style all page-number spans/buttons in the pagination row */
.gr-examples .paginate button,
.gr-examples nav button,
.gr-examples [aria-label="pagination"] button {
    font-family: var(--mono) !important;
    font-size: 12px !important;
    font-weight: 600 !important;
    min-width: 32px !important;
    height: 32px !important;
    padding: 0 10px !important;
    border-radius: 5px !important;
    cursor: pointer !important;
    transition: all 0.15s !important;
    /* Default (inactive) page button */
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    color: var(--muted) !important;
}
.gr-examples .paginate button:hover,
.gr-examples nav button:hover,
.gr-examples [aria-label="pagination"] button:hover {
    border-color: var(--accent) !important;
    color: var(--text) !important;
    background: var(--accent-lo) !important;
}
/* Active / current page button */
.gr-examples .paginate button[aria-current="true"],
.gr-examples nav button[aria-current="true"],
.gr-examples [aria-label="pagination"] button[aria-current="true"],
.gr-examples .paginate button.current,
.gr-examples nav button.current {
    background: var(--accent) !important;
    border-color: var(--accent) !important;
    color: #fff !important;
}
/* Also catch the plain text "Pages: 1 2" fallback Gradio uses in some versions */
.gr-examples > div:last-child,
.gr-examples > div > div:last-child {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 6px !important;
    align-items: center !important;
    margin-top: 12px !important;
}
.gr-examples > div:last-child button,
.gr-examples > div > div:last-child button {
    font-family: var(--mono) !important;
    font-size: 12px !important;
    font-weight: 600 !important;
    min-width: 32px !important;
    height: 32px !important;
    padding: 0 10px !important;
    border-radius: 5px !important;
    background: var(--surface) !important;
    border: 2px solid var(--border) !important;
    color: var(--text) !important;
    cursor: pointer !important;
    transition: all 0.15s !important;
}
.gr-examples > div:last-child button:hover,
.gr-examples > div > div:last-child button:hover {
    border-color: var(--accent) !important;
    background: var(--accent-lo) !important;
}
/* ── Result markdown text ── */
.result-md p { color: var(--text); font-size: 15px; line-height: 1.7; margin: 6px 0; }
.result-md strong { color: var(--accent-hi) !important; font-weight: 500 !important; }
.result-md code {
    font-family: var(--mono) !important;
    font-size: 13px !important;
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: 4px !important;
    padding: 1px 6px !important;
    color: var(--accent-hi) !important;
}
.result-md table {
    width: 100%;
    border-collapse: collapse;
    font-family: var(--mono);
    font-size: 14px;
    margin: 16px 0;
}
/* ── AI explanation markdown ── */
.ai-md p {
    color: #d4d4de;
    font-size: 16px !important;
    line-height: 1.8 !important;
    margin: 0 0 8px 0 !important;
    font-style: normal;
}
.ai-md strong { color: var(--accent-hi) !important; }
/* ── Issue 3: Table header — use visible named header styling instead of blank dark row ── */
/* The th is now always named (Property/Value or Expression/Dimensions/Quantity etc) */
.result-md th {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    padding: 10px 14px !important;
    text-align: left !important;
    color: var(--accent-hi) !important;
    font-weight: 600 !important;
    font-size: 11px !important;
    letter-spacing: 0.06em !important;
    text-transform: uppercase !important;
}
.result-md td {
    background: var(--card) !important;
    border: 1px solid var(--border) !important;
    padding: 10px 14px !important;
    color: var(--text) !important;
}
.result-md tr:nth-child(even) td { background: #111115 !important; }
"""
| def build_ui(): | |
| # Build the theme here so it can be passed to gr.Blocks | |
| theme = gr.themes.Base( | |
| primary_hue = "indigo", | |
| neutral_hue = "zinc", | |
| font = [gr.themes.GoogleFont("Inter"), "sans-serif"], | |
| font_mono = [gr.themes.GoogleFont("JetBrains Mono"), "monospace"], | |
| ).set( | |
| body_background_fill = "#09090b", | |
| body_background_fill_dark = "#09090b", | |
| background_fill_primary = "#09090b", | |
| background_fill_primary_dark = "#09090b", | |
| background_fill_secondary = "#111115", | |
| background_fill_secondary_dark = "#111115", | |
| block_background_fill = "transparent", | |
| block_background_fill_dark = "transparent", | |
| body_text_color = "#f0f0f4", | |
| body_text_color_dark = "#f0f0f4", | |
| ) | |
| # js forces dark mode AND styles pagination buttons after load | |
| js_dark = """() => { | |
| document.documentElement.classList.add('dark'); | |
| // Style the Examples pagination buttons | |
| // Gradio renders pagination as buttons inside the dataset component | |
| // We use MutationObserver to catch them even after dynamic render | |
| function stylePaginationButtons() { | |
| // Find all buttons inside .gr-examples or dataset wrappers | |
| // that contain only a number (page number buttons) | |
| const allButtons = document.querySelectorAll('button'); | |
| allButtons.forEach(btn => { | |
| const txt = btn.textContent.trim(); | |
| const isPageNum = /^\\d+$/.test(txt); | |
| const isParentDataset = btn.closest('.gr-examples') || | |
| btn.closest('[data-testid="dataset"]') || | |
| btn.closest('.dataset'); | |
| if (isPageNum && isParentDataset) { | |
| btn.style.cssText = ` | |
| font-family: 'JetBrains Mono', monospace !important; | |
| font-size: 12px !important; | |
| font-weight: 700 !important; | |
| min-width: 30px !important; | |
| height: 30px !important; | |
| padding: 0 8px !important; | |
| border-radius: 5px !important; | |
| cursor: pointer !important; | |
| background: #111115 !important; | |
| border: 2px solid #27272f !important; | |
| color: #f0f0f4 !important; | |
| margin: 0 2px !important; | |
| transition: all 0.15s !important; | |
| display: inline-flex !important; | |
| align-items: center !important; | |
| justify-content: center !important; | |
| `; | |
| btn.onmouseenter = () => { | |
| btn.style.borderColor = '#6366f1'; | |
| btn.style.background = 'rgba(99,102,241,0.15)'; | |
| }; | |
| btn.onmouseleave = () => { | |
| btn.style.borderColor = '#27272f'; | |
| btn.style.background = '#111115'; | |
| }; | |
| } | |
| }); | |
| } | |
| // Run after short delay for initial render, then observe mutations | |
| setTimeout(stylePaginationButtons, 800); | |
| setTimeout(stylePaginationButtons, 1500); | |
| const observer = new MutationObserver(() => { | |
| stylePaginationButtons(); | |
| }); | |
| observer.observe(document.body, { childList: true, subtree: true }); | |
| }""" | |
| with gr.Blocks(title="Unit Converter AI", theme=theme, js=js_dark) as app: | |
| # ββ Header β single self-contained gr.HTML block ββββββββββββββββββββββ | |
| gr.HTML(""" | |
| <div style=" | |
| padding: 28px 0 20px; | |
| border-bottom: 1px solid #27272f; | |
| margin-bottom: 24px; | |
| display: flex; | |
| align-items: center; | |
| gap: 24px; | |
| "> | |
| <div style=" | |
| font-family:'Syne',sans-serif;font-size:30px;font-weight:700; | |
| color:#f0f0f4;letter-spacing:-0.01em;white-space:nowrap; | |
| ">Unit Converter <span style="color:#818cf8;">AI</span></div> | |
| <div style="display:flex;gap:10px;flex-wrap:wrap;align-items:center;"> | |
| <span style="font-family:'JetBrains Mono',monospace;font-size:11px; | |
| background:rgba(99,102,241,0.1);border:1px solid rgba(99,102,241,0.25); | |
| color:#818cf8;padding:4px 12px;border-radius:100px;">UCUM</span> | |
| <span style="font-family:'JetBrains Mono',monospace;font-size:11px; | |
| background:rgba(99,102,241,0.1);border:1px solid rgba(99,102,241,0.25); | |
| color:#818cf8;padding:4px 12px;border-radius:100px;">312 units</span> | |
| <span style="font-family:'JetBrains Mono',monospace;font-size:11px; | |
| background:rgba(99,102,241,0.1);border:1px solid rgba(99,102,241,0.25); | |
| color:#818cf8;padding:4px 12px;border-radius:100px;">7 base dimensions</span> | |
| </div> | |
| </div> | |
| """) | |
| # ββ Main layout βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| with gr.Row(equal_height=False): | |
| # Left: input | |
| with gr.Column(scale=4, min_width=340): | |
| # Matching section label β aligns vertically with "Result" on the right | |
| gr.HTML("""<div style=" | |
| font-family:'JetBrains Mono',monospace;font-size:11px; | |
| letter-spacing:0.1em;text-transform:uppercase; | |
| color:#71717a;margin-bottom:12px;">Query</div>""") | |
| query = gr.Textbox( | |
| label = None, | |
| placeholder = "Convert 5 km to miles\nAre Pa and kg/(m*s^2) compatible?\nBreak down W/m^2 into base units", | |
| lines = 4, | |
| container = False, | |
| show_label = False, | |
| ) | |
| with gr.Row(): | |
| btn_run = gr.Button("Run", variant="primary", scale=3) | |
| btn_clear = gr.Button("Clear", variant="secondary", scale=1) | |
| # Example queries label | |
| gr.HTML("""<div style=" | |
| font-family:'JetBrains Mono',monospace;font-size:11px; | |
| letter-spacing:0.1em;text-transform:uppercase; | |
| color:#71717a;margin:20px 0 10px;">Example queries</div>""") | |
| gr.Examples( | |
| examples = EXAMPLES, | |
| inputs = query, | |
| label = None, | |
| examples_per_page = 7, | |
| ) | |
| # Right: output | |
| with gr.Column(scale=6, min_width=440): | |
| # "Result" section label β matches "Query" label height on left | |
| gr.HTML("""<div style=" | |
| font-family:'JetBrains Mono',monospace;font-size:11px; | |
| letter-spacing:0.1em;text-transform:uppercase; | |
| color:#71717a;margin-bottom:12px;">Result</div>""") | |
| # Result markdown β card styling applied via #result-md in CSS | |
| result_md = gr.Markdown( | |
| value = "<span style='color:#3f3f46;font-family:JetBrains Mono,monospace;font-size:13px;'>Submit a query to see results.</span>", | |
| show_label = False, | |
| container = False, | |
| elem_id = "result-md", | |
| elem_classes = ["result-md"], | |
| ) | |
| # AI Analysis header label β self-contained, includes the dot+label | |
| # This is OUTSIDE the ai_md component so it is never affected by | |
| # Gradio's loading state on that component. | |
| gr.HTML("""<div style=" | |
| display:flex;align-items:center;gap:8px; | |
| font-family:'JetBrains Mono',monospace;font-size:11px; | |
| letter-spacing:0.12em;text-transform:uppercase; | |
| color:#818cf8;margin-bottom:10px;"> | |
| <svg width='8' height='8' viewBox='0 0 8 8' style='flex-shrink:0;'> | |
| <circle cx='4' cy='4' r='4' fill='#6366f1'/> | |
| </svg> | |
| AI Analysis | |
| </div>""") | |
| # AI markdown β card styling applied via #ai-md in CSS | |
| ai_md = gr.Markdown( | |
| value = "", | |
| show_label = False, | |
| container = False, | |
| elem_id = "ai-md", | |
| elem_classes = ["ai-md"], | |
| ) | |
| # Status bar β FIX 3: no split HTML div wrapper, styled via #status-bar CSS | |
| status = gr.Textbox( | |
| value = "Ready β ensure Jan is running with Local API Server enabled", | |
| show_label = False, | |
| interactive = False, | |
| lines = 1, | |
| container = True, | |
| elem_id = "status-bar", | |
| ) | |
| # ββ Events ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def on_run(user_input): | |
| if not user_input.strip(): | |
| return ( | |
| "<span style='color:#3f3f46;font-family:JetBrains Mono,monospace;font-size:13px;'>Please enter a query.</span>", | |
| "", | |
| "Ready.", | |
| ) | |
| result, explanation = process(user_input) | |
| st = "Done." if "Error" not in result else "Error β see result panel." | |
| return result, explanation, st | |
| def on_clear(): | |
| return ( | |
| "", | |
| "<span style='color:#3f3f46;font-family:JetBrains Mono,monospace;font-size:13px;'>Submit a query to see results.</span>", | |
| "", | |
| "Ready.", | |
| ) | |
| btn_run.click( | |
| fn = on_run, | |
| inputs = [query], | |
| outputs = [result_md, ai_md, status], | |
| ) | |
| query.submit( | |
| fn = on_run, | |
| inputs = [query], | |
| outputs = [result_md, ai_md, status], | |
| ) | |
| btn_clear.click( | |
| fn = on_clear, | |
| inputs = [], | |
| outputs = [query, result_md, ai_md, status], | |
| ) | |
| return app | |
| # ── Entry point ─────────────────────────────────────────────────────────────── | |
if __name__ == "__main__":
    # Startup banner: echo the inference configuration to stdout so a
    # misconfigured deployment can be diagnosed from its logs. Only the
    # presence of the token is reported, never its value.
    print("Unit Converter AI — HF Spaces Edition")
    print(f" Model : {HF_MODEL}")
    print(f" API : {HF_API_BASE}")
    print(f" Token : {'set' if HF_TOKEN else 'MISSING — add HF_TOKEN to Space Secrets'}")
    print()
    # Build the UI and start serving it with Gradio's default launch settings.
    app = build_ui()
    app.launch()