"""String similarity HTTP API.

Combines three signals into one 0-100 score:

* character-level similarity (``difflib.SequenceMatcher``),
* token-level Jaccard overlap,
* an all-or-nothing bonus when both strings carry the same numeric tokens.
"""

import difflib
import re
from typing import Dict, List

from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI(title="String Similarity API (Hybrid Difflib + Jaccard + Numeric Bonus)")

# CORS: open for testing; lock down in prod
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers for credentialed requests per the CORS spec; list
# explicit origins before relying on cookies/auth headers -- confirm against
# the FastAPI CORS documentation.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

UNIT_PATTERN = r"(mg|mcg|g|iu|ml|%)"

# A number, optionally followed by a unit.  Group 1 is the number and group 2
# (the group inside UNIT_PATTERN) is the unit, empty when absent.
#  * `(?!\w)` ends the unit instead of `\b`, because `\b` can never match
#    after "%" unless a word character follows it.
#  * A bare number still needs a trailing `\b`, so "3rd" or "2x" are skipped.
_NUMBER_RE = re.compile(rf"\b(\d+(?:\.\d+)?)(?:\s*{UNIT_PATTERN}(?!\w)|\b)")

# Relative weight of each component in the final score (they sum to 1.0).
_WEIGHT_DIFF = 0.60
_WEIGHT_JACCARD = 0.30
_WEIGHT_NUMERIC = 0.10


def norm_base(s: str) -> str:
    """Return *s* lower-cased with punctuation and whitespace normalised.

    Falsy input (``None``, ``""``, ``0``) becomes the empty string.  ``+`` and
    ``/`` are turned into spaces, every character that is not a word
    character, whitespace, ``.``, ``%`` or ``-`` is replaced by a space, and
    runs of whitespace collapse to a single space.
    """
    s = str(s or "").lower()
    s = s.replace("+", " ").replace("/", " ")
    s = re.sub(r"[^\w\s.%/+-]", " ", s)
    return re.sub(r"\s+", " ", s).strip()


def extract_numbers(s: str) -> List[str]:
    """Return the sorted, de-duplicated numeric tokens found in *s*.

    Each number contributes its bare value and, when a unit from
    ``UNIT_PATTERN`` follows it, a canonical ``"<number><unit>"`` token with
    no space, so ``"500mg"`` and ``"500 mg"`` produce the same result::

        extract_numbers("Paracetamol 500 mg")  ->  ["500", "500mg"]

    Numbers glued to a preceding word character (``"b12"``) are not matched.
    """
    tokens = set()
    for number, unit in _NUMBER_RE.findall(norm_base(s)):
        tokens.add(number)
        if unit:
            tokens.add(f"{number}{unit}")
    return sorted(tokens)


def token_set(s: str) -> List[str]:
    """Return the whitespace-separated tokens of the normalised string.

    Despite the name this is a list (duplicates kept, order preserved);
    callers convert it to a ``set`` where needed.
    """
    return [t for t in norm_base(s).split(" ") if t]


def hybrid_similarity(a: str, b: str) -> Dict[str, float]:
    """Score how similar *a* and *b* are on a 0-100 scale.

    Returns a dict with four floats:

    * ``diff``  -- ``difflib.SequenceMatcher`` ratio of the normalised strings,
    * ``jacc``  -- Jaccard overlap of the token sets,
    * ``num``   -- 100.0 when both strings contain numeric tokens and the
      sets are equal, otherwise 0.0,
    * ``score`` -- ``0.60 * diff + 0.30 * jacc + 0.10 * num``.

    ``diff``, ``jacc`` and ``score`` are rounded to two decimals.  Strings
    that are identical after normalisation short-circuit to 100.0 everywhere.
    """
    a_n, b_n = norm_base(a), norm_base(b)
    if a_n == b_n:
        return {"diff": 100.0, "jacc": 100.0, "num": 100.0, "score": 100.0}

    diff = difflib.SequenceMatcher(None, a_n, b_n).ratio() * 100.0

    aset, bset = set(token_set(a)), set(token_set(b))
    # Guard against dividing by an empty union when either side has no tokens.
    jacc = (len(aset & bset) / len(aset | bset) * 100.0) if (aset and bset) else 0.0

    anums, bnums = extract_numbers(a), extract_numbers(b)
    # The bonus requires numbers on both sides; two number-free strings get 0.
    num_bonus = 100.0 if (anums and bnums and set(anums) == set(bnums)) else 0.0

    score = _WEIGHT_DIFF * diff + _WEIGHT_JACCARD * jacc + _WEIGHT_NUMERIC * num_bonus
    return {
        "diff": round(diff, 2),
        "jacc": round(jacc, 2),
        "num": num_bonus,
        "score": round(score, 2),
    }


@app.get("/string-match")
def string_match(
    a: str = Query(..., description="First string to compare"),
    b: str = Query(..., description="Second string to compare"),
    threshold: float = Query(70.0, ge=0.0, le=100.0, description="Threshold for considering a match"),
):
    """Compare two strings and report whether they meet *threshold*.

    The response echoes both inputs and returns the overall score as
    ``percent_match``, a ``matched`` flag (``score >= threshold``) and the
    per-component breakdown from ``hybrid_similarity`` under ``components``.
    """
    parts = hybrid_similarity(a, b)
    score = parts["score"]  # already rounded to two decimals
    return {
        "string_a": a,
        "string_b": b,
        "percent_match": score,
        "matched": bool(score >= threshold),
        "components": parts,
    }


@app.get("/")
def root():
    """Health-check endpoint that points callers at the comparison route."""
    return {
        "status": "ok",
        "message": "Use /string-match?a=...&b=... for comparisons. See /docs for Swagger UI.",
    }