# Author: anujakkulkarni
# Commit message: Create app.py
# Commit: 57779e5 (verified)
import re
import difflib
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
# FastAPI application object; the route handlers in this file register
# themselves on it through the @app.get decorators.
app = FastAPI(title="String Similarity API (Hybrid Difflib + Jaccard + Numeric Bonus)")
# CORS: open for testing; lock down in prod.
# Every origin, method and header is accepted via the "*" wildcards.
# NOTE(review): wildcard origins are combined with allow_credentials=True.
# The CORS specification does not allow "*" for credentialed requests, and the
# FastAPI docs advise listing origins explicitly when credentials are enabled.
# Nothing in this file reads cookies or auth headers, so credentials are
# presumably not needed -- confirm, then either drop allow_credentials or
# replace "*" with the real front-end origins before deploying.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Units recognised directly after a number.  The parentheses form a
# *capturing* group, so embedding this in a pattern passed to ``re.findall``
# returns only the unit text; use named groups / ``finditer`` instead.
UNIT_PATTERN = r"(mg|mcg|g|iu|ml|%)"

# One number (integer or decimal), optionally followed by a unit.
#   (?!\.?\d)  stops the engine from backtracking "0.5" down to "0".
#   (?!\w)     ends the unit; unlike ``\b`` it also works after "%",
#              which is not a word character.
#   |\b        a number with no unit must still end on a word boundary,
#              so "500abc" is not reported as "500".
_NUMBER_RE = re.compile(
    rf"\b(?P<num>\d+(?:\.\d+)?)(?!\.?\d)"
    rf"(?:\s*(?P<unit>{UNIT_PATTERN})(?!\w)|\b)"
)


def norm_base(s: str) -> str:
    """Return a lower-cased, punctuation-light, single-spaced copy of *s*.

    ``None`` and other falsy values become the empty string.  ``+`` and ``/``
    are treated as separators; every character other than word characters,
    whitespace, ``.``, ``%`` and ``-`` is replaced by a space; runs of
    whitespace collapse to one space and the ends are stripped.
    """
    s = str(s or "")
    s = s.lower()
    s = s.replace("+", " ").replace("/", " ")
    s = re.sub(r"[^\w\s.%/+-]", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s


def extract_numbers(s: str):
    """Return the sorted, de-duplicated numeric tokens found in *s*.

    The text is normalised with ``norm_base`` first.  Every number yields its
    bare value (``"500"``); a number followed by a unit from ``UNIT_PATTERN``
    additionally yields a whitespace-free ``<number><unit>`` token
    (``"500mg"``), so ``"500 mg"`` and ``"500mg"`` produce the same result.

    Previously the capturing group inside ``UNIT_PATTERN`` made
    ``re.findall`` return only the unit, so strings with different doses but
    the same unit compared as equal, and ``%`` was never matched as a unit.
    """
    found = set()
    for match in _NUMBER_RE.finditer(norm_base(s)):
        number = match.group("num")
        found.add(number)
        unit = match.group("unit")
        if unit:
            found.add(number + unit)
    return sorted(found)
def token_set(s: str):
    """Split the normalised form of *s* on single spaces.

    Returns a list (callers convert it to a set themselves) containing the
    non-empty pieces of ``norm_base(s)``.
    """
    pieces = norm_base(s).split(" ")
    return list(filter(None, pieces))
def hybrid_similarity(a: str, b: str):
    """Score how alike *a* and *b* are on a 0-100 scale.

    Returns a dict with four float entries:

    * ``diff``  - ``difflib.SequenceMatcher`` ratio of the normalised strings.
    * ``jacc``  - Jaccard overlap of the two token sets (0 if either is empty).
    * ``num``   - 100 when both strings contain numbers and the sets of
      extracted numbers are equal, otherwise 0.
    * ``score`` - weighted blend: 60% ``diff``, 30% ``jacc``, 10% ``num``.

    Strings that normalise to the same text short-circuit to 100 everywhere.
    """
    left, right = norm_base(a), norm_base(b)
    if left == right:
        return {"diff": 100.0, "jacc": 100.0, "num": 100.0, "score": 100.0}

    # Character-level similarity.
    matcher = difflib.SequenceMatcher(None, left, right)
    diff = matcher.ratio() * 100.0

    # Token-level similarity.
    tokens_a = set(token_set(a))
    tokens_b = set(token_set(b))
    if tokens_a and tokens_b:
        jacc = len(tokens_a & tokens_b) / len(tokens_a | tokens_b) * 100.0
    else:
        jacc = 0.0

    # All-or-nothing bonus for identical numeric content.
    nums_a, nums_b = extract_numbers(a), extract_numbers(b)
    same_numbers = bool(nums_a) and bool(nums_b) and set(nums_a) == set(nums_b)
    num_bonus = 100.0 if same_numbers else 0.0

    score = 0.60 * diff + 0.30 * jacc + 0.10 * num_bonus
    return {
        "diff": round(diff, 2),
        "jacc": round(jacc, 2),
        "num": num_bonus,
        "score": round(score, 2),
    }
@app.get("/string-match")
def string_match(
    a: str = Query(..., description="First string to compare"),
    b: str = Query(..., description="Second string to compare"),
    threshold: float = Query(70.0, ge=0.0, le=100.0, description="Threshold for considering a match")
):
    """Compare two query-string values and report whether they match.

    The response echoes both inputs, gives the overall percentage from
    ``hybrid_similarity``, a ``matched`` flag that is true when that
    percentage is at least *threshold*, and the per-component scores.
    """
    components = hybrid_similarity(a, b)
    overall = components["score"]
    is_match = overall >= threshold

    response = {
        "string_a": a,
        "string_b": b,
        "percent_match": round(overall, 2),
        "matched": bool(is_match),
        "components": components,
    }
    return response
@app.get("/")
def root():
    """Return a fixed status payload that points callers at the real endpoint."""
    usage_hint = "Use /string-match?a=...&b=... for comparisons. See /docs for Swagger UI."
    return dict(status="ok", message=usage_hint)