Spaces:

anujakkulkarni
/

tenderfastapi

Sleeping

App Files Files Community

anujakkulkarni committed on Oct 7, 2025

Commit

221a9b2

verified ·

1 Parent(s): 85d1af4

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -51

app.py CHANGED Viewed

@@ -295,8 +295,8 @@ def match_generic_to_product_master(
 # ---------- NEW: Grouped matcher (generic_name -> array of matches) ----------
-def match_generic_to_product_master_grouped(
-    generic_list: List[str],
     pm_df: pd.DataFrame,
     molecule_col: str,
     brand_id_col: Optional[str],
@@ -304,10 +304,7 @@ def match_generic_to_product_master_grouped(
     min_score: float = 60.0,
     top_n: int = 3
 ) -> List[Dict[str, Any]]:
-    """
-    For each RFQ generic name, return an array of up to top_n matches from product master
-    with score >= min_score.
-    """
     subset = pm_df.dropna(subset=[molecule_col]).copy()
     mol_raw = subset[molecule_col].astype(str).tolist()
     brand_ids = subset[brand_id_col].astype(str).tolist(
@@ -315,29 +312,23 @@ def match_generic_to_product_master_grouped(
     brand_names = subset[brand_name_col].astype(str).tolist(
     ) if brand_name_col and brand_name_col in subset.columns else [None]*len(subset)
-    grouped_results = []
-    for g in generic_list:
-        g_str = str(g or "").strip()
-        if not g_str:
-            continue
-        scored = []
-        for idx, cand in enumerate(mol_raw):
-            parts = hybrid_similarity(g_str, cand)
-            score = parts["score"]
-            if score >= min_score:
-                scored.append({
-                    "matched_name": cand,
-                    "match_percent": round(score, 2),
-                    "brand_id": brand_ids[idx],
-                    "brand_name": brand_names[idx]
-                })
-        scored.sort(key=lambda x: x["match_percent"], reverse=True)
-        if scored:
-            grouped_results.append({
-                "generic_name": g_str,
-                "matches": scored[:top_n]
             })
-    return grouped_results
 # ---------- Endpoints ----------
@@ -487,22 +478,19 @@ async def match_with_difflib_grouped(
     top_n: int = Query(3, description="Max number of matches per RFQ row")
 ):
     """
-    Return grouped matches: one RFQ generic_name → array of matched product master molecules.
     """
     try:
         # RFQ
         rfq_bytes = await rfq_file.read()
         rfq_df = dataframe_from_upload_bytes(rfq_file.filename, rfq_bytes)
         mapped, mapping = build_mapped_rfq(rfq_df)
-        if "generic_name" not in mapped.columns:
-            raise HTTPException(
-                status_code=400, detail="No 'generic_name' column found after mapping RFQ.")
-        gen_series = mapped["generic_name"]
-        nonempty_mask = gen_series.notna() & gen_series.astype(
-            str).str.strip().ne("") & gen_series.astype(str).str.lower().ne("<na>")
-        generic_list = gen_series[nonempty_mask].astype(str).tolist()
         # Product master
         pm_bytes = await product_master_json.read()
@@ -515,29 +503,50 @@ async def match_with_difflib_grouped(
             pm_df, "__product_master_brand_id__")
         brand_name_col = detect_single_column(
             pm_df, "__product_master_brand_name__")
         if not molecule_col:
             raise HTTPException(
                 status_code=400, detail="Could not detect molecule column in product master JSON.")
-        grouped_matches = match_generic_to_product_master_grouped(
-            generic_list, pm_df,
-            molecule_col=molecule_col,
-            brand_id_col=brand_id_col,
-            brand_name_col=brand_name_col,
-            min_score=min_score,
-            top_n=top_n
-        )
         return {
-            "rfq_rows": int(nonempty_mask.sum()),
             "product_master_detected": {
                 "molecule_col": molecule_col,
                 "brand_id_col": brand_id_col,
                 "brand_name_col": brand_name_col
             },
-            "matches_returned": len(grouped_matches),
-            "data": grouped_matches
         }
     except HTTPException:
         raise
@@ -553,4 +562,4 @@ def debug_score(a: str, b: str):
 @app.get("/")
 def root():
-    return {"status": "ok", "message": "POST /match-difflib (rfq_file + product_master_json). Use /match-difflib-debug to inspect best matches."}

 # ---------- NEW: Grouped matcher (generic_name -> array of matches) ----------
+def match_generic_to_product_master_grouped_for_row(
+    generic_value: str,
     pm_df: pd.DataFrame,
     molecule_col: str,
     brand_id_col: Optional[str],
     min_score: float = 60.0,
     top_n: int = 3
 ) -> List[Dict[str, Any]]:
+    """Compute matches for a *single* RFQ row's generic name."""
     subset = pm_df.dropna(subset=[molecule_col]).copy()
     mol_raw = subset[molecule_col].astype(str).tolist()
     brand_ids = subset[brand_id_col].astype(str).tolist(
     brand_names = subset[brand_name_col].astype(str).tolist(
     ) if brand_name_col and brand_name_col in subset.columns else [None]*len(subset)
+    g_str = str(generic_value or "").strip()
+    if not g_str:
+        return []
+    scored = []
+    for idx, cand in enumerate(mol_raw):
+        parts = hybrid_similarity(g_str, cand)
+        score = parts["score"]
+        if score >= min_score:
+            scored.append({
+                "matched_name": cand,
+                "match_percent": round(score, 2),
+                "brand_id": brand_ids[idx],
+                "brand_name": brand_names[idx]
             })
+    scored.sort(key=lambda x: x["match_percent"], reverse=True)
+    return scored[:top_n]
 # ---------- Endpoints ----------
     top_n: int = Query(3, description="Max number of matches per RFQ row")
 ):
     """
+    Return ALL extracted RFQ rows (template-aligned fields), each with a `matches` array of
+    product master molecules (matched_name, match_percent, brand_id, brand_name) scoring ≥ min_score.
+    Rows with no matches still appear with an empty `matches` list.
     """
     try:
         # RFQ
         rfq_bytes = await rfq_file.read()
         rfq_df = dataframe_from_upload_bytes(rfq_file.filename, rfq_bytes)
         mapped, mapping = build_mapped_rfq(rfq_df)
+        # Ensure columns exist even if not mapped
+        for col in TEMPLATE_COLUMNS:
+            if col not in mapped.columns:
+                mapped[col] = pd.NA
         # Product master
         pm_bytes = await product_master_json.read()
             pm_df, "__product_master_brand_id__")
         brand_name_col = detect_single_column(
             pm_df, "__product_master_brand_name__")
         if not molecule_col:
             raise HTTPException(
                 status_code=400, detail="Could not detect molecule column in product master JSON.")
+        # Build response data: include every RFQ row as extracted, plus matches
+        data_out = []
+        match_rows_with_any = 0
+        # Work only with the same index order; keep all rows
+        for idx, row in mapped.iterrows():
+            # serialize RFQ row (template-aligned)
+            rfq_record = {col: (None if pd.isna(row.get(col)) else str(
+                row.get(col))) for col in TEMPLATE_COLUMNS}
+            # compute matches based on this row's generic_name
+            g_val = rfq_record.get("generic_name") or ""
+            matches = match_generic_to_product_master_grouped_for_row(
+                generic_value=g_val,
+                pm_df=pm_df,
+                molecule_col=molecule_col,
+                brand_id_col=brand_id_col,
+                brand_name_col=brand_name_col,
+                min_score=min_score,
+                top_n=top_n
+            )
+            if matches:
+                match_rows_with_any += 1
+            data_out.append({
+                "row_index": int(idx),
+                # ALL extracted fields (id, generic_name, annual_volume_qty, etc.)
+                "rfq": rfq_record,
+                "matches": matches                 # zero or more matches
+            })
         return {
+            "rfq_rows": int(len(mapped)),
             "product_master_detected": {
                 "molecule_col": molecule_col,
                 "brand_id_col": brand_id_col,
                 "brand_name_col": brand_name_col
             },
+            "rows_with_matches": match_rows_with_any,
+            "data": data_out
         }
     except HTTPException:
         raise
 @app.get("/")
 def root():
+    return {"status": "ok", "message": "POST /match-difflib (rfq_file + product_master_json). Use /match-difflib-grouped to get full RFQ rows + matches."}