Spaces:

bhojakkeyur
/

Diamond

Sleeping

App Files Files Community

https://github.com/keyurbhojak1992/diamond-matcher.git

by bhojakkeyur - opened Jul 28, 2025

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

-823

Files changed (3) hide show

.streamlit.config.toml +0 -3
Streamlit Stone Pairing App.py +0 -817
requirements.txt +2 -3

.streamlit.config.toml DELETED Viewed

@@ -1,3 +0,0 @@
-# .streamlit/config.toml
-[browser]
-gatherUsageStats = false

Streamlit Stone Pairing App.py DELETED Viewed

@@ -1,817 +0,0 @@
-import os
-# Redirect Streamlit config/metrics to writable location
-os.environ['STREAMLIT_HOME'] = '/tmp/.streamlit'
-os.environ['XDG_CONFIG_HOME'] = '/tmp/.config'
-os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false"
-import streamlit as st
-import pandas as pd
-from itertools import combinations
-import io
-from pandas.api.types import CategoricalDtype
-# xlsxwriter is needed for advanced Excel formatting
-import xlsxwriter
-# --- 2. Criteria Mappings and Tolerances ---
-# The lookup tables in this section live at module level so they are built
-# once at import time and shared by every matching helper defined below.
-# Carat weight separating the 'below_0.90' rule set from the 'above_0.90' one.
-CARAT_THRESHOLD = 0.90
-# Carat sizing bins, keyed by rule set. Each bin is an inclusive
-# (lower, upper) pair; two stones can only pair when they land in the same bin.
-CARAT_SIZING_GROUPS = {
-    'below_0.90': [(0.30, 0.39), (0.40, 0.49), (0.50, 0.69), (0.70, 0.89)],
-    'above_0.90': [
-        (0.90, 0.99), (1.00, 1.49), (1.50, 1.99),
-        (2.00, 2.99), (3.00, 3.99), (4.00, 4.99),
-        # Open-ended bin so that stones of 5.00 ct and heavier still get a group.
-        (5.00, float('inf')),
-    ],
-}
-# Maximum allowed absolute difference per numeric column, keyed by rule set.
-# The inner keys are the DataFrame column names compared in process_df
-# (the 'LENGH' spelling matches the column name used there).
-carat_tolerances = {
-    'below_0.90': {'Ratio': 0.08, 'LENGH': 0.20, 'WIDTH': 0.20, 'HEIGHT': 0.25},
-    'above_0.90': {'Ratio': 0.10, 'LENGH': 0.22, 'WIDTH': 0.22, 'HEIGHT': 0.25},
-}
-# Colour grades in sort order; a grade's position doubles as its numeric
-# index when two colours are compared.
-color_order = list('DEFGHIJKLMN')
-color_map = dict(zip(color_order, range(len(color_order))))
-color_cat_type = CategoricalDtype(categories=color_order, ordered=True)
-# Clarity grades in sort order; a lower index means a higher clarity.
-clarity_sort_order = 'FL IF VVS1 VVS2 VS1 VS2 SI1 SI2'.split()
-clarity_cat_type = CategoricalDtype(categories=clarity_sort_order, ordered=True)
-clarity_value_map = {grade: position for position, grade in enumerate(clarity_sort_order)}
-# Clarity pairing rules per carat range.
-#   'rank1'        : (clarity, clarity) pairs that count as a rank-1 match.
-#   'rank2_groups' : clarity -> group ids; two grades that share at least one
-#                    group id count as a rank-2 match.
-# Both carat ranges currently carry the same rules, so each range is built from
-# the same two builders (every call returns fresh, independent objects).
-# NOTE(review): ('FL', 'FL') and ('IF', 'IF') are not in rank1, so two
-# identical FL or IF stones only match at rank 2 through group C1 - confirm
-# that this is intended.
-def _clarity_rank1_pairs():
-    return {
-        ('FL', 'IF'), ('IF', 'FL'),                                              # FL - IF
-        ('VVS1', 'VVS1'), ('VVS1', 'VVS2'), ('VVS2', 'VVS1'), ('VVS2', 'VVS2'),  # VVS - VVS
-        ('VS1', 'VS1'), ('VS1', 'VS2'), ('VS2', 'VS1'), ('VS2', 'VS2'),          # VS - VS
-        ('SI1', 'SI1'), ('SI1', 'SI2'), ('SI2', 'SI1'), ('SI2', 'SI2'),          # SI - SI
-    }
-def _clarity_rank2_groups():
-    return {
-        'FL': {'C1'},
-        'IF': {'C1'},
-        'VVS1': {'C1', 'C2'},
-        'VVS2': {'C1', 'C2'},
-        'VS1': {'C1', 'C2', 'C3'},
-        'VS2': {'C2', 'C3', 'C4'},
-        'SI1': {'C3', 'C4'},
-        'SI2': {'C4'},
-    }
-carat_clarity_groups_map_ranked = {
-    range_key: {
-        'rank1': _clarity_rank1_pairs(),
-        'rank2_groups': _clarity_rank2_groups(),
-    }
-    for range_key in ('below_0.90', 'above_0.90')
-}
-# Lab 100% Match: the only lab values a stone may carry to be paired at all.
-allowed_labs = {'NONE', 'IGI', 'GIA', 'HRD'}
-# Fluorescence ("Flour") grouping, keyed by carat range and then by lab.
-# Two fluorescence grades match when their group-id sets overlap.
-# NOTE(review): the IGI and HRD tables have no 'FNT' or 'VST' entry, so a
-# stone with either grade gets an empty group set and never matches under
-# those labs - confirm that this is intended.
-def _flour_groups_none_gia(prefix):
-    # Table shared by the NONE, GIA and NONE_GIA_COMBINED keys; group ids are
-    # tagged with the carat-range prefix ('LC' or 'HC').
-    g1, g2, g3 = (f'{prefix}_G{n}' for n in (1, 2, 3))
-    return {
-        'NON': {g1},
-        'VSL': {g1},
-        'SL': {g1},
-        'FNT': {g1, g2},
-        'MED': {g1, g2, g3},
-        'STG': {g2, g3},
-        'VST': {g3},
-    }
-def _flour_groups_igi():
-    return {'NON': {'G1'}, 'VSL': {'G1', 'G2'}, 'SL': {'G2', 'G3'}, 'MED': {'G3', 'G4'}, 'STG': {'G4'}}
-def _flour_groups_hrd():
-    return {'NON': {'G1'}, 'VSL': {'G1'}, 'SL': {'G1', 'G2'}, 'MED': {'G2', 'G3'}, 'STG': {'G3'}}
-def _flour_rules_for(prefix):
-    # One full lab -> table mapping for a single carat range.
-    return {
-        'NONE': _flour_groups_none_gia(prefix),
-        'IGI': _flour_groups_igi(),
-        'GIA': _flour_groups_none_gia(prefix),
-        'HRD': _flour_groups_hrd(),
-        # Used when one stone's lab is NONE and the other's is GIA.
-        'NONE_GIA_COMBINED': _flour_groups_none_gia(prefix),
-    }
-carat_flour_lab_based_groups = {
-    'below_0.90': _flour_rules_for('LC'),
-    'above_0.90': _flour_rules_for('HC'),
-}
-# Shade grades in sort order.
-shade_order = [
-    'NONE',
-    'INSPECTION ADVISABLE',
-    'MIX TINGE 1',
-    'MIX TINGE 2',
-    'MIX TINGE 3',
-    'LIGHT BROWN',
-    'BROWN',
-    'STRONG BROWN',
-    'VERY STRONG BROWN',
-]
-shade_cat_type = CategoricalDtype(categories=shade_order, ordered=True)
-# Shade pairing rules per carat range, laid out like the clarity rules:
-#   'rank1'        : (shade, shade) pairs that count as a rank-1 match.
-#   'rank2_groups' : shade -> group ids; shades sharing a group id match at rank 2.
-# Both carat ranges currently carry the same rules, so each range is built from
-# the same two builders (every call returns fresh, independent objects).
-# NOTE(review): identical pairs such as ('MIX TINGE 1', 'MIX TINGE 1') or
-# ('BROWN', 'BROWN') are not in rank1 and therefore only match at rank 2 via
-# their shared group - confirm that this is intended.
-def _shade_rank1_pairs():
-    return {
-        ('NONE', 'NONE'),
-        ('INSPECTION ADVISABLE', 'INSPECTION ADVISABLE'),
-        ('MIX TINGE 1', 'MIX TINGE 2'), ('MIX TINGE 2', 'MIX TINGE 1'),
-        ('MIX TINGE 2', 'MIX TINGE 3'), ('MIX TINGE 3', 'MIX TINGE 2'),
-        ('LIGHT BROWN', 'BROWN'), ('BROWN', 'LIGHT BROWN'),
-        ('BROWN', 'STRONG BROWN'), ('STRONG BROWN', 'BROWN'),
-        ('STRONG BROWN', 'VERY STRONG BROWN'), ('VERY STRONG BROWN', 'STRONG BROWN'),
-    }
-def _shade_rank2_groups():
-    return {
-        'NONE': {'SG1'},
-        'INSPECTION ADVISABLE': {'SG1'},
-        'MIX TINGE 1': {'SG1', 'SG2'},
-        'MIX TINGE 2': {'SG2', 'SG3'},
-        'MIX TINGE 3': {'SG3'},
-        'LIGHT BROWN': {'SG1', 'SG2'},
-        'BROWN': {'SG2', 'SG3'},
-        'STRONG BROWN': {'SG3'},
-        'VERY STRONG BROWN': {'SG3'},
-    }
-carat_shade_groups_map_ranked = {
-    range_key: {
-        'rank1': _shade_rank1_pairs(),
-        'rank2_groups': _shade_rank2_groups(),
-    }
-    for range_key in ('below_0.90', 'above_0.90')
-}
-# --- 3. Helper Functions for Matching Logic ---
-def is_numeric_match(val1, val2, tolerance, round_decimals=6):
-    """Tell whether two numeric values lie within ``tolerance`` of each other.
-    A value that ``pd.isna`` reports as missing on either side never matches.
-    The absolute difference is rounded to ``round_decimals`` places before it
-    is compared against ``tolerance`` (inclusive).
-    """
-    if any(pd.isna(value) for value in (val1, val2)):
-        return False
-    gap = round(abs(val1 - val2), round_decimals)
-    return gap <= tolerance
-def is_color_match(color1, color2, color_map):
-    """Tell whether two colour grades are at most one step apart.
-    ``color_map`` maps each grade to its position in the colour scale. A
-    missing grade, or one that is not a key of ``color_map``, never matches.
-    """
-    grades = (color1, color2)
-    if any(pd.isna(grade) for grade in grades) or any(grade not in color_map for grade in grades):
-        return False
-    first_pos, second_pos = (color_map[grade] for grade in grades)
-    return abs(first_pos - second_pos) <= 1
-def get_carat_sizing_group(carat_value, carat_sizing_groups, carat_threshold):
-    """Return the ``(lower, upper)`` sizing bin that contains ``carat_value``.
-    The bins are looked up under 'below_0.90' when the value is below
-    ``carat_threshold`` and under 'above_0.90' otherwise; both bounds of a bin
-    are inclusive. Returns None for a missing value and for a value that no
-    bin covers, which includes values lying between two bins (for example
-    0.395 with bins ending at 0.39 and starting at 0.40).
-    """
-    if pd.isna(carat_value):
-        return None
-    if carat_value < carat_threshold:
-        range_key = 'below_0.90'
-    else:
-        range_key = 'above_0.90'
-    candidate_bins = carat_sizing_groups.get(range_key, [])
-    return next(
-        ((low, high) for low, high in candidate_bins if low <= carat_value <= high),
-        None,
-    )
-def is_clarity_group_match(clarity1, clarity2, carat_value, carat_clarity_groups_map_ranked, carat_threshold):
-    """Rank how well two clarity grades pair for the given carat weight.
-    Returns 1 when the pair (in either order) is listed under 'rank1' for the
-    carat range, 2 when the two grades share at least one 'rank2_groups' id,
-    and None when neither holds or when either grade is missing. The carat
-    range is 'below_0.90' for values under ``carat_threshold``, else
-    'above_0.90'.
-    """
-    if pd.isna(clarity1) or pd.isna(clarity2):
-        return None
-    range_key = 'below_0.90' if carat_value < carat_threshold else 'above_0.90'
-    rules = carat_clarity_groups_map_ranked.get(range_key, {})
-    top_pairs = rules.get('rank1', set())
-    for candidate in ((clarity1, clarity2), (clarity2, clarity1)):
-        if candidate in top_pairs:
-            return 1
-    group_lookup = rules.get('rank2_groups', {})
-    first_groups = group_lookup.get(clarity1, set())
-    second_groups = group_lookup.get(clarity2, set())
-    # An overlap in group ids is enough for the weaker, rank-2 match.
-    if not first_groups.isdisjoint(second_groups):
-        return 2
-    return None
-def is_lab_match(lab1, lab2, allowed_labs):
-    """Tell whether two lab values are compatible for pairing.
-    Both values are stringified and upper-cased, and each must be a member of
-    ``allowed_labs``. They then match when they are equal, or when one is
-    'NONE' and the other is 'GIA'. A missing value never matches.
-    """
-    if pd.isna(lab1) or pd.isna(lab2):
-        return False
-    first = str(lab1).upper()
-    second = str(lab2).upper()
-    if not (first in allowed_labs and second in allowed_labs):
-        return False
-    # Same lab, or the one permitted cross-lab combination.
-    return first == second or {first, second} == {'NONE', 'GIA'}
-def is_flour_lab_based_group_match(flour1, flour2, lab1, lab2, carat_value, carat_flour_lab_based_groups, allowed_labs, carat_threshold):
-    """Tell whether two fluorescence grades match under the stones' lab rules.
-    The labs must first pass ``is_lab_match``. The fluorescence table is then
-    chosen by carat range ('below_0.90' when ``carat_value`` is under
-    ``carat_threshold``, else 'above_0.90') and by lab: the shared lab name
-    when both labs are equal, or 'NONE_GIA_COMBINED' for a NONE/GIA pair. The
-    grades match when their group-id sets in that table overlap. Missing
-    fluorescence or lab values, and grades absent from the table, never match.
-    """
-    if pd.isna(flour1) or pd.isna(flour2) or pd.isna(lab1) or pd.isna(lab2):
-        return False
-    first_lab = str(lab1).upper()
-    second_lab = str(lab2).upper()
-    if not is_lab_match(first_lab, second_lab, allowed_labs):
-        return False
-    if first_lab == second_lab:
-        table_key = first_lab
-    elif {first_lab, second_lab} == {'NONE', 'GIA'}:
-        table_key = 'NONE_GIA_COMBINED'
-    else:
-        table_key = None
-    if not table_key:
-        return False
-    range_key = 'below_0.90' if carat_value < carat_threshold else 'above_0.90'
-    grade_table = carat_flour_lab_based_groups.get(range_key, {}).get(table_key, {})
-    first_groups = grade_table.get(flour1, set())
-    second_groups = grade_table.get(flour2, set())
-    return not first_groups.isdisjoint(second_groups)
-def is_shade_group_match(shade1, shade2, carat_value, carat_shade_groups_map_ranked, carat_threshold):
-    """Rank how well two shade grades pair for the given carat weight.
-    Returns 1 when the pair (in either order) is listed under 'rank1' for the
-    carat range, 2 when the two shades share at least one 'rank2_groups' id,
-    and None when neither holds or when either shade is missing. The carat
-    range is 'below_0.90' for values under ``carat_threshold``, else
-    'above_0.90'.
-    """
-    if pd.isna(shade1) or pd.isna(shade2):
-        return None
-    range_key = 'below_0.90' if carat_value < carat_threshold else 'above_0.90'
-    rules = carat_shade_groups_map_ranked.get(range_key, {})
-    top_pairs = rules.get('rank1', set())
-    for candidate in ((shade1, shade2), (shade2, shade1)):
-        if candidate in top_pairs:
-            return 1
-    group_lookup = rules.get('rank2_groups', {})
-    first_groups = group_lookup.get(shade1, set())
-    second_groups = group_lookup.get(shade2, set())
-    # An overlap in group ids is enough for the weaker, rank-2 match.
-    if not first_groups.isdisjoint(second_groups):
-        return 2
-    return None
-# --- Main Processing Function ---
-def process_df(df: pd.DataFrame):
-    # Ensure numeric columns are actually numeric
-    # Define numeric_cols inside the function or pass it as an argument
-    numeric_cols = ['Carat', 'Depth', 'Table', 'Ratio', 'LENGH', 'WIDTH', 'HEIGHT', 'Dis%', 'Price/Ct$', 'Amount$', 'Diameter']
-    for col in numeric_cols:
-        df[col] = pd.to_numeric(df[col], errors='coerce')
-    # --- 4. Main Matching Logic (Detailed for Reporting - generating all potential pairs) ---
-    all_potential_matched_pairs_raw = []
-    all_comparisons_for_report = [] # To store detailed comparison results for text reports
-    # Using st.progress for visual feedback
-    progress_text = "Analyzing stone pairs..."
-    my_bar = st.progress(0, text=progress_text)
-    total_combinations = len(list(combinations(df.index, 2))) # Calculate once
-    for idx_count, (i, j) in enumerate(combinations(df.index, 2)):
-        stone1 = df.loc[i]
-        stone2 = df.loc[j]
-        # Ensure Stone ID_1 is always numerically smaller for consistency in raw pairs
-        if int(stone1['Stone ID']) > int(stone2['Stone ID']):
-            stone1, stone2 = stone2, stone1
-        reasons_for_match = []
-        reasons_for_no_match = []
-        current_pair_is_match = True # Assume match until a criterion fails
-        clarity_match_rank = None
-        shade_match_rank = None
-        abs_diff_clarity_index = float('inf')
-        clarity_bias_index = float('inf')
-        # Determine broad carat range based on Stone 1's carat for applying rules
-        pair_carat_for_rules = stone1['Carat']
-        broad_carat_range_key = 'below_0.90' if pair_carat_for_rules < CARAT_THRESHOLD else 'above_0.90'
-        # Get the appropriate tolerances for the current broad carat range
-        current_numeric_tolerances = carat_tolerances[broad_carat_range_key]
-        # Carat Sizing Group Match (mandatory first check)
-        carat_group1 = get_carat_sizing_group(stone1['Carat'], CARAT_SIZING_GROUPS, CARAT_THRESHOLD)
-        carat_group2 = get_carat_sizing_group(stone2['Carat'], CARAT_SIZING_GROUPS, CARAT_THRESHOLD)
-        carat_sizing_group_current_match = (carat_group1 is not None and carat_group1 == carat_group2)
-        if carat_sizing_group_current_match:
-            reasons_for_match.append(f"Carat Sizing Group: Stone 1 ({stone1['Carat']:.2f}ct) and Stone 2 ({stone2['Carat']:.2f}ct) are both in group {carat_group1}.")
-        else:
-            current_pair_is_match = False
-            if carat_group1 is None or carat_group2 is None:
-                reasons_for_no_match.append(f"Carat Sizing Group mismatch: One or both stones ({stone1['Carat']:.2f}ct, {stone2['Carat']:.2f}ct) outside defined sizing groups.")
-            else:
-                reasons_for_no_match.append(f"Carat Sizing Group mismatch: Stone 1 ({stone1['Carat']:.2f}ct) is in group {carat_group1}, Stone 2 ({stone2['Carat']:.2f}ct) is in group {carat_group2}.")
-        if current_pair_is_match:
-            # Lab Match
-            lab_current_match = is_lab_match(stone1['Lab'], stone2['Lab'], allowed_labs)
-            if lab_current_match:
-                reasons_for_match.append(f"Lab: '{stone1['Lab']}' and '{stone2['Lab']}' matched based on specific rules.")
-            else:
-                current_pair_is_match = False
-                reason_lab_detail = []
-                if pd.isna(stone1['Lab']) or pd.isna(stone2['Lab']):
-                    reason_lab_detail.append(f"Missing Lab value for one or both stones.")
-                else:
-                    s1_lab_upper = str(stone1['Lab']).upper()
-                    s2_lab_upper = str(stone2['Lab']).upper()
-                    if s1_lab_upper not in allowed_labs or s2_lab_upper not in allowed_labs:
-                         reason_lab_detail.append(f"One or both labs ('{stone1['Lab']}', '{stone2['Lab']}') are not in allowed labs: {allowed_labs}.")
-                    elif (s1_lab_upper == 'NONE' and s2_lab_upper == 'GIA') or \
-                        (s1_lab_upper == 'GIA' and s2_lab_upper == 'NONE'):
-                        reason_lab_detail.append(f"Labs ('{stone1['Lab']}', '{stone2['Lab']}') should have matched by NONE/GIA combination rule but didn't. Check values.")
-                    else:
-                        reason_lab_detail.append(f"No specific rule for labs ('{stone1['Lab']}', '{stone2['Lab']}') to match.")
-                reasons_for_no_match.append(f"Lab mismatch: {' '.join(reason_lab_detail)}")
-            # Shape Match
-            if current_pair_is_match:
-                shape_current_match = (stone1['Shape'] == stone2['Shape'])
-                if shape_current_match:
-                    reasons_for_match.append(f"Shape: '{stone1['Shape']}' matched.")
-                else:
-                    current_pair_is_match = False
-                    reasons_for_no_match.append(f"Shape mismatch: Stone 1 is '{stone1['Shape']}', Stone 2 is '{stone2['Shape']}'.")
-            # Color Match
-            if current_pair_is_match:
-                color_current_match = is_color_match(stone1['Color'], stone2['Color'], color_map)
-                if color_current_match:
-                    reasons_for_match.append(f"Color: '{stone1['Color']}' and '{stone2['Color']}' matched (within 1 grade).")
-                else:
-                    current_pair_is_match = False
-                    reasons_for_no_match.append(f"Color mismatch: Stone 1 is '{stone1['Color']}', Stone 2 is '{stone2['Color']}' (exceeds 1 grade tolerance or missing/invalid).")
-            # Clarity Match
-            if current_pair_is_match:
-                clarity_match_rank = is_clarity_group_match(stone1['Clarity'], stone2['Clarity'], pair_carat_for_rules, carat_clarity_groups_map_ranked, CARAT_THRESHOLD)
-                if clarity_match_rank is not None:
-                    reasons_for_match.append(f"Clarity: '{stone1['Clarity']}' and '{stone2['Clarity']}' matched with Rank {clarity_match_rank} for {broad_carat_range_key} carat range.")
-                    if stone1['Clarity'] in clarity_value_map and stone2['Clarity'] in clarity_value_map:
-                        val1 = clarity_value_map[stone1['Clarity']]
-                        val2 = clarity_value_map[stone2['Clarity']]
-                        abs_diff_clarity_index = abs(val1 - val2)
-                        clarity_bias_index = val2 - val1
-                else:
-                    current_pair_is_match = False
-                    reasons_for_no_match.append(f"Clarity mismatch: Stone 1 is '{stone1['Clarity']}', Stone 2 is '{stone2['Clarity']}' (no common group or invalid for {broad_carat_range_key} carat range).")
-            # Flour Match (Lab-based)
-            if current_pair_is_match:
-                flour_current_match = is_flour_lab_based_group_match(
-                    stone1['Flour'], stone2['Flour'], stone1['Lab'], stone2['Lab'],
-                    pair_carat_for_rules, carat_flour_lab_based_groups, allowed_labs, CARAT_THRESHOLD
-                )
-                if flour_current_match:
-                    reasons_for_match.append(f"Flour: '{stone1['Flour']}' and '{stone2['Flour']}' matched based on Lab '{stone1['Lab']}' and '{stone2['Lab']}' group rules for {broad_carat_range_key} carat range.")
-                else:
-                    current_pair_is_match = False
-                    reason_flour_detail = []
-                    if pd.isna(stone1['Flour']) or pd.isna(stone2['Flour']):
-                        reason_flour_detail.append(f"Flour value missing for one or both stones ('{stone1['Flour']}' vs '{stone2['Flour']}').")
-                    elif not is_lab_match(stone1['Lab'], stone2['Lab'], allowed_labs):
-                        reason_flour_detail.append(f"Labs mismatch or not allowed for Flour comparison ('{stone1['Lab']}' vs '{stone2['Lab']}').")
-                    else:
-                        effective_lab_key = None
-                        lab1_upper = str(stone1['Lab']).upper() if pd.notna(stone1['Lab']) else ''
-                        lab2_upper = str(stone2['Lab']).upper() if pd.notna(stone2['Lab']) else ''
-                        if lab1_upper == lab2_upper:
-                            effective_lab_key = lab1_upper
-                        elif (lab1_upper == 'NONE' and lab2_upper == 'GIA') or (lab1_upper == 'GIA' and lab2_upper == 'NONE'):
-                            effective_lab_key = 'NONE_GIA_COMBINED'
-                        if effective_lab_key and broad_carat_range_key in carat_flour_lab_based_groups and effective_lab_key in carat_flour_lab_based_groups[broad_carat_range_key]:
-                            flour_lab_map = carat_flour_lab_based_groups[broad_carat_range_key][effective_lab_key]
-                            groups1 = flour_lab_map.get(stone1['Flour'], set())
-                            groups2 = flour_lab_map.get(stone2['Flour'], set())
-                            if not bool(groups1.intersection(groups2)):
-                                reason_flour_detail.append(f"Flour mismatch: Stone 1 ('{stone1['Flour']}') and Stone 2 ('{stone2['Flour']}') have no common group for the effective Lab '{effective_lab_key}' and {broad_carat_range_key} carat range.")
-                        else:
-                            reason_flour_detail.append(f"Flour comparison failed due to an unexpected scenario (e.g., internal logic error or unhandled lab/flour combination).")
-                    reasons_for_no_match.append(f"Flour mismatch: {' '.join(reason_flour_detail)}")
-            # Shade Match
-            if current_pair_is_match:
-                shade_match_rank = is_shade_group_match(stone1['Shade'], stone2['Shade'], pair_carat_for_rules, carat_shade_groups_map_ranked, CARAT_THRESHOLD)
-                if shade_match_rank is not None:
-                    reasons_for_match.append(f"Shade: '{stone1['Shade']}' and '{stone2['Shade']}' matched with Rank {shade_match_rank} for {broad_carat_range_key} carat range.")
-                else:
-                    current_pair_is_match = False
-                    reasons_for_no_match.append(f"Shade mismatch: Stone 1 is '{stone1['Shade']}', Stone 2 is '{stone2['Shade']}' (no common group or invalid for {broad_carat_range_key} carat range).")
-            # Numeric Match
-            if current_pair_is_match:
-                numeric_mismatches_details = []
-                for col, tol in current_numeric_tolerances.items():
-                    if not is_numeric_match(stone1[col], stone2[col], tol):
-                        numeric_mismatches_details.append(
-                            f"    - {col}: Stone 1={stone1[col]:.2f}, Stone 2={stone2[col]:.2f}, diff={abs(stone1[col] - stone2[col]):.2f}, tolerance={tol:.2f}. Diff exceeded tolerance for {broad_carat_range_key} carat range."
-                        )
-                all_numeric_current_match = not bool(numeric_mismatches_details)
-                if all_numeric_current_match:
-                    reasons_for_match.append(f"All numeric criteria matched within tolerance for {broad_carat_range_key} carat range.")
-                else:
-                    current_pair_is_match = False
-                    reasons_for_no_match.append("Numeric criteria mismatches:")
-                    reasons_for_no_match.extend(numeric_mismatches_details)
-        # Final decision for the pair for reporting and Excel data collection
-        if current_pair_is_match:
-            is_100_percent_sub_match = (str(stone1['Color']) == str(stone2['Color'])) and \
-                                       (str(stone1['Clarity']) == str(stone2['Clarity'])) and \
-                                       (str(stone1['Flour']) == str(stone2['Flour']))
-            all_comparisons_for_report.append({
-                'stone_id_1': stone1['Stone ID'],
-                'stone_id_2': stone2['Stone ID'],
-                'is_match': True,
-                'reasons': reasons_for_match
-            })
-            all_potential_matched_pairs_raw.append({
-                'stone1_data': stone1.to_dict(),
-                'stone2_data': stone2.to_dict(),
-                '_Highlight_Row': is_100_percent_sub_match,
-                '_Clarity_Match_Rank': clarity_match_rank,
-                '_Abs_Diff_Clarity_Index': abs_diff_clarity_index,
-                '_Clarity_Bias_Index': clarity_bias_index,
-                '_Shade_Match_Rank': shade_match_rank
-            })
-        else:
-            all_comparisons_for_report.append({
-                'stone_id_1': stone1['Stone ID'],
-                'stone_id_2': stone2['Stone ID'],
-                'is_match': False,
-                'reasons': reasons_for_no_match
-            })
-        # Update progress bar
-        my_bar.progress((idx_count + 1) / total_combinations, text=progress_text)
-    # --- Calculate Global Stone ID Counts across all potential matched pairs ---
-    global_stone_id_counts = {}
-    for pair in all_potential_matched_pairs_raw:
-        s1_id = pair['stone1_data']['Stone ID']
-        s2_id = pair['stone2_data']['Stone ID']
-        global_stone_id_counts[s1_id] = global_stone_id_counts.get(s1_id, 0) + 1
-        global_stone_id_counts[s2_id] = global_stone_id_counts.get(s2_id, 0) + 1
-    # --- Apply NEW Global Filtering Logic based on sorted pair priority and uniqueness ---
-    final_filtered_matched_pairs = []
-    claimed_stones = set()
-    # Augment each potential pair with sorting metrics
-    for pair in all_potential_matched_pairs_raw:
-        s1_id = pair['stone1_data']['Stone ID']
-        s2_id = pair['stone2_data']['Stone ID']
-        pair_sum_global_counts = global_stone_id_counts.get(s1_id, 0) + global_stone_id_counts.get(s2_id, 0)
-        pair_min_id = min(int(s1_id), int(s2_id))
-        pair_max_id = max(int(s1_id), int(s2_id))
-        pair['_Pair_Sort_Key'] = (
-            pair['_Clarity_Match_Rank'],
-            pair['_Shade_Match_Rank'],
-            (pair['_Abs_Diff_Clarity_Index'], -pair['_Clarity_Bias_Index']),
-            not pair['_Highlight_Row'],
-            pair_min_id,
-            pair_max_id,
-            pair_sum_global_counts
-        )
-    sorted_all_potential_matched_pairs = sorted(all_potential_matched_pairs_raw, key=lambda p: p['_Pair_Sort_Key'])
-    for pair in sorted_all_potential_matched_pairs:
-        s1_id = pair['stone1_data']['Stone ID']
-        s2_id = pair['stone2_data']['Stone ID']
-        if s1_id not in claimed_stones and s2_id not in claimed_stones:
-            final_filtered_matched_pairs.append(pair)
-            claimed_stones.add(s1_id)
-            claimed_stones.add(s2_id)
-    # --- 5. Prepare Output DataFrame for Excel ---
-    wide_output_rows = []
-    for pair_data in final_filtered_matched_pairs:
-        stone1_raw = pair_data['stone1_data']
-        stone2_raw = pair_data['stone2_data']
-        is_highlighted = pair_data['_Highlight_Row']
-        if int(stone1_raw['Stone ID']) > int(stone2_raw['Stone ID']):
-            stone1_raw, stone2_raw = stone2_raw, stone1_raw
-        stone1_full_data = pd.Series(stone1_raw)
-        stone2_full_data = pd.Series(stone2_raw)
-        current_pair_row = {}
-        for col in df.columns:
-            current_pair_row[f'{col}_1'] = stone1_full_data[col]
-            current_pair_row[f'{col}_2'] = stone2_full_data[col]
-        current_pair_row['Pair No'] = 0 # Placeholder, will be assigned later
-        current_pair_row['_Highlight_Row'] = is_highlighted
-        wide_output_rows.append(current_pair_row)
-    excel_output_data = io.BytesIO()
-    matched_report_content = io.StringIO()
-    non_matched_report_content = io.StringIO()
-    if wide_output_rows:
-        final_wide_df = pd.DataFrame(wide_output_rows)
-        # Convert CategoricalDtype for sorting
-        final_wide_df['Color_1'] = final_wide_df['Color_1'].fillna('').astype(color_cat_type)
-        final_wide_df['Color_2'] = final_wide_df['Color_2'].fillna('').astype(color_cat_type)
-        final_wide_df['Clarity_1'] = final_wide_df['Clarity_1'].fillna('').astype(clarity_cat_type)
-        final_wide_df['Clarity_2'] = final_wide_df['Clarity_2'].fillna('').astype(clarity_cat_type)
-        final_wide_df['Shade_1'] = final_wide_df['Shade_1'].fillna('').astype(shade_cat_type)
-        final_wide_df['Shade_2'] = final_wide_df['Shade_2'].fillna('').astype(shade_cat_type)
-        final_wide_df['Stone ID_1_Num'] = pd.to_numeric(final_wide_df['Stone ID_1'], errors='coerce')
-        final_wide_df = final_wide_df.sort_values(
-            by=['Stone ID_1_Num', 'Color_1', 'Clarity_1', 'Color_2', 'Clarity_2'],
-            ascending=True
-        ).reset_index(drop=True)
-        final_wide_df = final_wide_df.drop(columns=['Stone ID_1_Num'])
-        final_wide_df['Pair No'] = final_wide_df.groupby('Stone ID_1').ngroup() + 1
-        final_wide_df['_Is_Last_In_Pair_Group'] = final_wide_df['Stone ID_1'].shift(-1) != final_wide_df['Stone ID_1']
-        final_wide_df.loc[len(final_wide_df) - 1, '_Is_Last_In_Pair_Group'] = True
-        preferred_column_order_str = "Pair No\tStone ID_1\tStone ID_2\tShape_1\tShape_2\tCarat_1\tCarat_2\tLab_1\tLab_2\tColor_1\tColor_2\tClarity_1\tClarity_2\tFlour_1\tFlour_2\tRatio_1\tRatio_2\tLENGH_1\tLENGH_2\tWIDTH_1\tWIDTH_2\tHEIGHT_1\tHEIGHT_2\tTable_1\tTable_2\tDepth_1\tDepth_2\tShade_1\tShade_2\tMilky_1\tMilky_2"
-        preferred_cols_list = preferred_column_order_str.split('\t')
-        current_cols_before_dropping_highlight = final_wide_df.columns.tolist()
-        final_ordered_cols = []
-        for col in preferred_cols_list:
-            if col in current_cols_before_dropping_highlight:
-                final_ordered_cols.append(col)
-        remaining_cols = [col for col in current_cols_before_dropping_highlight
-                          if col not in final_ordered_cols and col not in ['_Highlight_Row', '_Is_Last_In_Pair_Group']]
-        final_ordered_cols.extend(remaining_cols)
-        final_wide_df = final_wide_df[final_ordered_cols + ['_Highlight_Row', '_Is_Last_In_Pair_Group']]
-        # --- 6. Save to XLSX with Advanced Formatting ---
-        # Using BytesIO to save to memory
-        writer = pd.ExcelWriter(excel_output_data, engine='xlsxwriter')
-        final_wide_df_for_excel = final_wide_df.drop(columns=['_Highlight_Row', '_Is_Last_In_Pair_Group'])
-        final_wide_df_for_excel.to_excel(writer, sheet_name='Matched Pairs', index=False)
-        workbook = writer.book
-        worksheet = writer.sheets['Matched Pairs']
-        worksheet.freeze_panes(1, 0)
-        header_format = workbook.add_format({
-            'bold': True, 'bg_color': '#F2F2F2', 'border': 1,
-            'border_color': '#C0C0C0', 'align': 'center', 'valign': 'vcenter'
-        })
-        data_format = workbook.add_format({
-            'border': 1, 'border_color': '#C0C0C0', 'align': 'center', 'valign': 'vcenter'
-        })
-        numeric_data_format = workbook.add_format({
-            'num_format': '0.00', 'border': 1, 'border_color': '#C0C0C0',
-            'align': 'center', 'valign': 'vcenter'
-        })
-        highlight_row_format = workbook.add_format({
-            'bg_color': '#E0FFD4', 'border': 1, 'border_color': '#C0C0C0',
-            'align': 'center', 'valign': 'vcenter'
-        })
-        highlight_numeric_format = workbook.add_format({
-            'num_format': '0.00', 'bg_color': '#E0FFD4', 'border': 1, 'border_color': '#C0C0C0',
-            'align': 'center', 'valign': 'vcenter'
-        })
-        data_format_dark_border = workbook.add_format({
-            'border': 1, 'border_color': '#C0C0C0', 'bottom': 5, 'bottom_color': '#000000',
-            'align': 'center', 'valign': 'vcenter'
-        })
-        numeric_data_format_dark_border = workbook.add_format({
-            'num_format': '0.00', 'border': 1, 'border_color': '#C0C0C0', 'bottom': 5, 'bottom_color': '#000000',
-            'align': 'center', 'valign': 'vcenter'
-        })
-        highlight_row_format_dark_border = workbook.add_format({
-            'bg_color': '#E0FFD4', 'border': 1, 'border_color': '#C0C0C0', 'bottom': 5, 'bottom_color': '#000000',
-            'align': 'center', 'valign': 'vcenter'
-        })
-        highlight_numeric_format_dark_border = workbook.add_format({
-            'num_format': '0.00', 'bg_color': '#E0FFD4', 'border': 1, 'border_color': '#C0C0C0', 'bottom': 5, 'bottom_color': '#000000',
-            'align': 'center', 'valign': 'vcenter'
-        })
-        numeric_cols_suffixes = [f'{col}_1' for col in numeric_cols] + [f'{col}_2' for col in numeric_cols]
-        for col_num, value in enumerate(final_wide_df_for_excel.columns.values):
-            worksheet.write(0, col_num, value, header_format)
-        for row_num in range(len(final_wide_df)):
-            is_row_highlighted = final_wide_df.loc[row_num, '_Highlight_Row']
-            is_last_in_group = final_wide_df.loc[row_num, '_Is_Last_In_Pair_Group']
-            for col_num, col_name in enumerate(final_wide_df_for_excel.columns):
-                cell_value = final_wide_df_for_excel.iloc[row_num, col_num]
-                excel_row = row_num + 1
-                current_cell_format = None
-                if col_name in numeric_cols_suffixes:
-                    if is_row_highlighted and is_last_in_group:
-                        current_cell_format = highlight_numeric_format_dark_border
-                    elif is_row_highlighted:
-                        current_cell_format = highlight_numeric_format
-                    elif is_last_in_group:
-                        current_cell_format = numeric_data_format_dark_border
-                    else:
-                        current_cell_format = numeric_data_format
-                else:
-                    if is_row_highlighted and is_last_in_group:
-                        current_cell_format = highlight_row_format_dark_border
-                    elif is_row_highlighted:
-                        current_cell_format = highlight_row_format
-                    elif is_last_in_group:
-                        current_cell_format = data_format_dark_border
-                    else:
-                        current_cell_format = data_format
-                if pd.isna(cell_value):
-                    worksheet.write(excel_row, col_num, '', current_cell_format)
-                elif col_name in ['DETAIL_1', 'DETAIL_2']:
-                    # Assuming DETAIL_1/DETAIL_2 contain URLs if you want them as clickable links
-                    # If they contain other text, handle accordingly
-                    if isinstance(cell_value, str) and cell_value.startswith('http'):
-                        worksheet.write_url(excel_row, col_num, cell_value, current_cell_format, "DETAILS")
-                    else:
-                        worksheet.write(excel_row, col_num, str(cell_value), current_cell_format) # Convert to string for non-URL text
-                else:
-                    worksheet.write(excel_row, col_num, cell_value, current_cell_format)
-        for i, col in enumerate(final_wide_df_for_excel.columns):
-            max_len = max(
-                final_wide_df_for_excel[col].astype(str).apply(len).max(),
-                len(col)
-            )
-            if col in ['DETAIL_1', 'DETAIL_2']:
-                max_len = max(max_len, len("DETAILS"))
-            worksheet.set_column(i, i, max_len + 2)
-        writer.close()
-    else:
-        st.warning("No matching pairs found for Excel report.")
-    # --- 7. Generate Text Reports ---
-    matched_comparisons = [c for c in all_comparisons_for_report if c['is_match']]
-    non_matched_comparisons = [c for c in all_comparisons_for_report if not c['is_match']]
-    matched_stones_count = len(matched_comparisons)
-    non_matched_stones_count = len(non_matched_comparisons)
-    matched_report_content.write(f"No_of_stones: {matched_stones_count}\n")
-    matched_report_content.write("--- Matched Stone Pairs Detailed Report ---\n")
-    if not matched_comparisons:
-        matched_report_content.write("No matching pairs found.\n")
-    for idx, comp in enumerate(matched_comparisons):
-        matched_report_content.write(f"\n--- Pair {idx+1}: {comp['stone_id_1']} & {comp['stone_id_2']} ---\n")
-        matched_report_content.write("Match Status: MATCHED\n")
-        matched_report_content.write("Reasons for Match:\n")
-        for reason in comp['reasons']:
-            matched_report_content.write(f"    - {reason}\n")
-    non_matched_report_content.write(f"No_of_stones: {non_matched_stones_count}\n")
-    non_matched_report_content.write("--- Non-Matched Stone Pairs Detailed Report ---\n")
-    if not non_matched_comparisons:
-        non_matched_report_content.write("No non-matching pairs found.\n")
-    for idx, comp in enumerate(non_matched_comparisons):
-        non_matched_report_content.write(f"\n--- Pair {idx+1}: {comp['stone_id_1']} & {comp['stone_id_2']} ---\n")
-        non_matched_report_content.write("Match Status: NOT MATCHED\n")
-        non_matched_report_content.write("Reasons for Not Matching:\n")
-        for reason in comp['reasons']:
-            non_matched_report_content.write(f"    - {reason}\n")
-    return excel_output_data.getvalue(), matched_report_content.getvalue(), non_matched_report_content.getvalue()
-st.set_page_config(page_title="Stone Pairing App", layout="centered")
-st.title("💎 Stone Pair Matching Application")
-st.markdown("""
-    Upload your Excel file containing stone data, and this app will identify matching pairs
-    based on predefined criteria, generating a formatted Excel output and detailed reports.
-""")
-uploaded_file = st.file_uploader("Upload your raw Excel file (.xlsx)", type=["xlsx"])
-if uploaded_file:
-    st.info("File uploaded successfully! Starting processing...")
-    try:
-        df = pd.read_excel(uploaded_file)
-        if df.empty:
-            st.error("The uploaded Excel file is empty. Please upload a file with data.")
-        else:
-            # Display first few rows for user confirmation
-            st.subheader("Preview of your data:")
-            st.dataframe(df.head())
-            st.write("Processing, this may take a while for large files...")
-            with st.spinner("Processing complex matching logic..."):
-                excel_data, matched_report_str, non_matched_report_str = process_df(df)
-            st.success("Processing complete! You can now download the results.")
-            col1, col2, col3 = st.columns(3)
-            with col1:
-                st.download_button(
-                    label="Download Processed Excel",
-                    data=excel_data,
-                    file_name="matched_stones_pairwise_comparison_formatted.xlsx",
-                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-                    help="Download the Excel file with matched pairs, formatted with highlights and borders."
-                )
-            with col2:
-                st.download_button(
-                    label="Download Matched Pairs Report",
-                    data=matched_report_str,
-                    file_name="matched_stones_detailed_report.txt",
-                    mime="text/plain",
-                    help="Get a text report detailing all matched stone pairs and reasons."
-                )
-            with col3:
-                st.download_button(
-                    label="Download Non-Matched Pairs Report",
-                    data=non_matched_report_str,
-                    file_name="non_matched_stones_detailed_report.txt",
-                    mime="text/plain",
-                    help="Get a text report detailing all non-matched stone pairs and reasons."
-                )
-    except Exception as e:
-        st.error(f"An error occurred during file processing: {e}")
-        st.error("Please ensure your Excel file has the expected column names and data types as per the original script.")
-st.markdown("---")
-st.markdown("Developed with ❤️ for efficient stone pairing.")

requirements.txt CHANGED Viewed

@@ -1,4 +1,3 @@
-streamlit
 pandas
-xlsxwriter
-openpyxl

+altair
 pandas
+streamlit