import csv
from io import StringIO

import pandas as pd

# Columns consumed from the Terrakotta upload.
_TK_COLUMNS = ["*Phone Number", "Contact Name", "PropertyID", "Email 1", "TK_NOTES"]

_PHONE_COLS = ["phone1", "phone2", "phone3", "phone4", "phone5"]

# Export template: (source_key, csv_header) in output order. A source_key of
# "blank" emits an empty cell. Header names repeat across the User / Company /
# Property sections because the target import format uses section markers
# ("User", "Company", "Property", "Deal") followed by per-section fields.
_FINAL_COLUMNS = [
    ("blank", "User"),
    ("first_name", "first_name"),
    ("last_name", "last_name"),
    ("blank", "middle_name"),
    ("people_id", "people_id"),
    ("blank", "address"),
    ("blank", "city"),
    ("blank", "state"),
    ("blank", "postal_code"),
    ("blank", "country"),
    ("blank", "address2"),
    ("phone1", "phone1"),
    ("blank", "phone1_type"),
    ("phone2", "phone2"),
    ("blank", "phone2_type"),
    ("phone3", "phone3"),
    ("blank", "phone3_type"),
    ("phone4", "phone4"),
    ("blank", "phone4_type"),
    ("phone5", "phone5"),
    ("blank", "phone5_type"),
    ("blank", "salutation"),
    ("blank", "nickname"),
    ("blank", "spouse"),
    ("contact_notes", "contact_notes"),
    ("blank", "sic_code"),
    ("blank", "Company"),
    ("company_name", "name"),
    ("blank", "address"),
    ("blank", "city"),
    ("blank", "state"),
    ("blank", "postal_code"),
    ("blank", "country"),
    ("blank", "address2"),
    ("blank", "Property"),
    ("blank", "lat"),
    ("blank", "lon"),
    ("address", "address"),
    ("city", "city"),
    ("state", "state"),
    ("postal_code", "postal_code"),
    ("blank", "country"),
    ("blank", "address2"),
    ("property_name", "name"),
    ("blank", "county"),
    ("blank", "parcel_number"),
    ("year_built", "year_built"),
    ("year_renovated", "year_renovated"),
    ("blank", "stories"),
    ("blank", "buildings"),
    ("blank", "roof"),
    ("blank", "zoning"),
    ("building_class", "building_class"),
    ("lot_size_acres", "lot_size_acres"),
    ("parking_ratio", "parking_ratio"),
    ("blank", "open_parking_spaces"),
    ("gross_sqft", "gross_sqft"),
    ("blank", "buildable_sqft"),
    ("anchor", "anchor"),
    ("blank", "clear_ceiling_height_ft"),
    ("blank", "units"),
    ("blank", "assessed_land_value"),
    ("blank", "assessed_improved_percent"),
    ("blank", "owner_occupied"),
    ("blank", "owner_managed"),
    ("blank", "rent_control"),
    ("blank", "tax_delinquent"),
    ("blank", "fire_sprinklers"),
    ("blank", "electronic_gate_entry"),
    ("blank", "video_surveillance_fire_sprinklers"),
    ("blank", "covered_parking_spaces"),
    ("blank", "garage_parking_spaces"),
    ("blank", "subterranean_parking_spaces"),
    ("blank", "pool"),
    ("blank", "soft_story"),
    ("blank", "opportunity_zone"),
    ("blank", "ratings_location"),
    ("blank", "ratings_improvements"),
    ("blank", "notes"),
    ("blank", "meeting_rooms"),
    ("blank", "construction"),
    ("blank", "corridor_type"),
    ("website", "website"),
    ("blank", "amenities"),
    ("blank", "legacy_id"),
    ("blank", "brand_name_tag"),
    ("blank", "brand_flag_tag"),
    ("blank", "brand_chain_scale_tag"),
    ("productsubtype", "productsubtype"),
    ("blank", "Deal"),
    ("sale_price", "sale_price"),
    ("sale_date", "sale_date"),
]


def _pad_phones(phone_list):
    """Return exactly five entries: stringified, stripped phones padded with None."""
    phones = [str(p).strip() for p in phone_list if pd.notnull(p)]
    return (phones + [None] * 5)[:5]


def flatten_tk_data(tk_df):
    """Collapse the Terrakotta export to one row per contact.

    Groups by trimmed "Contact Name", collecting the de-duplicated list of
    PropertyIDs, the first email/notes, and up to five phone numbers spread
    across phone1..phone5. Also derives first_name/last_name from the
    contact name.

    Returns a new DataFrame; *tk_df* is not modified.
    """
    tk_df = tk_df[_TK_COLUMNS].copy()
    tk_df["Contact Name"] = tk_df["Contact Name"].astype(str).str.strip()

    grouped = tk_df.groupby("Contact Name").agg({
        "PropertyID": lambda x: list(set(x.dropna())),
        "Email 1": "first",
        "TK_NOTES": "first",
        "*Phone Number": list,
    }).reset_index()

    # Build phone1..phone5 from an explicit list-of-lists so an empty grouped
    # frame still yields the expected columns (Series.apply(pd.Series) does
    # not produce columns when the frame is empty).
    padded = [_pad_phones(p) for p in grouped["*Phone Number"]]
    grouped[_PHONE_COLS] = pd.DataFrame(
        padded, index=grouped.index, columns=_PHONE_COLS
    )

    # split() with no argument tolerates tabs and repeated spaces in names.
    grouped["first_name"] = grouped["Contact Name"].apply(
        lambda name: name.split()[0] if name.strip() else ""
    )
    grouped["last_name"] = grouped["Contact Name"].apply(
        lambda name: name.split()[-1] if name.strip() else ""
    )
    return grouped


def merge_costar_and_tk(costar_df, tk_flat):
    """Attach flattened TK contact data to each Costar property row.

    Two-pass match: first on PropertyID (tk_flat's PropertyID lists are
    exploded to one row per ID); rows left without a contact are then
    re-matched on Costar's "True Owner Contact" against TK's "Contact Name".
    Returns the concatenation of both passes.
    """
    # NOTE(review): assumes PropertyID dtypes are comparable on both sides;
    # a mixed int/str join key raises in recent pandas — confirm upstream.
    merged = pd.merge(
        costar_df,
        tk_flat.explode("PropertyID"),
        how="left",
        on="PropertyID",
        suffixes=("", "_tk"),
    )

    has_contact = merged["Contact Name"].notna()
    matched = merged[has_contact]

    # Strip the (empty) TK columns before the fallback merge so they can be
    # re-populated from the owner-name match without suffix collisions.
    unmatched = merged[~has_contact].drop(
        columns=[
            "Contact Name", "PropertyID_tk", "Email 1", "TK_NOTES",
            *_PHONE_COLS, "first_name", "last_name",
        ],
        errors="ignore",
    )
    fallback = pd.merge(
        unmatched,
        tk_flat,
        how="left",
        left_on="True Owner Contact",
        right_on="Contact Name",
        suffixes=("", "_tk2"),
    )
    return pd.concat([matched, fallback], ignore_index=True)


def _map_row(row):
    """Project one merged row onto the export template's source keys."""
    ptype = row.get("PropertyType")
    stype = row.get("Secondary Type")
    return {
        "first_name": row.get("first_name"),
        "last_name": row.get("last_name"),
        "people_id": row.get("Email 1"),
        "phone1": row.get("phone1"),
        "phone2": row.get("phone2"),
        "phone3": row.get("phone3"),
        "phone4": row.get("phone4"),
        "phone5": row.get("phone5"),
        "contact_notes": row.get("TK_NOTES"),
        "company_name": row.get("True Owner Name"),
        "address": row.get("Property Address"),
        "city": row.get("City"),
        "state": row.get("State"),
        "postal_code": row.get("Zip"),
        "property_name": row.get("Property Name"),
        "year_built": row.get("Year Built"),
        "year_renovated": row.get("Year Renovated"),
        "building_class": row.get("Building Class"),
        "lot_size_acres": row.get("Land Area (AC)"),
        "parking_ratio": row.get("Parking Ratio"),
        "gross_sqft": row.get("RBA"),
        "anchor": row.get("Anchor Tenants"),
        "website": f"https://product.costar.com/detail/all-properties/{row.get('PropertyID')}/summary",
        # Prefer the secondary type; fall back to the primary property type.
        "productsubtype": stype if pd.notna(stype) else ptype,
        "sale_price": row.get("Last Sale Price"),
        "sale_date": row.get("Last Sale Date"),
    }


def build_final_export_csv(df):
    """Render the merged dataset as a CSV string in the fixed export template.

    Missing/NA values become empty cells; "blank" template slots are always
    empty. Returns the CSV text (header row + one row per input row).
    """
    df_mapped = (
        pd.DataFrame([_map_row(row) for _, row in df.iterrows()])
        .replace({pd.NA: "", pd.NaT: "", None: ""})
        .fillna("")
    )

    buffer = StringIO()
    writer = csv.writer(buffer)
    writer.writerow([header for _, header in _FINAL_COLUMNS])
    for _, row in df_mapped.iterrows():
        writer.writerow(
            ["" if key == "blank" else row.get(key, "") for key, _ in _FINAL_COLUMNS]
        )
    return buffer.getvalue()


def main():
    """Streamlit entry point: upload, merge, filter by owner, and export."""
    # Imported lazily so the data-processing helpers above can be imported
    # (and unit-tested) without streamlit installed.
    import streamlit as st

    st.title("Property + Contact CSV/Excel Merger")
    st.markdown("Upload two files: Costar + Terrakotta contact data")

    costar_file = st.file_uploader("Upload Costar file", type=["csv", "xlsx"])
    tk_file = st.file_uploader("Upload Terrakotta file", type=["csv", "xlsx"])
    if not (costar_file and tk_file):
        return

    try:
        costar_df = (
            pd.read_csv(costar_file)
            if costar_file.name.endswith(".csv")
            else pd.read_excel(costar_file)
        )
        tk_df = (
            pd.read_csv(tk_file)
            if tk_file.name.endswith(".csv")
            else pd.read_excel(tk_file)
        )

        tk_flat = flatten_tk_data(tk_df)
        merged_df = merge_costar_and_tk(costar_df, tk_flat)

        st.subheader("Filter True Owner Names")
        owner_names = sorted(merged_df["True Owner Name"].dropna().unique())
        selected_owners = [
            owner for owner in owner_names
            if st.checkbox(owner, value=False, key=owner)
        ]
        # Blank out (rather than drop) unselected owners so the row count and
        # property data remain visible in the preview and export.
        merged_df["True Owner Name"] = merged_df["True Owner Name"].apply(
            lambda x: x if x in selected_owners else None
        )

        st.subheader("Filtered Merged Dataset")
        st.dataframe(merged_df, use_container_width=True)

        csv_data = build_final_export_csv(merged_df)
        st.download_button(
            "Export Final CSV File",
            data=csv_data.encode('utf-8'),
            file_name="final_export.csv",
            mime="text/csv",
        )
    except Exception as e:  # surface any processing failure in the UI
        st.error(f"Error processing files: {e}")


if __name__ == "__main__":
    main()