import csv
from io import StringIO

import pandas as pd

# Columns consumed from the Terrakotta upload.
_TK_COLUMNS = ["*Phone Number", "Contact Name", "PropertyID", "Email 1", "TK_NOTES"]

_PHONE_COLS = ["phone1", "phone2", "phone3", "phone4", "phone5"]

# Export template: (source_key, csv_header) in output order. A source_key of
# "blank" emits an empty cell. Header names repeat across the User / Company /
# Property sections because the target import format uses section markers
# ("User", "Company", "Property", "Deal") followed by per-section fields.
_FINAL_COLUMNS = [
    ("blank", "User"),
    ("first_name", "first_name"),
    ("last_name", "last_name"),
    ("blank", "middle_name"),
    ("people_id", "people_id"),
    ("blank", "address"),
    ("blank", "city"),
    ("blank", "state"),
    ("blank", "postal_code"),
    ("blank", "country"),
    ("blank", "address2"),
    ("phone1", "phone1"),
    ("blank", "phone1_type"),
    ("phone2", "phone2"),
    ("blank", "phone2_type"),
    ("phone3", "phone3"),
    ("blank", "phone3_type"),
    ("phone4", "phone4"),
    ("blank", "phone4_type"),
    ("phone5", "phone5"),
    ("blank", "phone5_type"),
    ("blank", "salutation"),
    ("blank", "nickname"),
    ("blank", "spouse"),
    ("contact_notes", "contact_notes"),
    ("blank", "sic_code"),
    ("blank", "Company"),
    ("company_name", "name"),
    ("blank", "address"),
    ("blank", "city"),
    ("blank", "state"),
    ("blank", "postal_code"),
    ("blank", "country"),
    ("blank", "address2"),
    ("blank", "Property"),
    ("blank", "lat"),
    ("blank", "lon"),
    ("address", "address"),
    ("city", "city"),
    ("state", "state"),
    ("postal_code", "postal_code"),
    ("blank", "country"),
    ("blank", "address2"),
    ("property_name", "name"),
    ("blank", "county"),
    ("blank", "parcel_number"),
    ("year_built", "year_built"),
    ("year_renovated", "year_renovated"),
    ("blank", "stories"),
    ("blank", "buildings"),
    ("blank", "roof"),
    ("blank", "zoning"),
    ("building_class", "building_class"),
    ("lot_size_acres", "lot_size_acres"),
    ("parking_ratio", "parking_ratio"),
    ("blank", "open_parking_spaces"),
    ("gross_sqft", "gross_sqft"),
    ("blank", "buildable_sqft"),
    ("anchor", "anchor"),
    ("blank", "clear_ceiling_height_ft"),
    ("blank", "units"),
    ("blank", "assessed_land_value"),
    ("blank", "assessed_improved_percent"),
    ("blank", "owner_occupied"),
    ("blank", "owner_managed"),
    ("blank", "rent_control"),
    ("blank", "tax_delinquent"),
    ("blank", "fire_sprinklers"),
    ("blank", "electronic_gate_entry"),
    ("blank", "video_surveillance_fire_sprinklers"),
    ("blank", "covered_parking_spaces"),
    ("blank", "garage_parking_spaces"),
    ("blank", "subterranean_parking_spaces"),
    ("blank", "pool"),
    ("blank", "soft_story"),
    ("blank", "opportunity_zone"),
    ("blank", "ratings_location"),
    ("blank", "ratings_improvements"),
    ("blank", "notes"),
    ("blank", "meeting_rooms"),
    ("blank", "construction"),
    ("blank", "corridor_type"),
    ("website", "website"),
    ("blank", "amenities"),
    ("blank", "legacy_id"),
    ("blank", "brand_name_tag"),
    ("blank", "brand_flag_tag"),
    ("blank", "brand_chain_scale_tag"),
    ("productsubtype", "productsubtype"),
    ("blank", "Deal"),
    ("sale_price", "sale_price"),
    ("sale_date", "sale_date"),
]


def _pad_phones(phone_list):
    """Return exactly five entries: stringified, stripped phones padded with None."""
    phones = [str(p).strip() for p in phone_list if pd.notnull(p)]
    return (phones + [None] * 5)[:5]


def flatten_tk_data(tk_df):
    """Collapse the Terrakotta export to one row per contact.

    Groups by trimmed "Contact Name", collecting the de-duplicated list of
    PropertyIDs, the first email/notes, and up to five phone numbers spread
    across phone1..phone5. Also derives first_name/last_name from the
    contact name.

    Returns a new DataFrame; *tk_df* is not modified.
    """
    tk_df = tk_df[_TK_COLUMNS].copy()
    tk_df["Contact Name"] = tk_df["Contact Name"].astype(str).str.strip()

    grouped = tk_df.groupby("Contact Name").agg({
        "PropertyID": lambda x: list(set(x.dropna())),
        "Email 1": "first",
        "TK_NOTES": "first",
        "*Phone Number": list,
    }).reset_index()

    # Build phone1..phone5 from an explicit list-of-lists so an empty grouped
    # frame still yields the expected columns (Series.apply(pd.Series) does
    # not produce columns when the frame is empty).
    padded = [_pad_phones(p) for p in grouped["*Phone Number"]]
    grouped[_PHONE_COLS] = pd.DataFrame(
        padded, index=grouped.index, columns=_PHONE_COLS
    )

    # split() with no argument tolerates tabs and repeated spaces in names.
    grouped["first_name"] = grouped["Contact Name"].apply(
        lambda name: name.split()[0] if name.strip() else ""
    )
    grouped["last_name"] = grouped["Contact Name"].apply(
        lambda name: name.split()[-1] if name.strip() else ""
    )
    return grouped


def merge_costar_and_tk(costar_df, tk_flat):
    """Attach flattened TK contact data to each Costar property row.

    Two-pass match: first on PropertyID (tk_flat's PropertyID lists are
    exploded to one row per ID); rows left without a contact are then
    re-matched on Costar's "True Owner Contact" against TK's "Contact Name".
    Returns the concatenation of both passes.
    """
    # NOTE(review): assumes PropertyID dtypes are comparable on both sides;
    # a mixed int/str join key raises in recent pandas — confirm upstream.
    merged = pd.merge(
        costar_df,
        tk_flat.explode("PropertyID"),
        how="left",
        on="PropertyID",
        suffixes=("", "_tk"),
    )

    has_contact = merged["Contact Name"].notna()
    matched = merged[has_contact]

    # Strip the (empty) TK columns before the fallback merge so they can be
    # re-populated from the owner-name match without suffix collisions.
    unmatched = merged[~has_contact].drop(
        columns=[
            "Contact Name", "PropertyID_tk", "Email 1", "TK_NOTES",
            *_PHONE_COLS, "first_name", "last_name",
        ],
        errors="ignore",
    )
    fallback = pd.merge(
        unmatched,
        tk_flat,
        how="left",
        left_on="True Owner Contact",
        right_on="Contact Name",
        suffixes=("", "_tk2"),
    )
    return pd.concat([matched, fallback], ignore_index=True)


def _map_row(row):
    """Project one merged row onto the export template's source keys."""
    ptype = row.get("PropertyType")
    stype = row.get("Secondary Type")
    return {
        "first_name": row.get("first_name"),
        "last_name": row.get("last_name"),
        "people_id": row.get("Email 1"),
        "phone1": row.get("phone1"),
        "phone2": row.get("phone2"),
        "phone3": row.get("phone3"),
        "phone4": row.get("phone4"),
        "phone5": row.get("phone5"),
        "contact_notes": row.get("TK_NOTES"),
        "company_name": row.get("True Owner Name"),
        "address": row.get("Property Address"),
        "city": row.get("City"),
        "state": row.get("State"),
        "postal_code": row.get("Zip"),
        "property_name": row.get("Property Name"),
        "year_built": row.get("Year Built"),
        "year_renovated": row.get("Year Renovated"),
        "building_class": row.get("Building Class"),
        "lot_size_acres": row.get("Land Area (AC)"),
        "parking_ratio": row.get("Parking Ratio"),
        "gross_sqft": row.get("RBA"),
        "anchor": row.get("Anchor Tenants"),
        "website": f"https://product.costar.com/detail/all-properties/{row.get('PropertyID')}/summary",
        # Prefer the secondary type; fall back to the primary property type.
        "productsubtype": stype if pd.notna(stype) else ptype,
        "sale_price": row.get("Last Sale Price"),
        "sale_date": row.get("Last Sale Date"),
    }


def build_final_export_csv(df):
    """Render the merged dataset as a CSV string in the fixed export template.

    Missing/NA values become empty cells; "blank" template slots are always
    empty. Returns the CSV text (header row + one row per input row).
    """
    df_mapped = (
        pd.DataFrame([_map_row(row) for _, row in df.iterrows()])
        .replace({pd.NA: "", pd.NaT: "", None: ""})
        .fillna("")
    )

    buffer = StringIO()
    writer = csv.writer(buffer)
    writer.writerow([header for _, header in _FINAL_COLUMNS])
    for _, row in df_mapped.iterrows():
        writer.writerow(
            ["" if key == "blank" else row.get(key, "") for key, _ in _FINAL_COLUMNS]
        )
    return buffer.getvalue()


def main():
    """Streamlit entry point: upload, merge, filter by owner, and export."""
    # Imported lazily so the data-processing helpers above can be imported
    # (and unit-tested) without streamlit installed.
    import streamlit as st

    st.title("Property + Contact CSV/Excel Merger")
    st.markdown("Upload two files: Costar + Terrakotta contact data")

    costar_file = st.file_uploader("Upload Costar file", type=["csv", "xlsx"])
    tk_file = st.file_uploader("Upload Terrakotta file", type=["csv", "xlsx"])
    if not (costar_file and tk_file):
        return

    try:
        costar_df = (
            pd.read_csv(costar_file)
            if costar_file.name.endswith(".csv")
            else pd.read_excel(costar_file)
        )
        tk_df = (
            pd.read_csv(tk_file)
            if tk_file.name.endswith(".csv")
            else pd.read_excel(tk_file)
        )

        tk_flat = flatten_tk_data(tk_df)
        merged_df = merge_costar_and_tk(costar_df, tk_flat)

        st.subheader("Filter True Owner Names")
        owner_names = sorted(merged_df["True Owner Name"].dropna().unique())
        selected_owners = [
            owner for owner in owner_names
            if st.checkbox(owner, value=False, key=owner)
        ]
        # Blank out (rather than drop) unselected owners so the row count and
        # property data remain visible in the preview and export.
        merged_df["True Owner Name"] = merged_df["True Owner Name"].apply(
            lambda x: x if x in selected_owners else None
        )

        st.subheader("Filtered Merged Dataset")
        st.dataframe(merged_df, use_container_width=True)

        csv_data = build_final_export_csv(merged_df)
        st.download_button(
            "Export Final CSV File",
            data=csv_data.encode('utf-8'),
            file_name="final_export.csv",
            mime="text/csv",
        )
    except Exception as e:  # surface any processing failure in the UI
        st.error(f"Error processing files: {e}")


if __name__ == "__main__":
    main()