Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from io import StringIO | |
| import csv | |
| def flatten_tk_data(tk_df): | |
| tk_df = tk_df[["*Phone Number", "Contact Name", "PropertyID", "Email 1", "TK_NOTES"]].copy() | |
| tk_df["Contact Name"] = tk_df["Contact Name"].astype(str).str.strip() | |
| grouped = tk_df.groupby("Contact Name").agg({ | |
| "PropertyID": lambda x: list(set(x.dropna())), | |
| "Email 1": "first", | |
| "TK_NOTES": "first", | |
| "*Phone Number": list | |
| }).reset_index() | |
| def pad_phones(phone_list): | |
| phone_list = [str(p).strip() for p in phone_list if pd.notnull(p)] | |
| return (phone_list + [None]*5)[:5] | |
| grouped[["phone1", "phone2", "phone3", "phone4", "phone5"]] = grouped["*Phone Number"].apply(pad_phones).apply(pd.Series) | |
| grouped["first_name"] = grouped["Contact Name"].apply(lambda x: x.strip().split(" ")[0] if x.strip() else "") | |
| grouped["last_name"] = grouped["Contact Name"].apply(lambda x: x.strip().split(" ")[-1] if x.strip() else "") | |
| return grouped | |
| def merge_costar_and_tk(costar_df, tk_flat): | |
| merged = pd.merge(costar_df, tk_flat.explode("PropertyID"), how='left', left_on='PropertyID', right_on='PropertyID', suffixes=('', '_tk')) | |
| unmatched = merged[merged['Contact Name'].isna()].drop(columns=[ | |
| 'Contact Name', 'PropertyID_tk', 'Email 1', 'TK_NOTES', 'phone1', 'phone2', 'phone3', 'phone4', 'phone5', 'first_name', 'last_name' | |
| ], errors='ignore') | |
| fallback = pd.merge(unmatched, tk_flat, how='left', left_on='True Owner Contact', right_on='Contact Name', suffixes=('', '_tk2')) | |
| matched = merged[~merged['Contact Name'].isna()] | |
| final = pd.concat([matched, fallback], ignore_index=True) | |
| return final | |
| def build_final_export_csv(df): | |
| def get_product_subtype(ptype, stype): | |
| return stype if pd.notna(stype) else ptype | |
| mapped_rows = [] | |
| for _, row in df.iterrows(): | |
| new_row = { | |
| "first_name": row.get("first_name"), | |
| "last_name": row.get("last_name"), | |
| "people_id": row.get("Email 1"), | |
| "phone1": row.get("phone1"), | |
| "phone2": row.get("phone2"), | |
| "phone3": row.get("phone3"), | |
| "phone4": row.get("phone4"), | |
| "phone5": row.get("phone5"), | |
| "contact_notes": row.get("TK_NOTES"), | |
| "company_name": row.get("True Owner Name"), | |
| "address": row.get("Property Address"), | |
| "city": row.get("City"), | |
| "state": row.get("State"), | |
| "postal_code": row.get("Zip"), | |
| "property_name": row.get("Property Name"), | |
| "year_built": row.get("Year Built"), | |
| "year_renovated": row.get("Year Renovated"), | |
| "building_class": row.get("Building Class"), | |
| "lot_size_acres": row.get("Land Area (AC)"), | |
| "parking_ratio": row.get("Parking Ratio"), | |
| "gross_sqft": row.get("RBA"), | |
| "anchor": row.get("Anchor Tenants"), | |
| "website": f"https://product.costar.com/detail/all-properties/{row.get('PropertyID')}/summary", | |
| "productsubtype": get_product_subtype(row.get("PropertyType"), row.get("Secondary Type")), | |
| "sale_price": row.get("Last Sale Price"), | |
| "sale_date": row.get("Last Sale Date"), | |
| } | |
| mapped_rows.append(new_row) | |
| df_mapped = pd.DataFrame(mapped_rows).replace({pd.NA: "", pd.NaT: "", None: ""}).fillna("") | |
| final_columns = [ | |
| (1, "blank", "User"), (2, "first_name", "first_name"), (3, "last_name", "last_name"), (4, "blank", "middle_name"), | |
| (5, "people_id", "people_id"), (6, "blank", "address"), (7, "blank", "city"), (8, "blank", "state"), | |
| (9, "blank", "postal_code"), (10, "blank", "country"), (11, "blank", "address2"), (12, "phone1", "phone1"), | |
| (13, "blank", "phone1_type"), (14, "phone2", "phone2"), (15, "blank", "phone2_type"), (16, "phone3", "phone3"), | |
| (17, "blank", "phone3_type"), (18, "phone4", "phone4"), (19, "blank", "phone4_type"), (20, "phone5", "phone5"), | |
| (21, "blank", "phone5_type"), (22, "blank", "salutation"), (23, "blank", "nickname"), (24, "blank", "spouse"), | |
| (25, "contact_notes", "contact_notes"), (26, "blank", "sic_code"), (27, "blank", "Company"), (28, "company_name", "name"), | |
| (29, "blank", "address"), (30, "blank", "city"), (31, "blank", "state"), (32, "blank", "postal_code"), | |
| (33, "blank", "country"), (34, "blank", "address2"), (35, "blank", "Property"), (36, "blank", "lat"), | |
| (37, "blank", "lon"), (38, "address", "address"), (39, "city", "city"), (40, "state", "state"), | |
| (41, "postal_code", "postal_code"), (42, "blank", "country"), (43, "blank", "address2"), (44, "property_name", "name"), | |
| (45, "blank", "county"), (46, "blank", "parcel_number"), (47, "year_built", "year_built"), (48, "year_renovated", "year_renovated"), | |
| (49, "blank", "stories"), (50, "blank", "buildings"), (51, "blank", "roof"), (52, "blank", "zoning"), | |
| (53, "building_class", "building_class"), (54, "lot_size_acres", "lot_size_acres"), (55, "parking_ratio", "parking_ratio"), | |
| (56, "blank", "open_parking_spaces"), (57, "gross_sqft", "gross_sqft"), (58, "blank", "buildable_sqft"), | |
| (59, "anchor", "anchor"), (60, "blank", "clear_ceiling_height_ft"), (61, "blank", "units"), | |
| (62, "blank", "assessed_land_value"), (63, "blank", "assessed_improved_percent"), (64, "blank", "owner_occupied"), | |
| (65, "blank", "owner_managed"), (66, "blank", "rent_control"), (67, "blank", "tax_delinquent"), | |
| (68, "blank", "fire_sprinklers"), (69, "blank", "electronic_gate_entry"), (70, "blank", "video_surveillance_fire_sprinklers"), | |
| (71, "blank", "covered_parking_spaces"), (72, "blank", "garage_parking_spaces"), (73, "blank", "subterranean_parking_spaces"), | |
| (74, "blank", "pool"), (75, "blank", "soft_story"), (76, "blank", "opportunity_zone"), (77, "blank", "ratings_location"), | |
| (78, "blank", "ratings_improvements"), (79, "blank", "notes"), (80, "blank", "meeting_rooms"), (81, "blank", "construction"), | |
| (82, "blank", "corridor_type"), (83, "website", "website"), (84, "blank", "amenities"), (85, "blank", "legacy_id"), | |
| (86, "blank", "brand_name_tag"), (87, "blank", "brand_flag_tag"), (88, "blank", "brand_chain_scale_tag"), | |
| (89, "productsubtype", "productsubtype"), (90, "blank", "Deal"), (91, "sale_price", "sale_price"), (92, "sale_date", "sale_date") | |
| ] | |
| output_io = StringIO() | |
| writer = csv.writer(output_io) | |
| writer.writerow([col[2] for col in final_columns]) | |
| for _, row in df_mapped.iterrows(): | |
| writer.writerow(["" if key == "blank" else row.get(key, "") for _, key, _ in final_columns]) | |
| return output_io.getvalue() | |
| def main(): | |
| st.title("Property + Contact CSV/Excel Merger") | |
| st.markdown("Upload two files: Costar + Terrakotta contact data") | |
| costar_file = st.file_uploader("Upload Costar file", type=["csv", "xlsx"]) | |
| tk_file = st.file_uploader("Upload Terrakotta file", type=["csv", "xlsx"]) | |
| if costar_file and tk_file: | |
| try: | |
| costar_df = pd.read_csv(costar_file) if costar_file.name.endswith(".csv") else pd.read_excel(costar_file) | |
| tk_df = pd.read_csv(tk_file) if tk_file.name.endswith(".csv") else pd.read_excel(tk_file) | |
| tk_flat = flatten_tk_data(tk_df) | |
| merged_df = merge_costar_and_tk(costar_df, tk_flat) | |
| st.subheader("Filter True Owner Names") | |
| owner_names = sorted(merged_df["True Owner Name"].dropna().unique()) | |
| selected_owners = [owner for owner in owner_names if st.checkbox(owner, value=False, key=owner)] | |
| merged_df["True Owner Name"] = merged_df["True Owner Name"].apply(lambda x: x if x in selected_owners else None) | |
| st.subheader("Filtered Merged Dataset") | |
| st.dataframe(merged_df, use_container_width=True) | |
| csv_data = build_final_export_csv(merged_df) | |
| st.download_button("Export Final CSV File", data=csv_data.encode('utf-8'), file_name="final_export.csv", mime="text/csv") | |
| except Exception as e: | |
| st.error(f"Error processing files: {e}") | |
| if __name__ == "__main__": | |
| main() | |