# csv-transformer / app.py
# (Hugging Face Space by mattn01 — commit 347f022)
import streamlit as st
import pandas as pd
from io import StringIO
import csv
def flatten_tk_data(tk_df):
    """Collapse the Terrakotta export to one row per contact.

    Groups by the (stripped) contact name, collecting the de-duplicated
    PropertyIDs as a list, the first email and note, and up to five
    phone numbers split into phone1..phone5 columns. Also derives
    first_name/last_name from the contact name.
    """
    keep = ["*Phone Number", "Contact Name", "PropertyID", "Email 1", "TK_NOTES"]
    contacts = tk_df[keep].copy()
    contacts["Contact Name"] = contacts["Contact Name"].astype(str).str.strip()

    flat = (
        contacts.groupby("Contact Name")
        .agg({
            "PropertyID": lambda ids: list(set(ids.dropna())),
            "Email 1": "first",
            "TK_NOTES": "first",
            "*Phone Number": list,
        })
        .reset_index()
    )

    def _five_phones(raw_numbers):
        # Drop nulls, stringify/strip, then pad/truncate to exactly 5 slots.
        cleaned = [str(num).strip() for num in raw_numbers if pd.notnull(num)]
        return (cleaned + [None] * 5)[:5]

    phone_cols = ["phone1", "phone2", "phone3", "phone4", "phone5"]
    flat[phone_cols] = flat["*Phone Number"].apply(_five_phones).apply(pd.Series)

    def _name_part(full_name, index):
        # First or last whitespace-delimited token; "" for blank names.
        full_name = full_name.strip()
        return full_name.split(" ")[index] if full_name else ""

    flat["first_name"] = flat["Contact Name"].apply(lambda name: _name_part(name, 0))
    flat["last_name"] = flat["Contact Name"].apply(lambda name: _name_part(name, -1))
    return flat
def merge_costar_and_tk(costar_df, tk_flat):
    """Join Costar property rows with flattened Terrakotta contacts.

    Two-pass match:
      1. Primary: join on PropertyID (tk_flat's PropertyID lists are
         exploded to one row per id first).
      2. Fallback: rows with no PropertyID match are re-joined on
         Costar's 'True Owner Contact' against tk's 'Contact Name'.

    Returns the concatenation of primary matches and fallback rows.
    NOTE(review): assumes costar_df itself carries no 'Contact Name'
    column — otherwise the isna() test below inspects the wrong side.
    """
    merged = pd.merge(
        costar_df,
        tk_flat.explode("PropertyID"),
        how='left',
        left_on='PropertyID',
        right_on='PropertyID',
        suffixes=('', '_tk'),
    )
    # Strip every tk-side column before retrying, so the fallback merge
    # starts from clean Costar rows.
    tk_side_columns = [
        'Contact Name', 'PropertyID_tk', 'Email 1', 'TK_NOTES',
        # Fix: '*Phone Number' was previously left behind as an all-NaN
        # leftover, which collided in the fallback merge and produced a
        # spurious '*Phone Number_tk2' column.
        '*Phone Number',
        'phone1', 'phone2', 'phone3', 'phone4', 'phone5',
        'first_name', 'last_name',
    ]
    unmatched = merged[merged['Contact Name'].isna()].drop(
        columns=tk_side_columns, errors='ignore'
    )
    # Second chance: match on the owner-contact name instead of property id.
    fallback = pd.merge(
        unmatched, tk_flat, how='left',
        left_on='True Owner Contact', right_on='Contact Name',
        suffixes=('', '_tk2'),
    )
    matched = merged[~merged['Contact Name'].isna()]
    final = pd.concat([matched, fallback], ignore_index=True)
    return final
def build_final_export_csv(df):
    """Render the merged dataframe as the final 92-column CSV string.

    The export schema is fixed: ``final_columns`` below lists every
    output column in order as (position, source_key, header_name); a
    source_key of "blank" emits an empty cell. Returns the full CSV
    text (header row plus one row per input row).
    """
    def get_product_subtype(ptype, stype):
        # Prefer the secondary type when present; fall back to the primary.
        return stype if pd.notna(stype) else ptype
    mapped_rows = []
    for _, row in df.iterrows():
        # Pull each exported value out of the merged row by its source column.
        new_row = {
            "first_name": row.get("first_name"),
            "last_name": row.get("last_name"),
            "people_id": row.get("Email 1"),
            "phone1": row.get("phone1"),
            "phone2": row.get("phone2"),
            "phone3": row.get("phone3"),
            "phone4": row.get("phone4"),
            "phone5": row.get("phone5"),
            "contact_notes": row.get("TK_NOTES"),
            "company_name": row.get("True Owner Name"),
            "address": row.get("Property Address"),
            "city": row.get("City"),
            "state": row.get("State"),
            "postal_code": row.get("Zip"),
            "property_name": row.get("Property Name"),
            "year_built": row.get("Year Built"),
            "year_renovated": row.get("Year Renovated"),
            "building_class": row.get("Building Class"),
            "lot_size_acres": row.get("Land Area (AC)"),
            "parking_ratio": row.get("Parking Ratio"),
            "gross_sqft": row.get("RBA"),
            "anchor": row.get("Anchor Tenants"),
            # NOTE(review): PropertyID may be float-typed after the merges,
            # which would render ".../123.0/summary" — confirm upstream dtype.
            "website": f"https://product.costar.com/detail/all-properties/{row.get('PropertyID')}/summary",
            "productsubtype": get_product_subtype(row.get("PropertyType"), row.get("Secondary Type")),
            "sale_price": row.get("Last Sale Price"),
            "sale_date": row.get("Last Sale Date"),
        }
        mapped_rows.append(new_row)
    # Blank out every flavor of missing value so cells come out empty,
    # not as the strings "nan"/"None"/"NaT".
    df_mapped = pd.DataFrame(mapped_rows).replace({pd.NA: "", pd.NaT: "", None: ""}).fillna("")
    # (position, source_key_or_"blank", output_header) — this order defines the CSV.
    final_columns = [
        (1, "blank", "User"), (2, "first_name", "first_name"), (3, "last_name", "last_name"), (4, "blank", "middle_name"),
        (5, "people_id", "people_id"), (6, "blank", "address"), (7, "blank", "city"), (8, "blank", "state"),
        (9, "blank", "postal_code"), (10, "blank", "country"), (11, "blank", "address2"), (12, "phone1", "phone1"),
        (13, "blank", "phone1_type"), (14, "phone2", "phone2"), (15, "blank", "phone2_type"), (16, "phone3", "phone3"),
        (17, "blank", "phone3_type"), (18, "phone4", "phone4"), (19, "blank", "phone4_type"), (20, "phone5", "phone5"),
        (21, "blank", "phone5_type"), (22, "blank", "salutation"), (23, "blank", "nickname"), (24, "blank", "spouse"),
        (25, "contact_notes", "contact_notes"), (26, "blank", "sic_code"), (27, "blank", "Company"), (28, "company_name", "name"),
        (29, "blank", "address"), (30, "blank", "city"), (31, "blank", "state"), (32, "blank", "postal_code"),
        (33, "blank", "country"), (34, "blank", "address2"), (35, "blank", "Property"), (36, "blank", "lat"),
        (37, "blank", "lon"), (38, "address", "address"), (39, "city", "city"), (40, "state", "state"),
        (41, "postal_code", "postal_code"), (42, "blank", "country"), (43, "blank", "address2"), (44, "property_name", "name"),
        (45, "blank", "county"), (46, "blank", "parcel_number"), (47, "year_built", "year_built"), (48, "year_renovated", "year_renovated"),
        (49, "blank", "stories"), (50, "blank", "buildings"), (51, "blank", "roof"), (52, "blank", "zoning"),
        (53, "building_class", "building_class"), (54, "lot_size_acres", "lot_size_acres"), (55, "parking_ratio", "parking_ratio"),
        (56, "blank", "open_parking_spaces"), (57, "gross_sqft", "gross_sqft"), (58, "blank", "buildable_sqft"),
        (59, "anchor", "anchor"), (60, "blank", "clear_ceiling_height_ft"), (61, "blank", "units"),
        (62, "blank", "assessed_land_value"), (63, "blank", "assessed_improved_percent"), (64, "blank", "owner_occupied"),
        (65, "blank", "owner_managed"), (66, "blank", "rent_control"), (67, "blank", "tax_delinquent"),
        (68, "blank", "fire_sprinklers"), (69, "blank", "electronic_gate_entry"), (70, "blank", "video_surveillance_fire_sprinklers"),
        (71, "blank", "covered_parking_spaces"), (72, "blank", "garage_parking_spaces"), (73, "blank", "subterranean_parking_spaces"),
        (74, "blank", "pool"), (75, "blank", "soft_story"), (76, "blank", "opportunity_zone"), (77, "blank", "ratings_location"),
        (78, "blank", "ratings_improvements"), (79, "blank", "notes"), (80, "blank", "meeting_rooms"), (81, "blank", "construction"),
        (82, "blank", "corridor_type"), (83, "website", "website"), (84, "blank", "amenities"), (85, "blank", "legacy_id"),
        (86, "blank", "brand_name_tag"), (87, "blank", "brand_flag_tag"), (88, "blank", "brand_chain_scale_tag"),
        (89, "productsubtype", "productsubtype"), (90, "blank", "Deal"), (91, "sale_price", "sale_price"), (92, "sale_date", "sale_date")
    ]
    output_io = StringIO()
    writer = csv.writer(output_io)
    # Header row comes from the third tuple element (output header name).
    writer.writerow([col[2] for col in final_columns])
    for _, row in df_mapped.iterrows():
        # "blank" keys emit empty cells; everything else is looked up by key.
        writer.writerow(["" if key == "blank" else row.get(key, "") for _, key, _ in final_columns])
    return output_io.getvalue()
def main():
    """Streamlit entry point: upload two files, merge, filter, export."""
    st.title("Property + Contact CSV/Excel Merger")
    st.markdown("Upload two files: Costar + Terrakotta contact data")
    costar_upload = st.file_uploader("Upload Costar file", type=["csv", "xlsx"])
    tk_upload = st.file_uploader("Upload Terrakotta file", type=["csv", "xlsx"])
    # Wait until both files are provided before doing any work.
    if not (costar_upload and tk_upload):
        return
    try:
        def _read(upload):
            # Dispatch on the uploaded file's extension: CSV vs Excel.
            if upload.name.endswith(".csv"):
                return pd.read_csv(upload)
            return pd.read_excel(upload)

        costar_frame = _read(costar_upload)
        tk_frame = _read(tk_upload)
        merged = merge_costar_and_tk(costar_frame, flatten_tk_data(tk_frame))

        st.subheader("Filter True Owner Names")
        chosen = []
        for owner in sorted(merged["True Owner Name"].dropna().unique()):
            if st.checkbox(owner, value=False, key=owner):
                chosen.append(owner)
        # Owners that were not ticked are nulled out (rows are kept).
        merged["True Owner Name"] = merged["True Owner Name"].apply(
            lambda name: name if name in chosen else None
        )

        st.subheader("Filtered Merged Dataset")
        st.dataframe(merged, use_container_width=True)
        export_text = build_final_export_csv(merged)
        st.download_button(
            "Export Final CSV File",
            data=export_text.encode('utf-8'),
            file_name="final_export.csv",
            mime="text/csv",
        )
    except Exception as e:
        st.error(f"Error processing files: {e}")


if __name__ == "__main__":
    main()