Spaces:
Sleeping
Sleeping
added zip code fixer
Browse files
app.py
CHANGED
|
@@ -1,22 +1,15 @@
|
|
| 1 |
-
|
| 2 |
import os
|
| 3 |
import pandas as pd
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
from datetime import datetime
|
| 6 |
|
| 7 |
-
# ------------------------------------------------
|
| 8 |
-
|
| 9 |
-
# ------------------------------------------------------------------
|
| 10 |
-
EXTRA_MODS = [] # optional suffixes
|
| 11 |
CITY_TEMPLATES = [
|
| 12 |
"{keyword} {city}",
|
| 13 |
-
# "{keyword} {city} ca",
|
| 14 |
-
# "{keyword} {city} new jersey",
|
| 15 |
"{keyword} in {city}",
|
| 16 |
-
# "{keyword} in {city} california",
|
| 17 |
"{city} {keyword}",
|
| 18 |
-
# "{city} ca {keyword}",
|
| 19 |
-
# "{city} california {keyword}",
|
| 20 |
]
|
| 21 |
ZIP_TEMPLATES = [
|
| 22 |
"{keyword} {zip}",
|
|
@@ -26,32 +19,24 @@ ZIP_TEMPLATES = [
|
|
| 26 |
def has_any_mod(phrase: str, mods) -> bool:
|
| 27 |
return any(phrase.endswith(" " + m) for m in mods)
|
| 28 |
|
| 29 |
-
# ------------------------------------------------
|
| 30 |
-
# 2️⃣ CORE LOGIC (unchanged except for reading keywords from Excel)
|
| 31 |
-
# ------------------------------------------------------------------
|
| 32 |
def build_keywords(city_df: pd.DataFrame, kw_df: pd.DataFrame):
|
| 33 |
-
primary_zip_col = city_df.columns[0]
|
| 34 |
-
city_col = city_df.columns[1]
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
# Check if ZIP list column (F) exists
|
| 38 |
-
zip_list_col = city_df.columns[5] if len(city_df.columns) > 5 else None
|
| 39 |
|
| 40 |
city_kw_records, zip_kw_records = [], []
|
| 41 |
|
| 42 |
for _, row in city_df.iterrows():
|
| 43 |
city = str(row[city_col]).strip().lower()
|
| 44 |
-
# state = str(row[state_col]).strip().upper()
|
| 45 |
primary_zip = str(row[primary_zip_col]).strip()
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
zip_list = [z.strip() for z in zip_raw.split(",") if z.strip().isdigit()] or (
|
| 49 |
[primary_zip] if primary_zip.isdigit() else []
|
| 50 |
)
|
| 51 |
|
| 52 |
for kw in kw_df.iloc[:, 0].astype(str):
|
| 53 |
kw = kw.lower().strip()
|
| 54 |
-
|
| 55 |
kw_variants = [kw]
|
| 56 |
if not has_any_mod(kw, EXTRA_MODS):
|
| 57 |
kw_variants += [f"{kw} {m}" for m in EXTRA_MODS]
|
|
@@ -59,78 +44,82 @@ def build_keywords(city_df: pd.DataFrame, kw_df: pd.DataFrame):
|
|
| 59 |
for v in kw_variants:
|
| 60 |
for tpl in CITY_TEMPLATES:
|
| 61 |
candidate = tpl.format(keyword=v, city=city).replace(" ", " ").strip()
|
| 62 |
-
city_kw_records.append({
|
| 63 |
-
"City": city.title(),
|
| 64 |
-
# "State": state,
|
| 65 |
-
"Keyword": candidate,
|
| 66 |
-
})
|
| 67 |
|
| 68 |
for z in zip_list:
|
| 69 |
for tpl in ZIP_TEMPLATES:
|
| 70 |
candidate = tpl.format(keyword=kw, zip=z).strip()
|
| 71 |
-
zip_kw_records.append({
|
| 72 |
-
"ZIP": z,
|
| 73 |
-
"Keyword": candidate,
|
| 74 |
-
})
|
| 75 |
|
| 76 |
city_out = pd.DataFrame(city_kw_records).drop_duplicates(subset=["Keyword"])
|
| 77 |
zip_out = pd.DataFrame(zip_kw_records).drop_duplicates(subset=["Keyword"])
|
| 78 |
return city_out, zip_out
|
| 79 |
|
| 80 |
-
|
| 81 |
-
# ------------------------------------------------------------------
|
| 82 |
-
# 3️⃣ UTIL – create tidy output file names
|
| 83 |
-
# ------------------------------------------------------------------
|
| 84 |
def make_name(prefix: str, city_file: str, kw_file: str):
|
| 85 |
a = os.path.splitext(os.path.basename(city_file))[0]
|
| 86 |
b = os.path.splitext(os.path.basename(kw_file))[0]
|
| 87 |
-
today = datetime.now().strftime("%Y%m%d")
|
| 88 |
return f"{prefix}_{a}__{b}_{today}.csv"
|
| 89 |
|
| 90 |
-
|
| 91 |
-
# ------------------------------------------------------------------
|
| 92 |
-
# 4️⃣ MAIN callable for Gradio
|
| 93 |
-
# ------------------------------------------------------------------
|
| 94 |
def run_builder(city_excel, keyword_excel):
|
| 95 |
-
# read excels
|
| 96 |
city_df = pd.read_excel(city_excel.name)
|
| 97 |
kw_df = pd.read_excel(keyword_excel.name)
|
| 98 |
|
| 99 |
city_out, zip_out = build_keywords(city_df, kw_df)
|
| 100 |
|
| 101 |
-
# write to CSVs with combined names
|
| 102 |
city_csv = make_name("locksmith_city_keywords", city_excel.name, keyword_excel.name)
|
| 103 |
zip_csv = make_name("locksmith_zip_keywords", city_excel.name, keyword_excel.name)
|
| 104 |
city_out.to_csv(city_csv, index=False)
|
| 105 |
zip_out.to_csv(zip_csv, index=False)
|
| 106 |
|
| 107 |
-
# return paths so Gradio presents download buttons
|
| 108 |
return city_csv, zip_csv
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
title = "Locksmith Keyword Generator"
|
| 115 |
-
description = (
|
| 116 |
-
"Upload:\n"
|
| 117 |
-
" 1️⃣ *City-ZIP Excel* (columns: ZIP Code, City, State, … ZIP List in col F)\n"
|
| 118 |
-
" 2️⃣ *Keywords Excel* (first column contains the base keywords)\n\n"
|
| 119 |
-
"Click **Generate** and download the two CSVs."
|
| 120 |
-
)
|
| 121 |
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
fn=run_builder,
|
| 124 |
inputs=[
|
| 125 |
-
gr.File(label="City & ZIP Excel
|
| 126 |
-
gr.File(label="Keyword Excel
|
| 127 |
],
|
| 128 |
outputs=[
|
| 129 |
gr.File(label="City-level keyword CSV"),
|
| 130 |
gr.File(label="ZIP-level keyword CSV"),
|
| 131 |
],
|
| 132 |
-
title=
|
| 133 |
-
description=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
)
|
| 135 |
|
| 136 |
if __name__ == "__main__":
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import pandas as pd
|
| 3 |
+
import re
|
| 4 |
import gradio as gr
|
| 5 |
from datetime import datetime
|
| 6 |
|
| 7 |
+
# ------------------------ SETTINGS ------------------------
|
| 8 |
+
EXTRA_MODS = []
|
|
|
|
|
|
|
| 9 |
CITY_TEMPLATES = [
|
| 10 |
"{keyword} {city}",
|
|
|
|
|
|
|
| 11 |
"{keyword} in {city}",
|
|
|
|
| 12 |
"{city} {keyword}",
|
|
|
|
|
|
|
| 13 |
]
|
| 14 |
ZIP_TEMPLATES = [
|
| 15 |
"{keyword} {zip}",
|
|
|
|
| 19 |
def has_any_mod(phrase: str, mods) -> bool:
    """Return True when *phrase* already ends with one of the modifiers.

    A modifier only counts when it is preceded by a space, so a phrase that
    consists of the modifier alone does not match.

    Args:
        phrase: Keyword phrase to inspect.
        mods: Iterable of modifier strings (suffixes without the leading space).

    Returns:
        True if ``phrase`` ends with ``" " + m`` for any ``m`` in ``mods``;
        False otherwise, including when ``mods`` is empty.
    """
    # str.endswith accepts a tuple of candidates, so one call covers every
    # modifier instead of a Python-level loop.
    suffixes = tuple(" " + m for m in mods)
    return phrase.endswith(suffixes)
|
| 21 |
|
| 22 |
+
# ------------------------ CORE LOGIC ------------------------
|
|
|
|
|
|
|
| 23 |
def build_keywords(city_df: pd.DataFrame, kw_df: pd.DataFrame):
|
| 24 |
+
primary_zip_col = city_df.columns[0]
|
| 25 |
+
city_col = city_df.columns[1]
|
| 26 |
+
zip_list_col = city_df.columns[5] if len(city_df.columns) > 5 else None
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
city_kw_records, zip_kw_records = [], []
|
| 29 |
|
| 30 |
for _, row in city_df.iterrows():
|
| 31 |
city = str(row[city_col]).strip().lower()
|
|
|
|
| 32 |
primary_zip = str(row[primary_zip_col]).strip()
|
| 33 |
+
zip_raw = str(row[zip_list_col]) if zip_list_col and pd.notna(row[zip_list_col]) else ""
|
| 34 |
+
zip_list = [z.strip() for z in zip_raw.split(",") if z.strip().isdigit()] or (
|
|
|
|
| 35 |
[primary_zip] if primary_zip.isdigit() else []
|
| 36 |
)
|
| 37 |
|
| 38 |
for kw in kw_df.iloc[:, 0].astype(str):
|
| 39 |
kw = kw.lower().strip()
|
|
|
|
| 40 |
kw_variants = [kw]
|
| 41 |
if not has_any_mod(kw, EXTRA_MODS):
|
| 42 |
kw_variants += [f"{kw} {m}" for m in EXTRA_MODS]
|
|
|
|
| 44 |
for v in kw_variants:
|
| 45 |
for tpl in CITY_TEMPLATES:
|
| 46 |
candidate = tpl.format(keyword=v, city=city).replace(" ", " ").strip()
|
| 47 |
+
city_kw_records.append({"City": city.title(), "Keyword": candidate})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
for z in zip_list:
|
| 50 |
for tpl in ZIP_TEMPLATES:
|
| 51 |
candidate = tpl.format(keyword=kw, zip=z).strip()
|
| 52 |
+
zip_kw_records.append({"ZIP": z, "Keyword": candidate})
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
city_out = pd.DataFrame(city_kw_records).drop_duplicates(subset=["Keyword"])
|
| 55 |
zip_out = pd.DataFrame(zip_kw_records).drop_duplicates(subset=["Keyword"])
|
| 56 |
return city_out, zip_out
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def make_name(prefix: str, city_file: str, kw_file: str):
    """Build the output CSV file name for one generator run.

    The name has the form ``<prefix>_<city stem>__<keyword stem>_<YYYYMMDD>.csv``,
    where each stem is the input file's base name without its final extension
    and the date is the current local date.
    """
    def _stem(path):
        # Drop the directory part first, then the last extension.
        leaf = os.path.basename(path)
        root, _ext = os.path.splitext(leaf)
        return root

    stamp = f"{datetime.now():%Y%m%d}"
    pieces = (prefix, _stem(city_file), "", _stem(kw_file), stamp)
    # The empty piece yields the double underscore between the two stems.
    return "_".join(pieces) + ".csv"
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
def run_builder(city_excel, keyword_excel):
    """Generate the city-level and ZIP-level keyword CSVs from two uploads.

    Args:
        city_excel: Uploaded City & ZIP workbook, given either as a filesystem
            path or as an object whose ``name`` attribute is that path.
        keyword_excel: Uploaded keyword workbook, in the same form.

    Returns:
        A ``(city_csv, zip_csv)`` tuple of the paths of the two CSV files
        written to the current working directory.

    Raises:
        ValueError: If either upload is missing.
    """
    if city_excel is None or keyword_excel is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError("Upload both the City & ZIP Excel and the Keyword Excel.")

    # Accept a plain path string as well as a temp-file wrapper exposing .name.
    city_path = getattr(city_excel, "name", city_excel)
    kw_path = getattr(keyword_excel, "name", keyword_excel)

    # Read the city sheet as text: with inferred dtypes a ZIP column that
    # contains blanks becomes float, so str() yields "90210.0" and the
    # isdigit() checks downstream silently discard the ZIP.
    city_df = pd.read_excel(city_path, dtype=str)
    kw_df = pd.read_excel(kw_path)

    city_out, zip_out = build_keywords(city_df, kw_df)

    city_csv = make_name("locksmith_city_keywords", city_path, kw_path)
    zip_csv = make_name("locksmith_zip_keywords", city_path, kw_path)
    city_out.to_csv(city_csv, index=False)
    zip_out.to_csv(zip_csv, index=False)

    return city_csv, zip_csv
|
| 76 |
|
| 77 |
+
# ------------------------ ZIP COMBINER TAB ------------------------
|
| 78 |
+
def combine_zips(file):
    """Merge the ZIPs in column A and the ZIP lists in column F into one sheet.

    Column A is read as one ZIP per cell. Column F, when the sheet has one, is
    read as comma- and/or newline-separated ZIP lists. The values are
    de-duplicated in first-seen order (column A first, then column F) and
    written to a single ``ZIP`` column.

    Args:
        file: Uploaded Excel workbook, given either as a filesystem path or as
            an object whose ``name`` attribute is that path.

    Returns:
        Path of the written ``<input stem>_zips_combined.xlsx`` workbook,
        created in the current working directory.

    Raises:
        ValueError: If no file was supplied or the sheet has no columns.
    """
    if file is None:
        raise ValueError("Please upload an Excel file.")
    # Accept a plain path string as well as a temp-file wrapper exposing .name.
    path = getattr(file, "name", file)

    # Read every cell as text so numeric ZIP cells are not upcast to float
    # (which would turn 90210 into "90210.0" once blanks are present).
    df = pd.read_excel(path, dtype=str)
    if df.shape[1] == 0:
        raise ValueError("The uploaded Excel file has no columns.")

    def _clean(token):
        # Trim whitespace and undo a float rendering such as "90210.0".
        text = str(token).strip()
        float_like = re.fullmatch(r"(\d+)\.0+", text)
        return float_like.group(1) if float_like else text

    collected = [_clean(value) for value in df.iloc[:, 0].dropna()]

    # Column F is optional: sheets with fewer than six columns simply
    # contribute no list entries instead of raising IndexError.
    if df.shape[1] > 5:
        for cell in df.iloc[:, 5].dropna():
            collected.extend(_clean(part) for part in re.split(r'[\n,]+', str(cell)))

    # dict.fromkeys keeps the first occurrence of each ZIP and preserves order;
    # empty tokens (blank cells, trailing separators) are discarded.
    unique_zips = list(dict.fromkeys(z for z in collected if z))
    all_zips = pd.Series(unique_zips, dtype=object)

    base_name = os.path.splitext(os.path.basename(path))[0]
    output_path = f"{base_name}_zips_combined.xlsx"
    all_zips.to_excel(output_path, index=False, header=['ZIP'])

    return output_path
|
| 95 |
+
|
| 96 |
+
# ------------------------ INTERFACES ------------------------
|
| 97 |
+
def _excel_upload(label):
    """Create a file-upload component restricted to .xls/.xlsx files."""
    return gr.File(label=label, file_types=[".xls", ".xlsx"])


# Tab 1: two Excel uploads in, two keyword CSV downloads out.
locksmith_demo = gr.Interface(
    fn=run_builder,
    inputs=[
        _excel_upload("City & ZIP Excel"),
        _excel_upload("Keyword Excel"),
    ],
    outputs=[
        gr.File(label="City-level keyword CSV"),
        gr.File(label="ZIP-level keyword CSV"),
    ],
    title="Locksmith Keyword Generator",
    description="Upload 1️⃣ City-ZIP Excel and 2️⃣ Keyword Excel. Generate city & ZIP-based keyword CSVs.",
)

# Tab 2: one Excel upload in, one combined ZIP workbook out.
zip_demo = gr.Interface(
    fn=combine_zips,
    inputs=_excel_upload("Excel with ZIPs in Column A & F"),
    outputs=gr.File(label="Combined ZIP Excel"),
    title="ZIP Code Combiner",
    description="Upload an Excel file with ZIPs in Column A and ZIP Lists in Column F (comma/newline separated). Get a clean, deduplicated ZIP list.",
)

# Both tools are served from a single app, one tab each.
demo = gr.TabbedInterface(
    interface_list=[locksmith_demo, zip_demo],
    tab_names=["🔐 Keyword Builder", "📬 ZIP Combiner"],
)
|
| 124 |
|
| 125 |
if __name__ == "__main__":
|