| |
| """ |
| Created on Mon Sep 22 13:39:46 2025 |
| |
| @author: rmd2219 |
| """ |
|
|
| import pandas as pd |
| import numpy as np |
| import os, json, re, time |
| from openai import OpenAI |
| import difflib |
| import gradio as gr |
|
|
| from huggingface_hub import hf_hub_download, HfApi |
| from huggingface_hub.utils import EntryNotFoundError |
|
|
# --- Per-user usage tracking configuration (stored in a HF dataset repo) ---
USAGE_DATASET_REPO = os.environ.get("USAGE_DATASET_REPO", "NYSERDA-CRE-Working-Group/nyserda_demo_useage_store")
USAGE_FILENAME = os.environ.get("USAGE_FILENAME", "usage.csv")
MAX_RUNS_PER_USER = int(os.environ.get("MAX_RUNS_PER_USER", "10"))


# --- NYC public building datasets shipped alongside the app ---
map_pluto_df = pd.read_csv("map_pluto.csv", low_memory=False)
LL87_df = pd.read_csv("LL87.csv", low_memory=False)
LL84_df = pd.read_csv("LL84.csv", low_memory=False)
# LL84 uses the literal string "Not Available" for missing values; normalize to NaN.
LL84_df = LL84_df.replace("Not Available", np.nan)

permit_df = pd.read_csv("Permit.csv", low_memory=False)
ecb_violation_df = pd.read_csv("ECB_violations.csv", low_memory=False)
violation_df = pd.read_csv("Violations.csv", low_memory=False)


# BUG FIX: the original `os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")`
# raised an opaque TypeError ("str expected, not NoneType") when the variable
# was unset.  Fail fast with a clear message instead.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY environment variable must be set.")
HF_TOKEN = os.environ.get("HF_TOKEN")


client = OpenAI()          # reads OPENAI_API_KEY from the environment
api = HfApi(token=HF_TOKEN)
def user_id_from_profile(profile: gr.OAuthProfile | None) -> str | None:
    """Return a normalized user id (lowercased profile name), or None.

    None is returned when the user is not signed in or the profile has no
    usable name attribute.
    """
    if profile is None:
        return None
    username = getattr(profile, "name", None)
    return username.strip().lower() if username else None
|
|
def _load_usage_df() -> pd.DataFrame:
    """Fetch the usage table from the HF dataset repo.

    Returns an empty table with the expected schema when the file has never
    been uploaded (first run of the app).
    """
    try:
        local_path = hf_hub_download(
            repo_id=USAGE_DATASET_REPO,
            repo_type="dataset",
            filename=USAGE_FILENAME,
            token=HF_TOKEN,
        )
    except EntryNotFoundError:
        # usage.csv does not exist in the repo yet.
        return pd.DataFrame(columns=["user_id", "runs", "first_seen", "last_seen"])
    return pd.read_csv(local_path)
|
|
def _save_usage_df(df: pd.DataFrame, commit_message: str) -> None:
    """Write the usage table locally, then push it to the HF dataset repo."""
    local_copy = "/tmp/usage.csv"
    df.to_csv(local_copy, index=False)

    api.upload_file(
        path_or_fileobj=local_copy,
        path_in_repo=USAGE_FILENAME,
        repo_id=USAGE_DATASET_REPO,
        repo_type="dataset",
        commit_message=commit_message,
    )
|
|
def check_and_increment_quota(user_id: str) -> tuple[bool, int]:
    """Check the per-user run quota and, if allowed, record one more run.

    Args:
        user_id: normalized user id (see user_id_from_profile).

    Returns:
        (allowed, remaining): whether this run may proceed, and how many runs
        the user has left after it.

    Note: read-modify-write against the shared CSV is not atomic; concurrent
    runs could race.  Acceptable for this demo's traffic.
    """
    now = int(time.time())
    df = _load_usage_df()

    matches = [] if df.empty else df.index[df["user_id"] == user_id]
    if len(matches) == 0:
        # New user.  (The original code had a dead `runs = 0; if runs >= MAX`
        # check here -- it could only fire when the quota itself was <= 0,
        # which is what we test explicitly now.)
        if MAX_RUNS_PER_USER <= 0:
            return False, 0
        new_row = {
            "user_id": user_id,
            "runs": 1,
            "first_seen": now,
            "last_seen": now,
        }
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
        _save_usage_df(df, commit_message=f"usage: increment {user_id} to 1")
        return True, MAX_RUNS_PER_USER - 1

    idx = matches[0]
    runs = int(df.loc[idx, "runs"])

    if runs >= MAX_RUNS_PER_USER:
        return False, 0

    runs += 1
    df.loc[idx, "runs"] = runs
    df.loc[idx, "last_seen"] = now

    _save_usage_df(df, commit_message=f"usage: increment {user_id} to {runs}")
    return True, MAX_RUNS_PER_USER - runs
| |
|
|
| |
| |
| |
# Registry of every dataset queried per BBL.  Key names are also used as
# section headings in the LLM prompt and as keys in the "Key Items" JSON.
dataframes = {
    "map_pluto": map_pluto_df,
    "LL87": LL87_df,
    "LL84": LL84_df,
    "Permit": permit_df,
    "ECB Violation": ecb_violation_df,
    "Violation": violation_df,
}

# Column listing per dataset.
# BUG FIX: "Violation" previously listed ecb_violation_df's columns
# (copy/paste error); derive every entry from the registry instead.
column_names = {name: list(df.columns) for name, df in dataframes.items()}

# Normalize BBL to string in every dataset so substring matching in
# generate_bbl_report works uniformly.
for name, df in dataframes.items():
    df["BBL"] = df["BBL"].astype(str)
|
|
# LL97 primer embedded in the system prompt.  Plain ASCII only: the prompt
# itself instructs the model to avoid non-ASCII characters, and this text
# previously contained mojibake multiplication/minus signs (BUG FIX).
ll97_description = """
Local Law 97 (LL97) of New York City limits greenhouse gas emissions for buildings
over 25,000 square feet. Starting in 2024, each building type has a maximum allowed
emissions intensity (metric tons CO2e per square foot per year). Exceeding this limit
leads to a fine of $268 per metric ton over the limit, per year.

Key points:
- Applies to buildings >25,000 sqft or two or more buildings on the same tax lot that together exceed 50,000 gross square feet.
- Limits depend on occupancy group (e.g., residential, office, university).
- Calculate: building_emissions = Site EUI x emissions_factor x floor_area.
- Compare to LL97 threshold for that occupancy group.
- Fine = (building_emissions - threshold x floor_area) x $268 if positive.
- Retrofits (insulation, HVAC upgrades, electrification) can lower site EUI or
  emissions factors to avoid fines.
- Emission Factors are:
    Electricity: 0.000288962 tCO2e/kWh
    Natural Gas: 0.00005311 tCO2e/kBtu
    #2 Fuel Oil: 0.00007421 tCO2e/kBtu
    #4 Fuel Oil: 0.00007529 tCO2e/kBtu
    District Steam: 0.00004493 tCO2e/kBtu
"""
|
|
|
|
# Accepted spellings/codes for each borough, used to normalize user input.
# Each entry lists: code, lowercase code, borough number, name, NAME, name.
_BOROUGHS = [
    ("Manhattan", "MN", 1),
    ("Bronx", "BX", 2),
    ("Brooklyn", "BK", 3),
    ("Queens", "QN", 4),
    ("Staten Island", "SI", 5),
]
borough_options = {
    name: [code, code.lower(), number, name, name.upper(), name.lower()]
    for name, code, number in _BOROUGHS
}
|
|
# System prompt shared by every report request (OpenAI chat-message format).
# The LL97 primer is interpolated once at import time.
# BUG FIX: corrected typos in the LLM-facing text ("permis", "Violaations",
# "aquiring", "attmept", missing article).
base_message = {
    "role": "system",
    "content": (
        f'''
You are a risk assessor for building acquisition in NYC. You will be given information from the following public databases.
- map_pluto: Parcel-level tax lot data (BBL, addresses, zoning, building attributes).
- LL84: Benchmarking data (annual energy use, emissions, EUI, GFA, property IDs, years).
- LL87: Audit/retrofit data (Energy Conservation Measures, audit year, floor area, systems).
- Permit: Information about all permits for work done to buildings in NYC.
- Violations: Record of all DOB violations recorded for NYC
- ECB Violations: Record of all ECB violations for NYC

Your job is to summarize the risk posed by acquiring this building given all of the information passed to you.
Your output will be a one page PDF summary, attempt to summarize it in ~1 page of text.

Keep in mind LL97 described here: {ll97_description}
'''
    ),
}
|
|
def generate_bbl_report(question):
    """Generate a structured risk report for the BBL mentioned in *question*.

    Extracts the first 10-digit BBL from the question, collects matching rows
    from every registered dataset, asks the OpenAI model for a strict-JSON
    risk assessment, and renders it into a PDF.

    Returns:
        (sections, pdf_buffer): the parsed JSON dict and an io.BytesIO PDF,
        or (message, None) when no dataset contains the BBL.

    Raises:
        ValueError: if the question contains no 10-digit BBL.

    Side effects: rebinds module-level globals `messages`, `results` and
    `combined_text`; make_structured_pdf reads `results` to build the timeline.
    """
    global messages
    messages = []
    messages.append(base_message)
    # A BBL is a 10-digit borough/block/lot identifier.
    match = re.search(r'\b\d{10}\b', question)
    if match:
        bbl = match.group(0)
    else:
        raise ValueError("No valid BBL found in query")

    global results
    results = {}
    for name, df in dataframes.items():
        # Substring match on BBL rather than equality -- presumably to tolerate
        # formatting differences across datasets; TODO confirm this is intended.
        subset = df[df['BBL'].astype(str).apply(lambda x: str(bbl) in x)]
        subset = subset.drop(['BBL'], axis=1)

        if not subset.empty:
            # Drop rows that carry no information beyond the timeline-plot
            # columns (dates/labels handled separately by make_timeline).
            cols_to_exclude = ['ISSUE_DATE', 'VIOLATION_TYPE_DESC', 'VIOLATION_TYPE', 'VIOLATION_DESCRIPTION', 'VIOLATION_CATEGORY', 'SEVERITY', 'Issuance Date', 'Work Type']
            cols_to_check = [c for c in subset.columns if c not in cols_to_exclude]
            subset = subset.dropna(subset=cols_to_check, how='all')
            results[name] = subset

    if not results:
        return f"No data found for BBL {bbl}.", None

    global combined_text

    # Render every matching table as markdown for the prompt.
    combined_text = f"### Summary Data for BBL {bbl}\n\n"
    for name, df in results.items():
        text_snippet = df.to_markdown(index=False)
        combined_text += f"#### Dataset: {name}\n{text_snippet}\n\n"

    # NOTE: the JSON keys below (including the "Assesment" spelling) are load-
    # bearing -- make_structured_pdf looks them up verbatim.
    messages.append({"role": "user", "content": f"""
You are a risk assesment tool to pre-screen buildings for aquisition for commercial real estate firms.
**Task:** Analyze the available data to identify patterns, risks, and compliance issues.
Give particular attention on the timeline of both permits and violations.
- When looking at permits, note both major renovations as well as frequency of permits for similar systems in case this could indicate a faulty system.
- Cross check this data with energy use/audit information if it is available.
- See if permits were filed to fix previous violations.
Highlight large renovations or major violations that would materially affect a potential new building owner.
Using the data available for BBL {bbl}, write a structured risk assessment report

**Output Requirements:**
Return your full response in **strict JSON** format with the following top-level keys:
- "Assesment"
- "Executive Summary"
- "Building Overview"
- "Risk Factors"
- "Compliance History"
- "Recommendations"
- "Key Items"

Each section must contain a paragraph of text **except** "Assesment", "Key Items", "Recommendations",
"Assesment" should be a string with a breif description of the overall risk. i.e. High Risk, Low Risk or some combination

"Key Items" must follow this specific dictionary structure:

```json
"Key Items": {{
"Permit": {{
"Date": ["YYYY-MM-DD", ...],
"Label": ["Description of permit", ...]
}},
"Violation": {{
"Date": ["YYYY-MM-DD", ...],
"Label": ["Description of violation", ...]
}},
"ECB Violation": {{
"Date": ["YYYY-MM-DD", ...],
"Label": ["Description of ECB violation", ...]
}}
}}

For Key Items:
Include only the most important permits and violations (the ones that should be annotated on a timeline plot).
Select events that represent major renovations, structural or system upgrades, or serious compliance issues.
Keep the descriptions very breif. They are annotating points and should only be a few words long. Only provide more details if absolutely neccesary.

"Recommendations" must be a list of strings where the strings are your reccomendations. These will be shown to the user as a numbered list and can be as detailed as you would like.
- These are reccomandations to a potential buyer, for things they would need to do before and after purchase.

"Executive Summary" must also be a list of strings. The first string must be a summary sentance i.e. "This property is a medium risk property because:"
- All following strings will be organized into bullet points. Only include information relevant to the risk assesment, this section is meant to be breif < 5 bullets and bullets must be concise < 1 line

Ensure the "Compliance History" section is a detailed paracgraph and highlights all important violations, patterns of violations and unresolved violations.
Give a comprehensive overview of compliance history.

When composing resonse keep in mind I am outputting this to a pdf, avoid using characters that may break fonts.
Avoid using smart quotes (β β β β), en/em dashes (β β), and special bullets (β’, β) in output text. Use ASCII equivalents: straight quotes, single dash (-), and hyphens.

Question: {question}
Data: {combined_text}
"""})

    response = client.chat.completions.create(
        model="gpt-5.2",

        messages = messages
    )

    # NOTE(review): assumes the model returns valid JSON -- json.loads raises
    # otherwise; consider a JSON response_format guard.  TODO confirm.
    sections = json.loads(response.choices[0].message.content)

    pdf_buffer = make_structured_pdf(sections, bbl)

    return sections, pdf_buffer
| |
| |
| from reportlab.lib.pagesizes import letter |
| from reportlab.lib import colors |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle |
| from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak, Spacer, HRFlowable, Image, ListFlowable, ListItem |
# Map "smart" punctuation to ASCII so ReportLab's default fonts render the
# text correctly.  BUG FIX/generalization: the original replaced only dash
# variants, although the prompt also bans smart quotes and special bullets.
_ASCII_TRANSLATION = str.maketrans({
    "\u2013": "-",   # en dash
    "\u2014": "-",   # em dash
    "\u2212": "-",   # minus sign
    "\u2018": "'",   # left single quote
    "\u2019": "'",   # right single quote
    "\u201c": '"',   # left double quote
    "\u201d": '"',   # right double quote
    "\u2022": "-",   # bullet
    "\u2023": "-",   # triangular bullet
})


def normalize_text(text):
    """Return *text* with smart quotes, dashes and bullets replaced by ASCII."""
    return text.translate(_ASCII_TRANSLATION)
def make_structured_pdf(sections, bbl):
    """Render the model's JSON sections into a PDF report.

    Args:
        sections: parsed JSON dict with keys like "Assesment",
            "Executive Summary", ..., "Key Items" (spelling is load-bearing).
        bbl: the 10-digit BBL string, used in the title.

    Returns:
        io.BytesIO positioned at 0 containing the built PDF.

    NOTE(review): reads the module-level global `results` (populated by
    generate_bbl_report) when building the timeline figure -- confirm a
    report run precedes any direct call.
    """
    buffer = io.BytesIO()

    doc = SimpleDocTemplate(buffer, pagesize=letter,
                            rightMargin=50, leftMargin=50, topMargin=60, bottomMargin=50)
    styles = getSampleStyleSheet()

    title_style = ParagraphStyle(
        name="Title", parent=styles["Heading1"],
        alignment=1, fontSize=18, spaceAfter=10)
    sub_title_style = ParagraphStyle(
        name="SubTitle", parent=styles["Heading1"],
        alignment=1, fontSize=14, spaceAfter=20)
    header_style = ParagraphStyle(
        name="Header", parent=styles["Heading2"],
        textColor=colors.darkblue, spaceAfter=6)
    body_style = ParagraphStyle(
        name="Body", parent=styles["BodyText"],
        fontSize=11, leading=15, spaceAfter=12)

    story = []

    # ASCII dash in the title (the original had a mojibaked dash character).
    story.append(Paragraph(f"Building Risk Report - BBL {bbl}", title_style))
    story.append(Paragraph(f"Assesment: {str(sections['Assesment'])}", sub_title_style))
    story.append(HRFlowable(width="100%", thickness=1, color=colors.darkblue))
    story.append(Spacer(1, 12))

    # BUG FIX: timeline_buf was referenced after the loop without being
    # initialized, raising NameError whenever "Key Items" was missing.
    timeline_buf = io.BytesIO()

    for section_name, content in sections.items():
        if section_name == "Key Items":
            global sample_dict  # debugging hook: inspectable after a run
            sample_dict = content
            timeline_buf = make_timeline(results, content, bbl)
        elif section_name == 'Assesment':
            continue  # already rendered in the header above
        else:
            story.append(Paragraph(section_name, header_style))

            if section_name.lower() == "recommendations":
                # Numbered list of recommendation strings.
                list_items = [ListItem(Paragraph(r, body_style)) for r in content]
                story.append(ListFlowable(list_items, bulletType='1', bulletFormat='%s)', start='1', leftIndent=20))
                story.append(Spacer(1, 16))
            elif section_name.lower() == "executive summary":
                # First string is the lead-in sentence; the rest become bullets.
                list_items = [ListItem(Paragraph(r, body_style)) for r in content[1:]]
                story.append(Paragraph(content[0], body_style))
                story.append(ListFlowable(list_items, bulletType='1', bulletFormat='%s)', start='1', leftIndent=20))
                story.append(Spacer(1, 16))
            else:
                # Plain paragraph section; split on newlines, sanitize glyphs.
                content = str(content).strip()
                for paragraph in content.split("\n"):
                    paragraph = paragraph.strip()
                    if paragraph:
                        story.append(Paragraph(normalize_text(paragraph), body_style))
                story.append(Spacer(1, 16))

    # Only add the timeline page if make_timeline actually produced an image.
    if timeline_buf.getbuffer().nbytes > 0:
        story.append(PageBreak())
        story.append(HRFlowable(width="100%", thickness=1, color=colors.darkblue))
        story.append(Spacer(1, 20))
        story.append(Paragraph("Timeline of Key Items", header_style))
        story.append(Image(timeline_buf, width=600, height=600))

    doc.build(story)

    buffer.seek(0)
    return buffer
|
|
| |
| |
| |
| |
| |
|
|
| |
|
|
| |
| |
|
|
| |
| import matplotlib.pyplot as plt |
| import matplotlib.dates as mdates |
| from adjustText import adjust_text |
| import contextlib, io |
| import textwrap, random |
| import matplotlib.patches as mpatches |
| from matplotlib.lines import Line2D |
| import matplotlib.gridspec as gridspec |
|
|
def _annotate_key_items(ax, dates, labels, anchor_y, base_y, min_gap_days, above):
    """Draw key-item labels near a timeline row, nudging colliding labels.

    dates: DatetimeIndex of event dates; labels: parallel list of short strings.
    anchor_y is the y of the timeline row, base_y where label text starts.
    Labels within *min_gap_days* of an earlier label at the same height are
    stacked by 0.02 per collision -- upward when *above*, else downward.
    """
    step = 0.02 if above else -0.02
    min_gap = np.timedelta64(min_gap_days, 'D')
    placed = []
    for i, date in enumerate(dates):
        y = base_y
        for j in range(i):
            if abs(date - dates[j]) < min_gap and abs(placed[j] - y) < abs(step):
                y = placed[j] + step
        placed.append(y)
        ax.plot([date, date], [anchor_y, y], color='gray', lw=0.8, zorder=2)
        ax.text(date, y, labels[i], rotation=0, ha='center',
                va='bottom' if above else 'top', fontsize=7, color='black')


def make_timeline(results, content, bbl):
    """Build the timeline figure: one main axis with three horizontal event
    rows (permits at y=1.0, DOB violations at y=0.8, ECB violations at y=0.6)
    plus three frequency bar charts underneath.

    Args:
        results: dict of per-dataset DataFrames from generate_bbl_report.
        content: the "Key Items" dict from the model (Permit / Violation /
            ECB Violation, each with parallel "Date" and "Label" lists).
        bbl: the BBL string (currently unused; kept for interface stability).

    Returns:
        io.BytesIO containing the rendered PNG, positioned at 0.
    """
    permits = results.get('Permit')
    violations = results.get('Violation')
    ecb_violations = results.get('ECB Violation')

    fig = plt.figure(figsize=(12, 12))
    gs = gridspec.GridSpec(2, 3, height_ratios=[3, 1], hspace=0.2, wspace=0.3)

    ax = fig.add_subplot(gs[0, :])   # main timeline
    ax1 = fig.add_subplot(gs[1, 0])  # permit type frequency
    ax2 = fig.add_subplot(gs[1, 1])  # DOB violation type frequency
    ax3 = fig.add_subplot(gs[1, 2])  # ECB violation type frequency

    permit_colors = {
        "Boiler": "#1f77b4",
        "Curb Cut": "#ff7f0e",
        "Construction Equipment": "#2ca02c",
        "Fire Alarm": "#d62728",
        "Fuel Burning": "#9467bd",
        "Fire Suppression": "#8c564b",
        "Fuel Storage": "#e377c2",
        "Mechanical/HVAC": "#7f7f7f",
        "New Building": "#bcbd22",
        "Other": "#17becf",
        "Plumbing": "#aec7e8",
        "Standpipe": "#ffbb78",
        "Sprinkler": "#98df8a",
    }

    # x position for the row captions, just right of "today".
    x_label = mdates.date2num(pd.to_datetime('2025-06-01'))

    # BUG FIX: initialize up front.  Previously this was first assigned inside
    # the Violation branch, so skipping that branch caused NameError later.
    active_violations = False
    # BUG FIX: collect whichever date series exist so the final axis limits do
    # not reference possibly-unbound locals when a dataset is absent.
    date_arrays = []
    empty_key_items = {"Date": [], "Label": []}

    if permits is not None and not permits.empty:
        permit_dates = pd.to_datetime(permits['Issuance Date'], errors='coerce')
        permit_labels = permits['Work Type'].fillna('Other')

        # Drop rows whose date failed to parse; keep labels aligned.
        mask = permit_dates.notna()
        permit_dates = permit_dates[mask]
        permit_labels = permit_labels[mask]

        sorted_idx = np.argsort(permit_dates)
        permit_dates = permit_dates.iloc[sorted_idx]
        permit_labels = permit_labels.iloc[sorted_idx]

        counts = permit_labels.value_counts()
        labels = counts.index.tolist()
        # .get with fallback: unseen work types no longer raise KeyError.
        bar_colors = [permit_colors.get(label, "#333333") for label in labels]

        ax1.bar(range(len(labels)), counts.values, color=bar_colors)
        ax1.set_xticks(range(len(labels)))
        ax1.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
        ax1.set_ylabel("Count")
        ax1.set_title('Permit Type and Frequency', fontsize=10)

        point_colors = [permit_colors.get(cat, "#333333") for cat in permit_labels]

        ax.hlines(1, permit_dates.min(), permit_dates.max(), color='gray', alpha=0.7, linewidth=2)
        ax.scatter(permit_dates, np.full(len(permit_dates), 1.0), color=point_colors, s=60, zorder=3)

        ax.text(
            x_label + 300, 1,
            "Permit History",
            va='center', ha='left',
            fontsize=8, fontweight='bold', color='gray'
        )

        key_items = content.get('Permit', empty_key_items)
        severe_dates = pd.to_datetime(key_items['Date'])
        severe_labels = key_items['Label']

        # Black rings mark the model-selected key permits.
        ax.scatter(severe_dates, np.full(len(severe_dates), 1),
                   facecolor='none', edgecolor='black', s=120, lw=1.2, zorder=4)
        _annotate_key_items(ax, severe_dates, severe_labels,
                            anchor_y=1, base_y=1.03, min_gap_days=5000, above=True)

        date_arrays.append(mdates.date2num(permit_dates))

    violation_colors = {
        'Boiler': "#1f77b4",
        'Construction': "#2ca02c",
        'Elevator': 'orange',
        'Emergency': 'red',
        'High Pressure Boiler': '#00008B',
        'Immediate Emergency': '#8B0000',
        'Landmark Building': "#7f7f7f",
        'Plumbing': "#aec7e8",
        'Unsafe Building': "k",
        'Zoning': 'yellow',
        'Other - Likely Safety': 'k'}

    if violations is not None and not violations.empty:
        violation_dates = pd.to_datetime(violations['ISSUE_DATE'], errors='coerce')
        violation_labels = violations['VIOLATION_TYPE_DESC'].fillna('Other - Likely Safety')
        violation_status = violations['VIOLATION_CATEGORY']
        active_mask = violation_status.str.contains("ACTIVE", case=False, na=False)

        mask = violation_dates.notna()
        violation_dates = violation_dates[mask]
        # BUG FIX: the original assigned the masked labels to a misspelled
        # variable ("violtion_labels"), so labels/colors could disagree in
        # length with the masked dates.  Keep labels and mask aligned.
        violation_labels = violation_labels[mask]
        active_mask = active_mask[mask]

        counts = violation_labels.value_counts()
        labels = counts.index.tolist()
        bar_colors = [violation_colors.get(label, "#333333") for label in labels]

        ax2.bar(range(len(labels)), counts.values, color=bar_colors)
        ax2.set_xticks(range(len(labels)))
        ax2.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
        ax2.set_ylabel("Count")
        ax2.set_title('Violation Type and Frequency', fontsize=10)

        sorted_idx = np.argsort(violation_dates)
        violation_dates = violation_dates.iloc[sorted_idx]
        violation_labels = violation_labels.iloc[sorted_idx]
        active_mask = active_mask.iloc[sorted_idx]

        point_colors = [violation_colors.get(cat, "#333333") for cat in violation_labels]

        ax.hlines(0.8, violation_dates.min(), violation_dates.max(), color='gray', alpha=0.7, linewidth=2)
        ax.scatter(violation_dates, np.full(len(violation_dates), 0.8), color=point_colors, s=60, zorder=3)

        # Red rings mark violations still marked ACTIVE.
        ax.scatter(violation_dates[active_mask], np.full(int(active_mask.sum()), 0.8),
                   color='none', s=80, edgecolor='red', zorder=4)

        if active_mask.any():
            active_violations = True

        key_items = content.get('Violation', empty_key_items)
        severe_dates = pd.to_datetime(key_items['Date'])
        severe_labels = key_items['Label']

        ax.scatter(severe_dates, np.full(len(severe_dates), 0.8),
                   color='none', s=80, edgecolor='black', zorder=4)
        _annotate_key_items(ax, severe_dates, severe_labels,
                            anchor_y=0.8, base_y=0.83, min_gap_days=4000, above=True)

        ax.text(
            x_label + 200, 0.8,
            "Violation History",
            va='center', ha='left',
            fontsize=8, fontweight='bold', color='gray'
        )

        date_arrays.append(mdates.date2num(violation_dates))

    ecb_color = {
        'Administrative': 'blue',
        'Boilers': "#1f77b4",
        'Construction': "#2ca02c",
        'Cranes and Derricks': "#2ca02c",
        'Elevators': 'orange',
        'HPD': '#00008B',
        'Local Law': 'cyan',
        'Padlock': "#7f7f7f",
        'Plumbing': "#aec7e8",
        'Public Assembly': 'blue',
        'Quality of Life': 'blue',
        'Signs': 'yellow',
        'Site Safety': 'red',
        'Unknown': 'gray',
        'Zoning': 'yellow'}

    if ecb_violations is not None and not ecb_violations.empty:
        ecb_violation_dates = pd.to_datetime(ecb_violations['ISSUE_DATE'], errors='coerce')
        ecb_violation_labels = ecb_violations['VIOLATION_TYPE'].fillna('Unknown')
        ecb_severity = ecb_violations['SEVERITY']
        desc = ecb_violations['VIOLATION_DESCRIPTION']

        mask = ecb_violation_dates.notna()
        ecb_violation_dates = ecb_violation_dates[mask]
        # BUG FIX: same misspelled-variable problem as the Violation branch.
        ecb_violation_labels = ecb_violation_labels[mask]
        ecb_severity = ecb_severity[mask]
        desc = desc[mask]

        counts = ecb_violation_labels.value_counts()
        labels = counts.index.tolist()
        bar_colors = [ecb_color.get(label, "#333333") for label in labels]

        ax3.bar(range(len(labels)), counts.values, color=bar_colors)
        ax3.set_xticks(range(len(labels)))
        ax3.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
        ax3.set_ylabel("Count")
        ax3.set_title('ECB Violation Type and Frequency', fontsize=10)

        sorted_idx = np.argsort(ecb_violation_dates)
        ecb_violation_dates = ecb_violation_dates.iloc[sorted_idx]
        ecb_violation_labels = ecb_violation_labels.iloc[sorted_idx]
        ecb_severity = ecb_severity.iloc[sorted_idx]
        desc = desc.iloc[sorted_idx]

        point_colors = [ecb_color.get(cat, "#333333") for cat in ecb_violation_labels]
        ax.hlines(0.6, ecb_violation_dates.min(), ecb_violation_dates.max(), color='gray', alpha=0.7, linewidth=2)
        ax.scatter(ecb_violation_dates, np.full(len(ecb_violation_dates), 0.6), color=point_colors, s=60, zorder=3)

        # NOTE(review): these SEVERITY class strings are assumed to denote
        # hazardous ECB violations -- confirm against the dataset dictionary.
        severe_mask = ecb_severity.isin(["CLASS - 1", "CLASS - 2", "Hazardous"])
        if severe_mask.any():
            active_violations = True

        hazard_dates = ecb_violation_dates[severe_mask]
        ax.scatter(hazard_dates, np.full(len(hazard_dates), 0.6),
                   color='none', s=80, edgecolor='red', zorder=4)

        key_items = content.get('ECB Violation', empty_key_items)
        severe_dates = pd.to_datetime(key_items['Date'])
        severe_labels = key_items['Label']

        ax.scatter(severe_dates, np.full(len(severe_dates), 0.6),
                   color='none', s=80, edgecolor='black', zorder=4)
        # ECB labels stack downward (below the row) unlike the other two rows.
        _annotate_key_items(ax, severe_dates, severe_labels,
                            anchor_y=0.6, base_y=0.57, min_gap_days=5000, above=False)

        ax.text(
            x_label, 0.6,
            "ECB Violation History",
            va='center', ha='left',
            fontsize=8, fontweight='bold', color='gray'
        )

        date_arrays.append(mdates.date2num(ecb_violation_dates))

    if active_violations:
        unresolved_handle = Line2D(
            [0], [0],
            marker='o', linestyle='none',
            markerfacecolor='none',
            markeredgecolor='red',
            markeredgewidth=1.2,
            markersize=6,
            label='Unresolved or Hazardous Violation'
        )

        leg_unresolved = ax.legend(
            handles=[unresolved_handle],
            loc='upper center',
            bbox_to_anchor=(0.17, 0.4),
            frameon=True,
            fontsize=10, title_fontsize=10
        )
        fig.add_artist(leg_unresolved)

    fig.subplots_adjust(bottom=0.25)

    ax.set_ylim(0.4, 1.2)
    # BUG FIX: the original concatenated all three date arrays unconditionally,
    # raising NameError when any dataset was absent for this BBL.
    if date_arrays:
        all_dates = np.concatenate(date_arrays)
        ax.set_xlim(min(all_dates) - 1000, x_label + 2500)
    else:
        ax.set_xlim(x_label - 3650, x_label + 2500)
    ax.set_yticks([])
    ax.set_xlabel("Issuance Date")
    ax.set_title("Building Timeline", fontsize=12, fontweight='bold')

    timeline_buf = io.BytesIO()
    fig.savefig(timeline_buf, format="png", dpi=300, bbox_inches="tight")
    plt.close(fig)  # close this figure specifically; don't disturb others
    timeline_buf.seek(0)

    return timeline_buf
|
|
| |
| |
| import gradio as gr |
| import re, tempfile, traceback |
| import os |
| import json |
| import pandas as pd |
|
|
def run_bbl_report(question, profile: gr.OAuthProfile | None):
    """Gradio handler: enforce login and quota, then generate and save a PDF.

    Args:
        question: free text expected to contain a 10-digit BBL.
        profile: injected by Gradio's OAuth integration (None if logged out).

    Returns:
        (status_markdown, pdf_filepath_or_None).
    """
    uid = user_id_from_profile(profile)
    if uid is None:
        raise gr.Error("Please sign in with Hugging Face to use this demo.")

    allowed, remaining = check_and_increment_quota(uid)
    if not allowed:
        raise gr.Error(f"Usage limit reached: {MAX_RUNS_PER_USER} runs per user.")

    if remaining <= 2:
        gr.Warning(f"⚠️ Only {remaining} run(s) left!")
    else:
        gr.Info(f"✅ Runs remaining: {remaining}")

    try:
        sections, pdf_buffer = generate_bbl_report(str(question))
        if not pdf_buffer:
            # generate_bbl_report returns (message, None) when the BBL matched
            # no rows in any dataset.
            return "❌ **Exception:** BBL Not Found", None
        bbl = re.search(r'\b\d{10}\b', str(question)).group(0)

        # Persist the PDF to a temp file so Gradio's File component can serve
        # it.  (BUG FIX: dropped an unused `filename` variable and repaired the
        # mojibaked, line-broken success message.)
        with tempfile.NamedTemporaryFile(delete=False, prefix=f"BBL_{bbl}_Report_", suffix=".pdf") as tmp:
            data = pdf_buffer.getvalue() if hasattr(pdf_buffer, "getvalue") else pdf_buffer
            tmp.write(data)
            tmp_path = tmp.name

        return f"✅ Report successfully generated for BBL {bbl}", tmp_path

    except Exception as e:
        # Demo-friendly: surface the traceback in the UI instead of crashing.
        tb = traceback.format_exc()
        return f"❌ **Exception:** {e}\n\n```\n{tb}\n```", None
|
|
|
|
|
|
| |
| |
| |
# --- Gradio UI ---
# NOTE(review): the original Markdown strings contained mojibaked emoji
# (e.g. "π’"); restored to the most likely intended glyphs -- confirm.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.LoginButton()

    with gr.Group(visible=True) as main_app:
        gr.Markdown("# 🏢 NYC Building Risk Assessment Tool")
        gr.Markdown("Enter a 10-digit **BBL** (e.g. `1005880006`) to generate a full PDF risk report.")
        gr.Markdown("Due to storage limits on this website, databases limited to Manhattan.")
        gr.Markdown("Note that generating a PDF takes ~ 100 sec")
        gr.Markdown(
            """
            To find a BBL, visit:
            <a href="https://experience.arcgis.com/experience/d826b115c87841d491c2b41fcb175305"
               target="_blank"
               style="color:#1f77b4; text-decoration:none; font-weight:bold;">
               🗺️ NYC Map Portal (ArcGIS)
            </a>
            """
        )

        with gr.Row():
            question_input = gr.Textbox(
                label="Enter BBL",
                placeholder="Example: 1005880006",
                lines=2,
            )

        with gr.Row():
            run_btn = gr.Button("Generate PDF Report", variant="primary")
            reset_btn = gr.Button("🔄 Reset")

        with gr.Row():
            status_output = gr.Markdown()
            file_output = gr.File(label="Download Report")

        # Gradio injects the gr.OAuthProfile argument automatically from
        # run_bbl_report's type annotation; it is not listed in `inputs`.
        run_btn.click(
            fn=run_bbl_report,
            inputs=[question_input],
            outputs=[status_output, file_output],
        )

        # BUG FIX: reset previously left the BBL textbox populated; clear it too.
        reset_btn.click(
            fn=lambda: ("", None, ""),
            inputs=None,
            outputs=[status_output, file_output, question_input],
        )


demo.launch()