# Risk_Assessment / app.py
# (Hugging Face Space page header — "Ryan2219 / Update app.py / 848d70c verified" —
#  was scraped web residue, not Python source; preserved here as a comment.)
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 22 13:39:46 2025
@author: rmd2219
"""
import pandas as pd
import numpy as np
import os, json, re, time
from openai import OpenAI
import difflib
import gradio as gr
from huggingface_hub import hf_hub_download, HfApi
from huggingface_hub.utils import EntryNotFoundError
# --- Configuration from environment ------------------------------------------
USAGE_DATASET_REPO = os.environ.get("USAGE_DATASET_REPO", "NYSERDA-CRE-Working-Group/nyserda_demo_useage_store")
USAGE_FILENAME = os.environ.get("USAGE_FILENAME", "usage.csv")
MAX_RUNS_PER_USER = int(os.environ.get("MAX_RUNS_PER_USER", "10"))

# --- Load public NYC building datasets shipped alongside the app --------------
# Example: load Pluto
map_pluto_df = pd.read_csv("map_pluto.csv", low_memory=False)
LL87_df = pd.read_csv("LL87.csv", low_memory=False)
LL84_df = pd.read_csv('LL84.csv', low_memory=False)
# LL84 encodes missing values as the literal string "Not Available".
LL84_df = LL84_df.replace("Not Available", np.nan)
permit_df = pd.read_csv('Permit.csv', low_memory=False)
ecb_violation_df = pd.read_csv('ECB_violations.csv', low_memory=False)
violation_df = pd.read_csv('Violations.csv', low_memory=False)

# --- API clients ---------------------------------------------------------------
# BUG FIX: the original `os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")`
# was a no-op when the key was set and crashed with an opaque TypeError
# (assigning None to os.environ) when it was not.  Fail fast with a clear
# message instead; OpenAI() reads the key from the environment itself.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY environment variable is not set.")
HF_TOKEN = os.environ.get("HF_TOKEN")
client = OpenAI()
api = HfApi(token=HF_TOKEN)
def user_id_from_profile(profile: gr.OAuthProfile | None) -> str | None:
    """Derive a normalized user id from a Gradio OAuth profile.

    Returns the profile's display name, trimmed and lower-cased, or None
    when there is no profile or no usable name.
    """
    if profile is None:
        return None
    # profile.name is known to exist on this deployment; preferred_username
    # would be more unique if it ever becomes available.
    raw_name = getattr(profile, "name", None)
    return raw_name.strip().lower() if raw_name else None
def _load_usage_df() -> pd.DataFrame:
    """Fetch the usage-tracking CSV from the HF dataset repo as a DataFrame.

    On the very first run the file does not yet exist in the repo, so an
    empty table with the expected schema is returned instead.
    """
    try:
        csv_path = hf_hub_download(
            repo_id=USAGE_DATASET_REPO,
            repo_type="dataset",
            filename=USAGE_FILENAME,
            token=HF_TOKEN,
        )
    except EntryNotFoundError:
        # First run: no usage file committed yet — start with an empty table.
        return pd.DataFrame(columns=["user_id", "runs", "first_seen", "last_seen"])
    return pd.read_csv(csv_path)
def _save_usage_df(df: pd.DataFrame, commit_message: str) -> None:
    """Write *df* to a local temp file and commit it to the HF usage repo."""
    local_copy = "/tmp/usage.csv"
    df.to_csv(local_copy, index=False)
    # Push the snapshot back to the dataset repo as a single commit.
    api.upload_file(
        path_or_fileobj=local_copy,
        path_in_repo=USAGE_FILENAME,
        repo_id=USAGE_DATASET_REPO,
        repo_type="dataset",
        commit_message=commit_message,
    )
def check_and_increment_quota(user_id: str) -> tuple[bool, int]:
    """Record one run for *user_id* and report whether it was allowed.

    Returns (allowed, remaining) where *remaining* is the number of runs
    left after this one.  A user at or past MAX_RUNS_PER_USER gets
    (False, 0) and no state change.
    """
    now = int(time.time())
    usage = _load_usage_df()

    is_known = (not usage.empty) and (usage["user_id"] == user_id).any()
    if not is_known:
        # Brand-new user: the quota can only already be exhausted when the
        # configured cap is zero or negative.
        if 0 >= MAX_RUNS_PER_USER:
            return False, 0
        first_entry = {
            "user_id": user_id,
            "runs": 1,
            "first_seen": now,
            "last_seen": now,
        }
        usage = pd.concat([usage, pd.DataFrame([first_entry])], ignore_index=True)
        _save_usage_df(usage, commit_message=f"usage: increment {user_id} to 1")
        return True, MAX_RUNS_PER_USER - 1

    row = usage.index[usage["user_id"] == user_id][0]
    run_count = int(usage.loc[row, "runs"])
    if run_count >= MAX_RUNS_PER_USER:
        return False, 0
    run_count += 1
    usage.loc[row, "runs"] = run_count
    usage.loc[row, "last_seen"] = now
    _save_usage_df(usage, commit_message=f"usage: increment {user_id} to {run_count}")
    return True, MAX_RUNS_PER_USER - run_count
#%%
# ---------------------------------------------------------------------------
# Dataset registry and prompt scaffolding
# ---------------------------------------------------------------------------
# All datasets keyed by the display name used in prompts, results and plots.
dataframes = {
    "map_pluto": map_pluto_df,
    "LL87": LL87_df,
    "LL84": LL84_df,
    "Permit": permit_df,
    "ECB Violation": ecb_violation_df,
    "Violation": violation_df
}
# BUG FIX: the hand-written column dict this replaces listed
# ecb_violation_df's columns under "Violation".  Deriving the mapping from
# the registry eliminates that copy/paste class of bug entirely.
column_names = {name: list(df.columns) for name, df in dataframes.items()}
# Normalize BBL to string in every dataset so substring matching works
# uniformly in generate_bbl_report.
for name, df in dataframes.items():
    df['BBL'] = df['BBL'].astype(str)
# Background on Local Law 97, injected into the system prompt below.
ll97_description = """
Local Law 97 (LL97) of New York City limits greenhouse gas emissions for buildings
over 25,000 square feet. Starting in 2024, each building type has a maximum allowed
emissions intensity (metric tons CO2e per square foot per year). Exceeding this limit
leads to a fine of $268 per metric ton over the limit, per year.
Key points:
- Applies to buildings >25,000 sqft or two or more buildings on the same tax lot that together exceed 50,000 gross square feet.
- Limits depend on occupancy group (e.g., residential, office, university).
- Calculate: building_emissions = Site EUI × emissions_factor × floor_area.
- Compare to LL97 threshold for that occupancy group.
- Fine = (building_emissions – threshold × floor_area) × $268 if positive.
- Retrofits (insulation, HVAC upgrades, electrification) can lower site EUI or
emissions factors to avoid fines.
- Emission Factors are:
Electricity: 0.000288962 tCO2e/kWh
Natural Gas: 0.00005311 tCO2e/kBtu
#2 Fuel Oil: 0.00007421 tCO2e/kBtu
#4 Fuel Oil: 0.00007529 tCO2e/kBtu
District Steam: 0.00004493 tCO2e/kBtu
"""
# Accepted spellings / codes per borough (for normalizing user input).
borough_options = {
    "Manhattan": ["MN", "mn", 1, "Manhattan", "MANHATTAN", 'manhattan'],
    "Bronx": ["BX", "bx", 2, "Bronx", "BRONX", 'bronx'],
    "Brooklyn": ['BK', "bk", 3, "Brooklyn", "BROOKLYN", 'brooklyn'],
    "Queens": ['QN', 'qn', 4, "Queens", "QUEENS", 'queens'],
    "Staten Island": ['SI', 'si', 5, "Staten Island", "STATEN ISLAND", 'staten island']
}
# System prompt prepended to every model conversation.
base_message = {
    "role": "system",
    "content": (
        f'''
You are risk assessor for building acquisition in NYC. You will be given information from the following public databases.
- map_pluto: Parcel-level tax lot data (BBL, addresses, zoning, building attributes).
- LL84: Benchmarking data (annual energy use, emissions, EUI, GFA, property IDs, years).
- LL87: Audit/retrofit data (Energy Conservation Measures, audit year, floor area, systems).
- Permit: Information about all permis for work done to buildings in NYC.
- Violations: Record of all DOB violations recorded for NYC
- ECB Violaations: Record of all ECB violations for NYC
Your job is to summarize the risk posed by aquiring this building given all of the information passed to you.
Your output will be a one page PDF summary, attmept to summarize it in ~1 page of text.
Keep in mind LL97 described here: {ll97_description}
'''
    ),
}
def generate_bbl_report(question: str):
    """Build a structured risk report for the BBL referenced in *question*.

    Extracts a 10-digit BBL from the question, collects matching rows from
    every registered dataset, asks the OpenAI model for a strict-JSON risk
    assessment, and renders it to a PDF.

    Returns:
        (sections_dict, pdf_buffer) on success, or a
        (message_string, None) pair when no dataset contains the BBL.

    Raises:
        ValueError: if the question contains no 10-digit BBL.

    NOTE(review): module-level globals (`messages`, `results`,
    `combined_text`) are used so downstream helpers
    (make_structured_pdf -> make_timeline) and interactive debugging can
    read them; they make this function non-reentrant.
    """
    #### Get the BBL from the Question
    global messages
    messages = []
    messages.append(base_message)
    # A BBL is exactly 10 digits: 1 borough + 5 block + 4 lot.
    match = re.search(r'\b\d{10}\b', question)
    if match:
        bbl = match.group(0)
    else:
        raise ValueError("No valid BBL found in query")
    global results
    results = {}
    for name, df in dataframes.items():
        # Substring match (not equality) tolerates padded / concatenated BBLs.
        subset = df[df['BBL'].astype(str).apply(lambda x: str(bbl) in x)]
        subset = subset.drop(['BBL'], axis=1)
        if not subset.empty:
            # Drop rows that carry no data beyond the label/date columns the
            # timeline plot already uses.
            cols_to_exclude = ['ISSUE_DATE', 'VIOLATION_TYPE_DESC', 'VIOLATION_TYPE', 'VIOLATION_DESCRIPTION', 'VIOLATION_CATEGORY', 'SEVERITY', 'Issuance Date', 'Work Type']
            cols_to_check = [c for c in subset.columns if c not in cols_to_exclude]
            subset = subset.dropna(subset=cols_to_check, how='all')
            results[name] = subset
    if not results:
        return f"No data found for BBL {bbl}.", None
    global combined_text
    combined_text = f"### Summary Data for BBL {bbl}\n\n"
    for name, df in results.items():
        # Convert each subset to markdown (truncated for safety)
        text_snippet = df.to_markdown(index=False)
        combined_text += f"#### Dataset: {name}\n{text_snippet}\n\n"
    messages.append({"role": "user", "content": f"""
You are a risk assesment tool to pre-screen buildings for aquisition for commercial real estate firms.
**Task:** Analyze the available data to identify patterns, risks, and compliance issues.
Give particular attention on the timeline of both permits and violations.
- When looking at permits, note both major renovations as well as frequency of permits for similar systems in case this could indicate a faulty system.
- Cross check this data with energy use/audit information if it is available.
- See if permits were filed to fix previous violations.
Highlight large renovations or major violations that would materially affect a potential new building owner.
Using the data available for BBL {bbl}, write a structured risk assessment report
**Output Requirements:**
Return your full response in **strict JSON** format with the following top-level keys:
- "Assesment"
- "Executive Summary"
- "Building Overview"
- "Risk Factors"
- "Compliance History"
- "Recommendations"
- "Key Items"
Each section must contain a paragraph of text **except** "Assesment", "Key Items", "Recommendations",
"Assesment" should be a string with a breif description of the overall risk. i.e. High Risk, Low Risk or some combination
"Key Items" must follow this specific dictionary structure:
```json
"Key Items": {{
"Permit": {{
"Date": ["YYYY-MM-DD", ...],
"Label": ["Description of permit", ...]
}},
"Violation": {{
"Date": ["YYYY-MM-DD", ...],
"Label": ["Description of violation", ...]
}},
"ECB Violation": {{
"Date": ["YYYY-MM-DD", ...],
"Label": ["Description of ECB violation", ...]
}}
}}
For Key Items:
Include only the most important permits and violations (the ones that should be annotated on a timeline plot).
Select events that represent major renovations, structural or system upgrades, or serious compliance issues.
Keep the descriptions very breif. They are annotating points and should only be a few words long. Only provide more details if absolutely neccesary.
"Recommendations" must be a list of strings where the strings are your reccomendations. These will be shown to the user as a numbered list and can be as detailed as you would like.
- These are reccomandations to a potential buyer, for things they would need to do before and after purchase.
"Executive Summary" must also be a list of strings. The first string must be a summary sentance i.e. "This property is a medium risk property because:"
- All following strings will be organized into bullet points. Only include information relevant to the risk assesment, this section is meant to be breif < 5 bullets and bullets must be concise < 1 line
Ensure the "Compliance History" section is a detailed paracgraph and highlights all important violations, patterns of violations and unresolved violations.
Give a comprehensive overview of compliance history.
When composing resonse keep in mind I am outputting this to a pdf, avoid using characters that may break fonts.
Avoid using smart quotes (‘ ’ “ ”), en/em dashes (– —), and special bullets (•, –) in output text. Use ASCII equivalents: straight quotes, single dash (-), and hyphens.
Question: {question}
Data: {combined_text}
"""})
    # NOTE(review): model id "gpt-5.2" and the commented-out reasoning
    # parameters are deployment-specific — confirm against the OpenAI API.
    response = client.chat.completions.create(
        model="gpt-5.2",
        #reasoning_effort = 'high',
        #verbosity = 'high',
        messages = messages
    )
    # The prompt demands strict JSON; this raises json.JSONDecodeError if
    # the model deviates (caught by run_bbl_report's blanket handler).
    sections = json.loads(response.choices[0].message.content)
    pdf_buffer = make_structured_pdf(sections, bbl)
    return sections, pdf_buffer
#%%
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak, Spacer, HRFlowable, Image, ListFlowable, ListItem
# Characters that commonly break the standard PDF fonts, mapped to ASCII.
_ASCII_FALLBACKS = str.maketrans({
    "\u2013": "-",   # en dash
    "\u2014": "-",   # em dash
    "\u2212": "-",   # minus sign
    "\u2018": "'",   # left single quote
    "\u2019": "'",   # right single quote
    "\u201c": '"',   # left double quote
    "\u201d": '"',   # right double quote
    "\u2022": "-",   # bullet
})

def normalize_text(text):
    """Return *text* with non-ASCII punctuation replaced by ASCII equivalents.

    The LLM prompt forbids smart quotes, en/em dashes and special bullets,
    but this acts as a safety net so stray characters cannot break the PDF
    fonts.  Previously only the three dash variants were handled; smart
    quotes and bullets are now covered too, via a single-pass
    str.translate instead of chained .replace() calls.
    """
    return text.translate(_ASCII_FALLBACKS)
def make_structured_pdf(sections, bbl):
    """Render the JSON *sections* returned by the model into a PDF.

    Args:
        sections: dict with the top-level keys requested from the model
            ("Assesment", "Executive Summary", "Recommendations",
            "Key Items", ...).
        bbl: 10-digit BBL string used in the title and for the timeline.

    Returns:
        io.BytesIO, positioned at 0, containing the finished PDF.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter,
                            rightMargin=50, leftMargin=50, topMargin=60, bottomMargin=50)
    styles = getSampleStyleSheet()
    # Custom styles
    title_style = ParagraphStyle(
        name="Title", parent=styles["Heading1"],
        alignment=1, fontSize=18, spaceAfter=10)
    sub_title_style = ParagraphStyle(
        name="SubTitle", parent=styles["Heading1"],
        alignment=1, fontSize=14, spaceAfter=20)
    header_style = ParagraphStyle(
        name="Header", parent=styles["Heading2"],
        textColor=colors.darkblue, spaceAfter=6)
    body_style = ParagraphStyle(
        name="Body", parent=styles["BodyText"],
        fontSize=11, leading=15, spaceAfter=12)
    story = []
    # Title
    story.append(Paragraph(f"Building Risk Report – BBL {bbl}", title_style))
    story.append(Paragraph(f"Assesment: {str(sections['Assesment'])}", sub_title_style))
    story.append(HRFlowable(width="100%", thickness=1, color=colors.darkblue))
    story.append(Spacer(1, 12))
    # BUG FIX: timeline_buf was only assigned when a "Key Items" section was
    # present, raising NameError below otherwise.  Default to None and guard.
    timeline_buf = None
    # Sections
    for section_name, content in sections.items():
        if section_name == "Key Items":
            global sample_dict  # debugging hook: expose the last Key Items dict
            sample_dict = content
            timeline_buf = make_timeline(results, content, bbl)
        elif section_name == 'Assesment':
            continue  # already rendered in the header above
        else:
            story.append(Paragraph(section_name, header_style))
            if section_name.lower() == "recommendations":
                # Numbered list of recommendation strings.
                list_items = [ListItem(Paragraph(r, body_style)) for r in content]
                story.append(ListFlowable(list_items, bulletType='1', bulletFormat='%s)', start='1', leftIndent=20))
                story.append(Spacer(1, 16))
            elif section_name.lower() == "executive summary":
                # First string is the lead sentence; the rest become bullets.
                list_items = [ListItem(Paragraph(r, body_style)) for r in content[1:]]
                story.append(Paragraph(content[0], body_style))
                story.append(ListFlowable(list_items, bulletType='1', bulletFormat='%s)', start='1', leftIndent=20))
                story.append(Spacer(1, 16))
            else:
                content = str(content).strip()
                # Normal paragraph behavior for other sections
                for paragraph in content.split("\n"):
                    paragraph = paragraph.strip()
                    if paragraph:
                        story.append(Paragraph(normalize_text(paragraph), body_style))
                story.append(Spacer(1, 16))
    if timeline_buf is not None and timeline_buf.getbuffer().nbytes > 0:
        story.append(PageBreak())
        story.append(HRFlowable(width="100%", thickness=1, color=colors.darkblue))
        story.append(Spacer(1, 20))
        story.append(Paragraph("Timeline of Key Items", header_style))
        story.append(Image(timeline_buf, width=600, height=600))
    doc.build(story)
    buffer.seek(0)
    return buffer
#question = 'What is the risk associated with BBL 3034610030'
#question = 'What is the risk associated with BBL 1000160120'
#question = '4021390001'
#question = '2030620043'
#question = '5005450110'
#question = '1019620019' # Butler
#question = '1005451111'
#print(generate_bbl_report(question))
#%%
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from adjustText import adjust_text
import contextlib, io
import textwrap, random
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.gridspec as gridspec
def make_timeline(results, content, bbl):
    """Plot a timeline of permits, DOB violations and ECB violations.

    The top axis shows three horizontal event tracks (permits at y=1.0,
    violations at y=0.8, ECB violations at y=0.6) with the model-selected
    "Key Items" annotated; the bottom row shows per-category frequency bars.

    Args:
        results: dict of dataset-name -> DataFrame subsets for this BBL
            (as built by generate_bbl_report).
        content: the model's "Key Items" dict, e.g.
            {"Permit": {"Date": [...], "Label": [...]}, ...}.
        bbl: BBL string (unused here; kept for interface stability).

    Returns:
        io.BytesIO holding the rendered PNG.
    """
    ##### Get Data
    permits = results.get('Permit')
    violations = results.get('Violation')
    ecb_violations = results.get('ECB Violation')
    ######
    fig = plt.figure(figsize=(12, 12))
    gs = gridspec.GridSpec(2, 3, height_ratios=[3, 1], hspace=0.2, wspace=0.3)
    ax = fig.add_subplot(gs[0, :])   # main timeline
    ax1 = fig.add_subplot(gs[1, 0])  # permit frequency
    ax2 = fig.add_subplot(gs[1, 1])  # violation frequency
    ax3 = fig.add_subplot(gs[1, 2])  # ECB violation frequency
    permit_colors = {
        "Boiler": "#1f77b4",           # blue
        "Curb Cut": "#ff7f0e",         # orange
        "Construction Equipment": "#2ca02c",  # green
        "Fire Alarm": "#d62728",       # red
        "Fuel Burning": "#9467bd",     # purple
        "Fire Suppression": "#8c564b", # brown
        "Fuel Storage": "#e377c2",     # pink
        "Mechanical/HVAC": "#7f7f7f",  # gray
        "New Building": "#bcbd22",     # olive
        "Other": "#17becf",            # cyan
        "Plumbing": "#aec7e8",         # light blue
        "Standpipe": "#ffbb78",        # light orange
        "Sprinkler": "#98df8a",        # light green
    }
    # Right-hand x position used for the track captions.
    x_label = mdates.date2num(pd.to_datetime('2025-06-01'))
    # BUG FIX: these were previously assigned only inside their respective
    # branches, raising NameError whenever a dataset was missing for the BBL
    # (e.g. permits but no violations crashed at the `all_dates` concat and
    # at the `active_violations` checks).
    active_violations = False
    permit_dates = pd.Series(dtype='datetime64[ns]')
    violation_dates = pd.Series(dtype='datetime64[ns]')
    ecb_violation_dates = pd.Series(dtype='datetime64[ns]')
    if permits is not None and not permits.empty:
        permit_dates = pd.to_datetime(permits['Issuance Date'], errors='coerce')
        permit_labels = permits['Work Type'].fillna('Other')
        # Drop rows with unparseable dates, keeping labels aligned.
        mask = permit_dates.notna()
        permit_dates = permit_dates[mask]
        permit_labels = permit_labels[mask]
        # Sort by date
        sorted_idx = np.argsort(permit_dates)
        permit_dates = permit_dates.iloc[sorted_idx]
        permit_labels = permit_labels.iloc[sorted_idx]
        ########### Sidetrack: legend / frequency plot
        counts = permit_labels.value_counts()
        labels = counts.index.tolist()
        # BUG FIX: .get with a default so an unseen permit type cannot KeyError.
        colors = [permit_colors.get(label, "#333333") for label in labels]
        ax1.bar(range(len(labels)), counts.values, color=colors)
        ax1.set_xticks(range(len(labels)))
        ax1.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
        ax1.set_ylabel("Count")
        ax1.set_title('Permit Type and Frequency', fontsize=10)
        # Baseline + points on the permit track (y = 1.0).
        colors = [permit_colors.get(cat, "#333333") for cat in permit_labels]
        ax.hlines(1, permit_dates.min(), permit_dates.max(), color='gray', alpha=0.7, linewidth=2)
        ax.scatter(permit_dates, np.full(len(permit_dates), 1.0), color=colors, s=60, zorder=3)
        ax.text(
            x_label + 300, 1,
            "Permit History",
            va='center', ha='left',
            fontsize=8, fontweight='bold', color='gray'
        )
        # --- Key permits chosen by the model, circled and labelled ---
        permit_items = content.get('Permit', {})
        severe_dates = pd.to_datetime(permit_items.get('Date', []))
        severe_labels = permit_items.get('Label', [])
        ax.scatter(severe_dates, np.full(len(severe_dates), 1),
                   facecolor='none', edgecolor='black', s=120, lw=1.2, zorder=4)
        label_y = []  # vertical position chosen for each annotation
        min_gap = np.timedelta64(5000, 'D')  # how close in time before stacking higher
        base_y = 1.03
        offset_step = 0.02
        for i, date in enumerate(severe_dates):
            y = base_y
            # Raise the label when a nearby earlier label occupies this level.
            for j in range(i):
                if abs(date - severe_dates[j]) < min_gap and abs(label_y[j] - y) < offset_step:
                    y = label_y[j] + offset_step
            label_y.append(y)
            ax.plot([date, date], [1, y], color='gray', lw=0.8, zorder=2)
            ax.text(date, y, severe_labels[i], rotation=0, ha='center', va='bottom',
                    fontsize=7, color='black')
    violation_colors = {
        'Boiler': "#1f77b4",
        'Construction': "#2ca02c",
        'Elevator': 'orange',
        'Emergency': 'red',
        'High Pressure Boiler': '#00008B',
        'Immediate Emergency': '#8B0000',
        'Landmark Building': "#7f7f7f",
        'Plumbing': "#aec7e8",
        'Unsafe Building': "k",
        'Zoning': 'yellow',
        'Other - Likely Safety': 'k'}
    if violations is not None and not violations.empty:
        violation_dates = pd.to_datetime(violations['ISSUE_DATE'], errors='coerce')
        violation_labels = violations['VIOLATION_TYPE_DESC'].fillna('Other - Likely Safety')
        violation_status = violations['VIOLATION_CATEGORY']
        active_mask = violation_status.str.contains("ACTIVE", case=False, na=False)
        # BUG FIX: the masked labels were assigned to a misspelled name
        # ("violtion_labels") and the unmasked, unsorted series was used for
        # colors/counts — a length mismatch whenever a date failed to parse.
        # Keep dates, labels and the active mask aligned through mask + sort.
        mask = violation_dates.notna()
        violation_dates = violation_dates[mask]
        violation_labels = violation_labels[mask]
        active_mask = active_mask[mask]
        ########### Sidetrack: legend / frequency plot
        counts = violation_labels.value_counts()
        labels = counts.index.tolist()
        colors = [violation_colors.get(label, "#333333") for label in labels]
        ax2.bar(range(len(labels)), counts.values, color=colors)
        ax2.set_xticks(range(len(labels)))
        ax2.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
        ax2.set_ylabel("Count")
        ax2.set_title('Violation Type and Frequency', fontsize=10)
        # Sort by date
        sorted_idx = np.argsort(violation_dates)
        violation_dates = violation_dates.iloc[sorted_idx]
        violation_labels = violation_labels.iloc[sorted_idx]
        active_mask = active_mask.iloc[sorted_idx]
        # Baseline + points on the violation track (y = 0.8).
        colors = [violation_colors.get(cat, "#333333") for cat in violation_labels]
        ax.hlines(0.8, violation_dates.min(), violation_dates.max(), color='gray', alpha=0.7, linewidth=2)
        ax.scatter(violation_dates, np.full(len(violation_dates), 0.8), color=colors, s=60, zorder=3)
        # Ring still-active violations in red.
        ax.scatter(violation_dates[active_mask], np.full(len(violation_dates[active_mask]), 0.8),
                   color='none', s=80, edgecolor='red', zorder=4)
        if active_mask.any():
            active_violations = True
        # --- Key violations chosen by the model ---
        violation_items = content.get('Violation', {})
        severe_dates = pd.to_datetime(violation_items.get('Date', []))
        severe_labels = violation_items.get('Label', [])
        ax.scatter(severe_dates, np.full(len(severe_dates), 0.8),
                   color='none', s=80, edgecolor='black', zorder=4)
        label_y = []
        min_gap = np.timedelta64(4000, 'D')  # how close in time before stacking higher
        base_y = 0.83
        offset_step = 0.02
        for i, date in enumerate(severe_dates):
            y = base_y
            for j in range(i):
                if abs(date - severe_dates[j]) < min_gap and abs(label_y[j] - y) < offset_step:
                    y = label_y[j] + offset_step
            label_y.append(y)
            ax.plot([date, date], [0.8, y], color='gray', lw=0.8, zorder=2)
            ax.text(date, y, severe_labels[i], rotation=0, ha='center', va='bottom',
                    fontsize=7, color='black')
    ax.text(
        x_label + 200, 0.8,
        "Violation History",
        va='center', ha='left',
        fontsize=8, fontweight='bold', color='gray'
    )
    ecb_color = {
        'Administrative': 'blue',
        'Boilers': "#1f77b4",
        'Construction': "#2ca02c",
        'Cranes and Derricks': "#2ca02c",
        'Elevators': 'orange',
        'HPD': '#00008B',
        'Local Law': 'cyan',
        'Padlock': "#7f7f7f",
        'Plumbing': "#aec7e8",
        'Public Assembly': 'blue',
        'Quality of Life': 'blue',
        'Signs': 'yellow',
        'Site Safety': 'red',
        'Unknown': 'gray',
        'Zoning': 'yellow'}
    if ecb_violations is not None and not ecb_violations.empty:
        ecb_violation_dates = pd.to_datetime(ecb_violations['ISSUE_DATE'], errors='coerce')
        ecb_violation_labels = ecb_violations['VIOLATION_TYPE'].fillna('Unknown')
        ecb_severity = ecb_violations['SEVERITY']
        # BUG FIX: same misspelled-name bug as the violation track
        # ("ecb_violtion_labels"); keep everything aligned through mask + sort.
        mask = ecb_violation_dates.notna()
        ecb_violation_dates = ecb_violation_dates[mask]
        ecb_violation_labels = ecb_violation_labels[mask]
        ecb_severity = ecb_severity[mask]
        counts = ecb_violation_labels.value_counts()
        labels = counts.index.tolist()
        colors = [ecb_color.get(label, "#333333") for label in labels]
        ax3.bar(range(len(labels)), counts.values, color=colors)
        ax3.set_xticks(range(len(labels)))
        ax3.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
        ax3.set_ylabel("Count")
        ax3.set_title('ECB Violation Type and Frequency', fontsize=10)
        sorted_idx = np.argsort(ecb_violation_dates)
        ecb_violation_dates = ecb_violation_dates.iloc[sorted_idx]
        ecb_violation_labels = ecb_violation_labels.iloc[sorted_idx]
        ecb_severity = ecb_severity.iloc[sorted_idx]
        # Baseline + points on the ECB track (y = 0.6).
        colors = [ecb_color.get(cat, "#333333") for cat in ecb_violation_labels]
        ax.hlines(0.6, ecb_violation_dates.min(), ecb_violation_dates.max(), color='gray', alpha=0.7, linewidth=2)
        ax.scatter(ecb_violation_dates, np.full(len(ecb_violation_dates), 0.6), color=colors, s=60, zorder=3)
        # --- Highlight hazardous-class ECB violations with red rings ---
        severe_mask = ecb_severity.isin(["CLASS - 1", "CLASS - 2", "Hazardous"])
        if severe_mask.any():
            active_violations = True
        severe_dates = ecb_violation_dates[severe_mask]
        ax.scatter(severe_dates, np.full(len(severe_dates), 0.6),
                   color='none', s=80, edgecolor='red', zorder=4)
        # --- Key ECB violations chosen by the model (labels go below track) ---
        ecb_items = content.get('ECB Violation', {})
        severe_dates = pd.to_datetime(ecb_items.get('Date', []))
        severe_labels = ecb_items.get('Label', [])
        ax.scatter(severe_dates, np.full(len(severe_dates), 0.6),
                   color='none', s=80, edgecolor='black', zorder=4)
        label_y = []
        min_gap = np.timedelta64(5000, 'D')  # how close in time before stacking lower
        base_y = 0.57
        offset_step = 0.02
        for i, date in enumerate(severe_dates):
            y = base_y
            for j in range(i):
                if abs(date - severe_dates[j]) < min_gap and abs(label_y[j] - y) < offset_step:
                    y = label_y[j] - offset_step
            label_y.append(y)
            ax.plot([date, date], [0.6, y], color='gray', lw=0.8, zorder=2)
            ax.text(date, y, severe_labels[i], rotation=0, ha='center', va='top',
                    fontsize=7, color='black')
    all_dates = np.concatenate([
        mdates.date2num(permit_dates),
        mdates.date2num(violation_dates),
        mdates.date2num(ecb_violation_dates)
    ])
    ax.text(
        x_label, 0.6,
        "ECB Violation History",
        va='center', ha='left',
        fontsize=8, fontweight='bold', color='gray'
    )
    if active_violations:
        unresolved_handle = Line2D(
            [0], [0],
            marker='o', linestyle='none',
            markerfacecolor='none',   # no fill
            markeredgecolor='red',    # red outline
            markeredgewidth=1.2,
            markersize=6,
            label='Unresolved or Hazardous Violation'
        )
        leg_unresolved = ax.legend(
            handles=[unresolved_handle],
            loc='upper center',
            bbox_to_anchor=(0.17, 0.4),
            frameon=True,
            fontsize=10, title_fontsize=10
        )
        fig.add_artist(leg_unresolved)
    # Adjust layout to make space for the rotated bar labels.
    fig.subplots_adjust(bottom=0.25)
    ax.set_ylim(0.4, 1.2)
    # BUG FIX: guard against an empty date pool (min() of an empty array).
    if len(all_dates) > 0:
        ax.set_xlim(min(all_dates) - 1000, x_label + 2500)
    ax.set_yticks([])
    ax.set_xlabel("Issuance Date")
    ax.set_title("Building Timeline", fontsize=12, fontweight='bold')
    timeline_buf = io.BytesIO()
    plt.savefig(timeline_buf, format="png", dpi=300, bbox_inches="tight")
    plt.close()
    timeline_buf.seek(0)
    return timeline_buf
#make_timeline(results, sample_dict, 'test')
#%%
import gradio as gr
import re, tempfile, traceback
import os
import json
import pandas as pd
def run_bbl_report(question, profile: gr.OAuthProfile | None):
    """Gradio click handler: enforce login and quota, then build the report.

    Args:
        question: free text containing a 10-digit BBL.
        profile: OAuth profile auto-injected by Gradio (None if signed out).

    Returns:
        (status_markdown, pdf_path_or_None).

    Raises:
        gr.Error: when the user is signed out or over quota (shown by Gradio).
    """
    uid = user_id_from_profile(profile)
    if uid is None:
        raise gr.Error("Please sign in with Hugging Face to use this demo.")
    allowed, remaining = check_and_increment_quota(uid)
    if not allowed:
        raise gr.Error(f"Usage limit reached: {MAX_RUNS_PER_USER} runs per user.")
    if remaining <= 2:
        gr.Warning(f"⚠️ Only {remaining} run(s) left!")
    else:
        gr.Info(f"✓ Runs remaining: {remaining}")
    try:
        sections, pdf_buffer = generate_bbl_report(str(question))
        # generate_bbl_report returns (message, None) when the BBL matched
        # nothing; surface that as a user-facing error string.
        if not pdf_buffer:
            return f"❌ **Exception:** BBL Not Found", None
        # Safe: generate_bbl_report already raised if no 10-digit BBL existed.
        bbl = re.search(r'\b\d{10}\b', str(question)).group(0)
        with tempfile.NamedTemporaryFile(delete=False, prefix=f"BBL_{bbl}_Report_", suffix=".pdf") as tmp:
            # pdf_buffer might be BytesIO or bytes; handle both:
            data = pdf_buffer.getvalue() if hasattr(pdf_buffer, "getvalue") else pdf_buffer
            tmp.write(data)
            tmp_path = tmp.name
        return f"✅ Report successfully generated for BBL {bbl}", tmp_path
    except Exception as e:
        tb = traceback.format_exc()
        # Show full traceback in the Markdown output
        return f"❌ **Exception:** {e}\n\n```\n{tb}\n```", None
# -----------------------------
# Create Gradio interface
# -----------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # LoginButton enables HF OAuth; Gradio auto-injects the resulting
    # gr.OAuthProfile into run_bbl_report's `profile` parameter, which is
    # why it is absent from the click() inputs list below.
    gr.LoginButton()
    with gr.Group(visible=True) as main_app:
        gr.Markdown("# 🏢 NYC Building Risk Assessment Tool")
        gr.Markdown("Enter a 10-digit **BBL** (e.g. `1005880006`) to generate a full PDF risk report.")
        gr.Markdown("Due to storage limits on this website, databases limited to Manhattan.")
        gr.Markdown("Note that generating a PDF takes ~ 100 sec")
        gr.Markdown(
            """
            To find a BBL, visit:
            <a href="https://experience.arcgis.com/experience/d826b115c87841d491c2b41fcb175305"
            target="_blank"
            style="color:#1f77b4; text-decoration:none; font-weight:bold;">
            🗺️ NYC Map Portal (ArcGIS)
            </a>
            """
        )
        with gr.Row():
            question_input = gr.Textbox(
                label="Enter BBL",
                placeholder="Example: 1005880006",
                lines=2,
            )
        with gr.Row():
            run_btn = gr.Button("Generate PDF Report", variant="primary")
            reset_btn = gr.Button("🔄 Reset")
        with gr.Row():
            status_output = gr.Markdown()
            file_output = gr.File(label="Download Report")
        # Main action: build the report (profile injected via OAuth).
        run_btn.click(
            fn=run_bbl_report,
            inputs=[question_input],
            outputs=[status_output, file_output],
        )
        # Reset clears the status text and removes the downloadable file.
        reset_btn.click(
            fn=lambda: ("", None),  # clears outputs
            inputs=None,
            outputs=[status_output, file_output],
        )
demo.launch()