| |
| """ |
| Created on Mon Sep 22 13:39:46 2025 |
| |
| @author: rmd2219 |
| """ |
|
|
| import pandas as pd |
| import numpy as np |
| import os, json, re, time |
| from openai import OpenAI |
| import difflib |
| import gradio as gr |
|
|
| from huggingface_hub import hf_hub_download, HfApi |
| from huggingface_hub.utils import EntryNotFoundError |
|
|
# --- Per-user usage tracking configuration (stored in a HF dataset repo) ---
USAGE_DATASET_REPO = os.environ.get("USAGE_DATASET_REPO", "NYSERDA-CRE-Working-Group/nyserda_demo_useage_store")
USAGE_FILENAME = os.environ.get("USAGE_FILENAME", "usage.csv")
MAX_RUNS_PER_USER = int(os.environ.get("MAX_RUNS_PER_USER", "10"))


# --- NYC public building datasets shipped alongside the app ---
map_pluto_df = pd.read_csv("map_pluto.csv", low_memory=False)
LL87_df = pd.read_csv("LL87.csv", low_memory=False)
LL84_df = pd.read_csv("LL84.csv", low_memory=False)
# LL84 uses the literal string "Not Available" for missing values; normalize to NaN.
LL84_df = LL84_df.replace("Not Available", np.nan)

permit_df = pd.read_csv("Permit.csv", low_memory=False)
ecb_violation_df = pd.read_csv("ECB_violations.csv", low_memory=False)
violation_df = pd.read_csv("Violations.csv", low_memory=False)


# BUG FIX: the original `os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")`
# raised an opaque TypeError ("str expected, not NoneType") when the variable
# was unset.  Fail fast with a clear message instead.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY environment variable must be set.")
HF_TOKEN = os.environ.get("HF_TOKEN")


client = OpenAI()          # reads OPENAI_API_KEY from the environment
api = HfApi(token=HF_TOKEN)
def user_id_from_profile(profile: gr.OAuthProfile | None) -> str | None:
    """Return a normalized user id (lowercased profile name), or None.

    None is returned when the user is not signed in or the profile has no
    usable name attribute.
    """
    if profile is None:
        return None
    username = getattr(profile, "name", None)
    return username.strip().lower() if username else None
|
|
def _load_usage_df() -> pd.DataFrame:
    """Fetch the usage table from the HF dataset repo.

    Returns an empty table with the expected schema when the file has never
    been uploaded (first run of the app).
    """
    try:
        local_path = hf_hub_download(
            repo_id=USAGE_DATASET_REPO,
            repo_type="dataset",
            filename=USAGE_FILENAME,
            token=HF_TOKEN,
        )
    except EntryNotFoundError:
        # usage.csv does not exist in the repo yet.
        return pd.DataFrame(columns=["user_id", "runs", "first_seen", "last_seen"])
    return pd.read_csv(local_path)
|
|
def _save_usage_df(df: pd.DataFrame, commit_message: str) -> None:
    """Write the usage table locally, then push it to the HF dataset repo."""
    local_copy = "/tmp/usage.csv"
    df.to_csv(local_copy, index=False)

    api.upload_file(
        path_or_fileobj=local_copy,
        path_in_repo=USAGE_FILENAME,
        repo_id=USAGE_DATASET_REPO,
        repo_type="dataset",
        commit_message=commit_message,
    )
|
|
def check_and_increment_quota(user_id: str) -> tuple[bool, int]:
    """Check the per-user run quota and, if allowed, record one more run.

    Args:
        user_id: normalized user id (see user_id_from_profile).

    Returns:
        (allowed, remaining): whether this run may proceed, and how many runs
        the user has left after it.

    Note: read-modify-write against the shared CSV is not atomic; concurrent
    runs could race.  Acceptable for this demo's traffic.
    """
    now = int(time.time())
    df = _load_usage_df()

    matches = [] if df.empty else df.index[df["user_id"] == user_id]
    if len(matches) == 0:
        # New user.  (The original code had a dead `runs = 0; if runs >= MAX`
        # check here -- it could only fire when the quota itself was <= 0,
        # which is what we test explicitly now.)
        if MAX_RUNS_PER_USER <= 0:
            return False, 0
        new_row = {
            "user_id": user_id,
            "runs": 1,
            "first_seen": now,
            "last_seen": now,
        }
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
        _save_usage_df(df, commit_message=f"usage: increment {user_id} to 1")
        return True, MAX_RUNS_PER_USER - 1

    idx = matches[0]
    runs = int(df.loc[idx, "runs"])

    if runs >= MAX_RUNS_PER_USER:
        return False, 0

    runs += 1
    df.loc[idx, "runs"] = runs
    df.loc[idx, "last_seen"] = now

    _save_usage_df(df, commit_message=f"usage: increment {user_id} to {runs}")
    return True, MAX_RUNS_PER_USER - runs
| |
|
|
| |
| |
| |
# Registry of every dataset queried per BBL.  Key names are also used as
# section headings in the LLM prompt and as keys in the "Key Items" JSON.
dataframes = {
    "map_pluto": map_pluto_df,
    "LL87": LL87_df,
    "LL84": LL84_df,
    "Permit": permit_df,
    "ECB Violation": ecb_violation_df,
    "Violation": violation_df,
}

# Column listing per dataset.
# BUG FIX: "Violation" previously listed ecb_violation_df's columns
# (copy/paste error); derive every entry from the registry instead.
column_names = {name: list(df.columns) for name, df in dataframes.items()}

# Normalize BBL to string in every dataset so substring matching in
# generate_bbl_report works uniformly.
for name, df in dataframes.items():
    df["BBL"] = df["BBL"].astype(str)
|
|
# LL97 primer embedded in the system prompt.  Plain ASCII only: the prompt
# itself instructs the model to avoid non-ASCII characters, and this text
# previously contained mojibake multiplication/minus signs (BUG FIX).
ll97_description = """
Local Law 97 (LL97) of New York City limits greenhouse gas emissions for buildings
over 25,000 square feet. Starting in 2024, each building type has a maximum allowed
emissions intensity (metric tons CO2e per square foot per year). Exceeding this limit
leads to a fine of $268 per metric ton over the limit, per year.

Key points:
- Applies to buildings >25,000 sqft or two or more buildings on the same tax lot that together exceed 50,000 gross square feet.
- Limits depend on occupancy group (e.g., residential, office, university).
- Calculate: building_emissions = Site EUI x emissions_factor x floor_area.
- Compare to LL97 threshold for that occupancy group.
- Fine = (building_emissions - threshold x floor_area) x $268 if positive.
- Retrofits (insulation, HVAC upgrades, electrification) can lower site EUI or
  emissions factors to avoid fines.
- Emission Factors are:
    Electricity: 0.000288962 tCO2e/kWh
    Natural Gas: 0.00005311 tCO2e/kBtu
    #2 Fuel Oil: 0.00007421 tCO2e/kBtu
    #4 Fuel Oil: 0.00007529 tCO2e/kBtu
    District Steam: 0.00004493 tCO2e/kBtu
"""
|
|
|
|
# Accepted spellings/codes for each borough, used to normalize user input.
# Each entry lists: code, lowercase code, borough number, name, NAME, name.
_BOROUGHS = [
    ("Manhattan", "MN", 1),
    ("Bronx", "BX", 2),
    ("Brooklyn", "BK", 3),
    ("Queens", "QN", 4),
    ("Staten Island", "SI", 5),
]
borough_options = {
    name: [code, code.lower(), number, name, name.upper(), name.lower()]
    for name, code, number in _BOROUGHS
}
|
|
# System prompt shared by every report request (OpenAI chat-message format).
# The LL97 primer is interpolated once at import time.
# BUG FIX: corrected typos in the LLM-facing text ("permis", "Violaations",
# "aquiring", "attmept", missing article).
base_message = {
    "role": "system",
    "content": (
        f'''
You are a risk assessor for building acquisition in NYC. You will be given information from the following public databases.
- map_pluto: Parcel-level tax lot data (BBL, addresses, zoning, building attributes).
- LL84: Benchmarking data (annual energy use, emissions, EUI, GFA, property IDs, years).
- LL87: Audit/retrofit data (Energy Conservation Measures, audit year, floor area, systems).
- Permit: Information about all permits for work done to buildings in NYC.
- Violations: Record of all DOB violations recorded for NYC
- ECB Violations: Record of all ECB violations for NYC

Your job is to summarize the risk posed by acquiring this building given all of the information passed to you.
Your output will be a one page PDF summary, attempt to summarize it in ~1 page of text.

Keep in mind LL97 described here: {ll97_description}
'''
    ),
}
|
|
def generate_bbl_report(question):
    """Generate a structured risk report for the BBL mentioned in *question*.

    Extracts the first 10-digit BBL from the question, collects matching rows
    from every registered dataset, asks the OpenAI model for a strict-JSON
    risk assessment, and renders it into a PDF.

    Returns:
        (sections, pdf_buffer): the parsed JSON dict and an io.BytesIO PDF,
        or (message, None) when no dataset contains the BBL.

    Raises:
        ValueError: if the question contains no 10-digit BBL.

    Side effects: rebinds module-level globals `messages`, `results` and
    `combined_text`; make_structured_pdf reads `results` to build the timeline.
    """
    global messages
    messages = []
    messages.append(base_message)
    # A BBL is a 10-digit borough/block/lot identifier.
    match = re.search(r'\b\d{10}\b', question)
    if match:
        bbl = match.group(0)
    else:
        raise ValueError("No valid BBL found in query")

    global results
    results = {}
    for name, df in dataframes.items():
        # Substring match on BBL rather than equality -- presumably to tolerate
        # formatting differences across datasets; TODO confirm this is intended.
        subset = df[df['BBL'].astype(str).apply(lambda x: str(bbl) in x)]
        subset = subset.drop(['BBL'], axis=1)

        if not subset.empty:
            # Drop rows that carry no information beyond the timeline-plot
            # columns (dates/labels handled separately by make_timeline).
            cols_to_exclude = ['ISSUE_DATE', 'VIOLATION_TYPE_DESC', 'VIOLATION_TYPE', 'VIOLATION_DESCRIPTION', 'VIOLATION_CATEGORY', 'SEVERITY', 'Issuance Date', 'Work Type']
            cols_to_check = [c for c in subset.columns if c not in cols_to_exclude]
            subset = subset.dropna(subset=cols_to_check, how='all')
            results[name] = subset

    if not results:
        return f"No data found for BBL {bbl}.", None

    global combined_text

    # Render every matching table as markdown for the prompt.
    combined_text = f"### Summary Data for BBL {bbl}\n\n"
    for name, df in results.items():
        text_snippet = df.to_markdown(index=False)
        combined_text += f"#### Dataset: {name}\n{text_snippet}\n\n"

    # NOTE: the JSON keys below (including the "Assesment" spelling) are load-
    # bearing -- make_structured_pdf looks them up verbatim.
    messages.append({"role": "user", "content": f"""
You are a risk assesment tool to pre-screen buildings for aquisition for commercial real estate firms.
**Task:** Analyze the available data to identify patterns, risks, and compliance issues.
Give particular attention on the timeline of both permits and violations.
- When looking at permits, note both major renovations as well as frequency of permits for similar systems in case this could indicate a faulty system.
- Cross check this data with energy use/audit information if it is available.
- See if permits were filed to fix previous violations.
Highlight large renovations or major violations that would materially affect a potential new building owner.
Using the data available for BBL {bbl}, write a structured risk assessment report

**Output Requirements:**
Return your full response in **strict JSON** format with the following top-level keys:
- "Assesment"
- "Executive Summary"
- "Building Overview"
- "Risk Factors"
- "Compliance History"
- "Recommendations"
- "Key Items"

Each section must contain a paragraph of text **except** "Assesment", "Key Items", "Recommendations",
"Assesment" should be a string with a breif description of the overall risk. i.e. High Risk, Low Risk or some combination

"Key Items" must follow this specific dictionary structure:

```json
"Key Items": {{
"Permit": {{
"Date": ["YYYY-MM-DD", ...],
"Label": ["Description of permit", ...]
}},
"Violation": {{
"Date": ["YYYY-MM-DD", ...],
"Label": ["Description of violation", ...]
}},
"ECB Violation": {{
"Date": ["YYYY-MM-DD", ...],
"Label": ["Description of ECB violation", ...]
}}
}}

For Key Items:
Include only the most important permits and violations (the ones that should be annotated on a timeline plot).
Select events that represent major renovations, structural or system upgrades, or serious compliance issues.
Keep the descriptions very breif. They are annotating points and should only be a few words long. Only provide more details if absolutely neccesary.

"Recommendations" must be a list of strings where the strings are your reccomendations. These will be shown to the user as a numbered list and can be as detailed as you would like.
- These are reccomandations to a potential buyer, for things they would need to do before and after purchase.

"Executive Summary" must also be a list of strings. The first string must be a summary sentance i.e. "This property is a medium risk property because:"
- All following strings will be organized into bullet points. Only include information relevant to the risk assesment, this section is meant to be breif < 5 bullets and bullets must be concise < 1 line

Ensure the "Compliance History" section is a detailed paracgraph and highlights all important violations, patterns of violations and unresolved violations.
Give a comprehensive overview of compliance history.

When composing resonse keep in mind I am outputting this to a pdf, avoid using characters that may break fonts.
Avoid using smart quotes (β β β β), en/em dashes (β β), and special bullets (β’, β) in output text. Use ASCII equivalents: straight quotes, single dash (-), and hyphens.

Question: {question}
Data: {combined_text}
"""})

    response = client.chat.completions.create(
        model="gpt-5.2",

        messages = messages
    )

    # NOTE(review): assumes the model returns valid JSON -- json.loads raises
    # otherwise; consider a JSON response_format guard.  TODO confirm.
    sections = json.loads(response.choices[0].message.content)

    pdf_buffer = make_structured_pdf(sections, bbl)

    return sections, pdf_buffer
| |
| |
| from reportlab.lib.pagesizes import letter |
| from reportlab.lib import colors |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle |
| from reportlab.platypus import SimpleDocTemplate, Paragraph, PageBreak, Spacer, HRFlowable, Image, ListFlowable, ListItem |
# Map "smart" punctuation to ASCII so ReportLab's default fonts render the
# text correctly.  BUG FIX/generalization: the original replaced only dash
# variants, although the prompt also bans smart quotes and special bullets.
_ASCII_TRANSLATION = str.maketrans({
    "\u2013": "-",   # en dash
    "\u2014": "-",   # em dash
    "\u2212": "-",   # minus sign
    "\u2018": "'",   # left single quote
    "\u2019": "'",   # right single quote
    "\u201c": '"',   # left double quote
    "\u201d": '"',   # right double quote
    "\u2022": "-",   # bullet
    "\u2023": "-",   # triangular bullet
})


def normalize_text(text):
    """Return *text* with smart quotes, dashes and bullets replaced by ASCII."""
    return text.translate(_ASCII_TRANSLATION)
def make_structured_pdf(sections, bbl):
    """Render the model's JSON sections into a PDF report.

    Args:
        sections: parsed JSON dict with keys like "Assesment",
            "Executive Summary", ..., "Key Items" (spelling is load-bearing).
        bbl: the 10-digit BBL string, used in the title.

    Returns:
        io.BytesIO positioned at 0 containing the built PDF.

    NOTE(review): reads the module-level global `results` (populated by
    generate_bbl_report) when building the timeline figure -- confirm a
    report run precedes any direct call.
    """
    buffer = io.BytesIO()

    doc = SimpleDocTemplate(buffer, pagesize=letter,
                            rightMargin=50, leftMargin=50, topMargin=60, bottomMargin=50)
    styles = getSampleStyleSheet()

    title_style = ParagraphStyle(
        name="Title", parent=styles["Heading1"],
        alignment=1, fontSize=18, spaceAfter=10)
    sub_title_style = ParagraphStyle(
        name="SubTitle", parent=styles["Heading1"],
        alignment=1, fontSize=14, spaceAfter=20)
    header_style = ParagraphStyle(
        name="Header", parent=styles["Heading2"],
        textColor=colors.darkblue, spaceAfter=6)
    body_style = ParagraphStyle(
        name="Body", parent=styles["BodyText"],
        fontSize=11, leading=15, spaceAfter=12)

    story = []

    # ASCII dash in the title (the original had a mojibaked dash character).
    story.append(Paragraph(f"Building Risk Report - BBL {bbl}", title_style))
    story.append(Paragraph(f"Assesment: {str(sections['Assesment'])}", sub_title_style))
    story.append(HRFlowable(width="100%", thickness=1, color=colors.darkblue))
    story.append(Spacer(1, 12))

    # BUG FIX: timeline_buf was referenced after the loop without being
    # initialized, raising NameError whenever "Key Items" was missing.
    timeline_buf = io.BytesIO()

    for section_name, content in sections.items():
        if section_name == "Key Items":
            global sample_dict  # debugging hook: inspectable after a run
            sample_dict = content
            timeline_buf = make_timeline(results, content, bbl)
        elif section_name == 'Assesment':
            continue  # already rendered in the header above
        else:
            story.append(Paragraph(section_name, header_style))

            if section_name.lower() == "recommendations":
                # Numbered list of recommendation strings.
                list_items = [ListItem(Paragraph(r, body_style)) for r in content]
                story.append(ListFlowable(list_items, bulletType='1', bulletFormat='%s)', start='1', leftIndent=20))
                story.append(Spacer(1, 16))
            elif section_name.lower() == "executive summary":
                # First string is the lead-in sentence; the rest become bullets.
                list_items = [ListItem(Paragraph(r, body_style)) for r in content[1:]]
                story.append(Paragraph(content[0], body_style))
                story.append(ListFlowable(list_items, bulletType='1', bulletFormat='%s)', start='1', leftIndent=20))
                story.append(Spacer(1, 16))
            else:
                # Plain paragraph section; split on newlines, sanitize glyphs.
                content = str(content).strip()
                for paragraph in content.split("\n"):
                    paragraph = paragraph.strip()
                    if paragraph:
                        story.append(Paragraph(normalize_text(paragraph), body_style))
                story.append(Spacer(1, 16))

    # Only add the timeline page if make_timeline actually produced an image.
    if timeline_buf.getbuffer().nbytes > 0:
        story.append(PageBreak())
        story.append(HRFlowable(width="100%", thickness=1, color=colors.darkblue))
        story.append(Spacer(1, 20))
        story.append(Paragraph("Timeline of Key Items", header_style))
        story.append(Image(timeline_buf, width=600, height=600))

    doc.build(story)

    buffer.seek(0)
    return buffer
|
|
| |
| |
| |
| |
| |
|
|
| |
|
|
| |
| |
|
|
| |
| import matplotlib.pyplot as plt |
| import matplotlib.dates as mdates |
| from adjustText import adjust_text |
| import contextlib, io |
| import textwrap, random |
| import matplotlib.patches as mpatches |
| from matplotlib.lines import Line2D |
| import matplotlib.gridspec as gridspec |
|
|
def _annotate_key_items(ax, dates, labels, anchor_y, base_y, min_gap_days, above):
    """Draw key-item labels near a timeline row, nudging colliding labels.

    dates: DatetimeIndex of event dates; labels: parallel list of short strings.
    anchor_y is the y of the timeline row, base_y where label text starts.
    Labels within *min_gap_days* of an earlier label at the same height are
    stacked by 0.02 per collision -- upward when *above*, else downward.
    """
    step = 0.02 if above else -0.02
    min_gap = np.timedelta64(min_gap_days, 'D')
    placed = []
    for i, date in enumerate(dates):
        y = base_y
        for j in range(i):
            if abs(date - dates[j]) < min_gap and abs(placed[j] - y) < abs(step):
                y = placed[j] + step
        placed.append(y)
        ax.plot([date, date], [anchor_y, y], color='gray', lw=0.8, zorder=2)
        ax.text(date, y, labels[i], rotation=0, ha='center',
                va='bottom' if above else 'top', fontsize=7, color='black')


def make_timeline(results, content, bbl):
    """Build the timeline figure: one main axis with three horizontal event
    rows (permits at y=1.0, DOB violations at y=0.8, ECB violations at y=0.6)
    plus three frequency bar charts underneath.

    Args:
        results: dict of per-dataset DataFrames from generate_bbl_report.
        content: the "Key Items" dict from the model (Permit / Violation /
            ECB Violation, each with parallel "Date" and "Label" lists).
        bbl: the BBL string (currently unused; kept for interface stability).

    Returns:
        io.BytesIO containing the rendered PNG, positioned at 0.
    """
    permits = results.get('Permit')
    violations = results.get('Violation')
    ecb_violations = results.get('ECB Violation')

    fig = plt.figure(figsize=(12, 12))
    gs = gridspec.GridSpec(2, 3, height_ratios=[3, 1], hspace=0.2, wspace=0.3)

    ax = fig.add_subplot(gs[0, :])   # main timeline
    ax1 = fig.add_subplot(gs[1, 0])  # permit type frequency
    ax2 = fig.add_subplot(gs[1, 1])  # DOB violation type frequency
    ax3 = fig.add_subplot(gs[1, 2])  # ECB violation type frequency

    permit_colors = {
        "Boiler": "#1f77b4",
        "Curb Cut": "#ff7f0e",
        "Construction Equipment": "#2ca02c",
        "Fire Alarm": "#d62728",
        "Fuel Burning": "#9467bd",
        "Fire Suppression": "#8c564b",
        "Fuel Storage": "#e377c2",
        "Mechanical/HVAC": "#7f7f7f",
        "New Building": "#bcbd22",
        "Other": "#17becf",
        "Plumbing": "#aec7e8",
        "Standpipe": "#ffbb78",
        "Sprinkler": "#98df8a",
    }

    # x position for the row captions, just right of "today".
    x_label = mdates.date2num(pd.to_datetime('2025-06-01'))

    # BUG FIX: initialize up front.  Previously this was first assigned inside
    # the Violation branch, so skipping that branch caused NameError later.
    active_violations = False
    # BUG FIX: collect whichever date series exist so the final axis limits do
    # not reference possibly-unbound locals when a dataset is absent.
    date_arrays = []
    empty_key_items = {"Date": [], "Label": []}

    if permits is not None and not permits.empty:
        permit_dates = pd.to_datetime(permits['Issuance Date'], errors='coerce')
        permit_labels = permits['Work Type'].fillna('Other')

        # Drop rows whose date failed to parse; keep labels aligned.
        mask = permit_dates.notna()
        permit_dates = permit_dates[mask]
        permit_labels = permit_labels[mask]

        sorted_idx = np.argsort(permit_dates)
        permit_dates = permit_dates.iloc[sorted_idx]
        permit_labels = permit_labels.iloc[sorted_idx]

        counts = permit_labels.value_counts()
        labels = counts.index.tolist()
        # .get with fallback: unseen work types no longer raise KeyError.
        bar_colors = [permit_colors.get(label, "#333333") for label in labels]

        ax1.bar(range(len(labels)), counts.values, color=bar_colors)
        ax1.set_xticks(range(len(labels)))
        ax1.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
        ax1.set_ylabel("Count")
        ax1.set_title('Permit Type and Frequency', fontsize=10)

        point_colors = [permit_colors.get(cat, "#333333") for cat in permit_labels]

        ax.hlines(1, permit_dates.min(), permit_dates.max(), color='gray', alpha=0.7, linewidth=2)
        ax.scatter(permit_dates, np.full(len(permit_dates), 1.0), color=point_colors, s=60, zorder=3)

        ax.text(
            x_label + 300, 1,
            "Permit History",
            va='center', ha='left',
            fontsize=8, fontweight='bold', color='gray'
        )

        key_items = content.get('Permit', empty_key_items)
        severe_dates = pd.to_datetime(key_items['Date'])
        severe_labels = key_items['Label']

        # Black rings mark the model-selected key permits.
        ax.scatter(severe_dates, np.full(len(severe_dates), 1),
                   facecolor='none', edgecolor='black', s=120, lw=1.2, zorder=4)
        _annotate_key_items(ax, severe_dates, severe_labels,
                            anchor_y=1, base_y=1.03, min_gap_days=5000, above=True)

        date_arrays.append(mdates.date2num(permit_dates))

    violation_colors = {
        'Boiler': "#1f77b4",
        'Construction': "#2ca02c",
        'Elevator': 'orange',
        'Emergency': 'red',
        'High Pressure Boiler': '#00008B',
        'Immediate Emergency': '#8B0000',
        'Landmark Building': "#7f7f7f",
        'Plumbing': "#aec7e8",
        'Unsafe Building': "k",
        'Zoning': 'yellow',
        'Other - Likely Safety': 'k'}

    if violations is not None and not violations.empty:
        violation_dates = pd.to_datetime(violations['ISSUE_DATE'], errors='coerce')
        violation_labels = violations['VIOLATION_TYPE_DESC'].fillna('Other - Likely Safety')
        violation_status = violations['VIOLATION_CATEGORY']
        active_mask = violation_status.str.contains("ACTIVE", case=False, na=False)

        mask = violation_dates.notna()
        violation_dates = violation_dates[mask]
        # BUG FIX: the original assigned the masked labels to a misspelled
        # variable ("violtion_labels"), so labels/colors could disagree in
        # length with the masked dates.  Keep labels and mask aligned.
        violation_labels = violation_labels[mask]
        active_mask = active_mask[mask]

        counts = violation_labels.value_counts()
        labels = counts.index.tolist()
        bar_colors = [violation_colors.get(label, "#333333") for label in labels]

        ax2.bar(range(len(labels)), counts.values, color=bar_colors)
        ax2.set_xticks(range(len(labels)))
        ax2.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
        ax2.set_ylabel("Count")
        ax2.set_title('Violation Type and Frequency', fontsize=10)

        sorted_idx = np.argsort(violation_dates)
        violation_dates = violation_dates.iloc[sorted_idx]
        violation_labels = violation_labels.iloc[sorted_idx]
        active_mask = active_mask.iloc[sorted_idx]

        point_colors = [violation_colors.get(cat, "#333333") for cat in violation_labels]

        ax.hlines(0.8, violation_dates.min(), violation_dates.max(), color='gray', alpha=0.7, linewidth=2)
        ax.scatter(violation_dates, np.full(len(violation_dates), 0.8), color=point_colors, s=60, zorder=3)

        # Red rings mark violations still marked ACTIVE.
        ax.scatter(violation_dates[active_mask], np.full(int(active_mask.sum()), 0.8),
                   color='none', s=80, edgecolor='red', zorder=4)

        if active_mask.any():
            active_violations = True

        key_items = content.get('Violation', empty_key_items)
        severe_dates = pd.to_datetime(key_items['Date'])
        severe_labels = key_items['Label']

        ax.scatter(severe_dates, np.full(len(severe_dates), 0.8),
                   color='none', s=80, edgecolor='black', zorder=4)
        _annotate_key_items(ax, severe_dates, severe_labels,
                            anchor_y=0.8, base_y=0.83, min_gap_days=4000, above=True)

        ax.text(
            x_label + 200, 0.8,
            "Violation History",
            va='center', ha='left',
            fontsize=8, fontweight='bold', color='gray'
        )

        date_arrays.append(mdates.date2num(violation_dates))

    ecb_color = {
        'Administrative': 'blue',
        'Boilers': "#1f77b4",
        'Construction': "#2ca02c",
        'Cranes and Derricks': "#2ca02c",
        'Elevators': 'orange',
        'HPD': '#00008B',
        'Local Law': 'cyan',
        'Padlock': "#7f7f7f",
        'Plumbing': "#aec7e8",
        'Public Assembly': 'blue',
        'Quality of Life': 'blue',
        'Signs': 'yellow',
        'Site Safety': 'red',
        'Unknown': 'gray',
        'Zoning': 'yellow'}

    if ecb_violations is not None and not ecb_violations.empty:
        ecb_violation_dates = pd.to_datetime(ecb_violations['ISSUE_DATE'], errors='coerce')
        ecb_violation_labels = ecb_violations['VIOLATION_TYPE'].fillna('Unknown')
        ecb_severity = ecb_violations['SEVERITY']
        desc = ecb_violations['VIOLATION_DESCRIPTION']

        mask = ecb_violation_dates.notna()
        ecb_violation_dates = ecb_violation_dates[mask]
        # BUG FIX: same misspelled-variable problem as the Violation branch.
        ecb_violation_labels = ecb_violation_labels[mask]
        ecb_severity = ecb_severity[mask]
        desc = desc[mask]

        counts = ecb_violation_labels.value_counts()
        labels = counts.index.tolist()
        bar_colors = [ecb_color.get(label, "#333333") for label in labels]

        ax3.bar(range(len(labels)), counts.values, color=bar_colors)
        ax3.set_xticks(range(len(labels)))
        ax3.set_xticklabels(labels, rotation=45, ha='right', fontsize=8)
        ax3.set_ylabel("Count")
        ax3.set_title('ECB Violation Type and Frequency', fontsize=10)

        sorted_idx = np.argsort(ecb_violation_dates)
        ecb_violation_dates = ecb_violation_dates.iloc[sorted_idx]
        ecb_violation_labels = ecb_violation_labels.iloc[sorted_idx]
        ecb_severity = ecb_severity.iloc[sorted_idx]
        desc = desc.iloc[sorted_idx]

        point_colors = [ecb_color.get(cat, "#333333") for cat in ecb_violation_labels]
        ax.hlines(0.6, ecb_violation_dates.min(), ecb_violation_dates.max(), color='gray', alpha=0.7, linewidth=2)
        ax.scatter(ecb_violation_dates, np.full(len(ecb_violation_dates), 0.6), color=point_colors, s=60, zorder=3)

        # NOTE(review): these SEVERITY class strings are assumed to denote
        # hazardous ECB violations -- confirm against the dataset dictionary.
        severe_mask = ecb_severity.isin(["CLASS - 1", "CLASS - 2", "Hazardous"])
        if severe_mask.any():
            active_violations = True

        hazard_dates = ecb_violation_dates[severe_mask]
        ax.scatter(hazard_dates, np.full(len(hazard_dates), 0.6),
                   color='none', s=80, edgecolor='red', zorder=4)

        key_items = content.get('ECB Violation', empty_key_items)
        severe_dates = pd.to_datetime(key_items['Date'])
        severe_labels = key_items['Label']

        ax.scatter(severe_dates, np.full(len(severe_dates), 0.6),
                   color='none', s=80, edgecolor='black', zorder=4)
        # ECB labels stack downward (below the row) unlike the other two rows.
        _annotate_key_items(ax, severe_dates, severe_labels,
                            anchor_y=0.6, base_y=0.57, min_gap_days=5000, above=False)

        ax.text(
            x_label, 0.6,
            "ECB Violation History",
            va='center', ha='left',
            fontsize=8, fontweight='bold', color='gray'
        )

        date_arrays.append(mdates.date2num(ecb_violation_dates))

    if active_violations:
        unresolved_handle = Line2D(
            [0], [0],
            marker='o', linestyle='none',
            markerfacecolor='none',
            markeredgecolor='red',
            markeredgewidth=1.2,
            markersize=6,
            label='Unresolved or Hazardous Violation'
        )

        leg_unresolved = ax.legend(
            handles=[unresolved_handle],
            loc='upper center',
            bbox_to_anchor=(0.17, 0.4),
            frameon=True,
            fontsize=10, title_fontsize=10
        )
        fig.add_artist(leg_unresolved)

    fig.subplots_adjust(bottom=0.25)

    ax.set_ylim(0.4, 1.2)
    # BUG FIX: the original concatenated all three date arrays unconditionally,
    # raising NameError when any dataset was absent for this BBL.
    if date_arrays:
        all_dates = np.concatenate(date_arrays)
        ax.set_xlim(min(all_dates) - 1000, x_label + 2500)
    else:
        ax.set_xlim(x_label - 3650, x_label + 2500)
    ax.set_yticks([])
    ax.set_xlabel("Issuance Date")
    ax.set_title("Building Timeline", fontsize=12, fontweight='bold')

    timeline_buf = io.BytesIO()
    fig.savefig(timeline_buf, format="png", dpi=300, bbox_inches="tight")
    plt.close(fig)  # close this figure specifically; don't disturb others
    timeline_buf.seek(0)

    return timeline_buf
|
|
| |
| |
| import gradio as gr |
| import re, tempfile, traceback |
| import os |
| import json |
| import pandas as pd |
|
|
def run_bbl_report(question, profile: gr.OAuthProfile | None):
    """Gradio handler: enforce login and quota, then generate and save a PDF.

    Args:
        question: free text expected to contain a 10-digit BBL.
        profile: injected by Gradio's OAuth integration (None if logged out).

    Returns:
        (status_markdown, pdf_filepath_or_None).
    """
    uid = user_id_from_profile(profile)
    if uid is None:
        raise gr.Error("Please sign in with Hugging Face to use this demo.")

    allowed, remaining = check_and_increment_quota(uid)
    if not allowed:
        raise gr.Error(f"Usage limit reached: {MAX_RUNS_PER_USER} runs per user.")

    if remaining <= 2:
        gr.Warning(f"⚠️ Only {remaining} run(s) left!")
    else:
        gr.Info(f"✅ Runs remaining: {remaining}")

    try:
        sections, pdf_buffer = generate_bbl_report(str(question))
        if not pdf_buffer:
            # generate_bbl_report returns (message, None) when the BBL matched
            # no rows in any dataset.
            return "❌ **Exception:** BBL Not Found", None
        bbl = re.search(r'\b\d{10}\b', str(question)).group(0)

        # Persist the PDF to a temp file so Gradio's File component can serve
        # it.  (BUG FIX: dropped an unused `filename` variable and repaired the
        # mojibaked, line-broken success message.)
        with tempfile.NamedTemporaryFile(delete=False, prefix=f"BBL_{bbl}_Report_", suffix=".pdf") as tmp:
            data = pdf_buffer.getvalue() if hasattr(pdf_buffer, "getvalue") else pdf_buffer
            tmp.write(data)
            tmp_path = tmp.name

        return f"✅ Report successfully generated for BBL {bbl}", tmp_path

    except Exception as e:
        # Demo-friendly: surface the traceback in the UI instead of crashing.
        tb = traceback.format_exc()
        return f"❌ **Exception:** {e}\n\n```\n{tb}\n```", None
|
|
|
|
|
|
| |
| |
| |
# --- Gradio UI ---
# NOTE(review): the original Markdown strings contained mojibaked emoji
# (e.g. "π’"); restored to the most likely intended glyphs -- confirm.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.LoginButton()

    with gr.Group(visible=True) as main_app:
        gr.Markdown("# 🏢 NYC Building Risk Assessment Tool")
        gr.Markdown("Enter a 10-digit **BBL** (e.g. `1005880006`) to generate a full PDF risk report.")
        gr.Markdown("Due to storage limits on this website, databases limited to Manhattan.")
        gr.Markdown("Note that generating a PDF takes ~ 100 sec")
        gr.Markdown(
            """
            To find a BBL, visit:
            <a href="https://experience.arcgis.com/experience/d826b115c87841d491c2b41fcb175305"
               target="_blank"
               style="color:#1f77b4; text-decoration:none; font-weight:bold;">
               🗺️ NYC Map Portal (ArcGIS)
            </a>
            """
        )

        with gr.Row():
            question_input = gr.Textbox(
                label="Enter BBL",
                placeholder="Example: 1005880006",
                lines=2,
            )

        with gr.Row():
            run_btn = gr.Button("Generate PDF Report", variant="primary")
            reset_btn = gr.Button("🔄 Reset")

        with gr.Row():
            status_output = gr.Markdown()
            file_output = gr.File(label="Download Report")

        # Gradio injects the gr.OAuthProfile argument automatically from
        # run_bbl_report's type annotation; it is not listed in `inputs`.
        run_btn.click(
            fn=run_bbl_report,
            inputs=[question_input],
            outputs=[status_output, file_output],
        )

        # BUG FIX: reset previously left the BBL textbox populated; clear it too.
        reset_btn.click(
            fn=lambda: ("", None, ""),
            inputs=None,
            outputs=[status_output, file_output, question_input],
        )


demo.launch()