Spaces:
Running
Running
Merge pull request #4 from boettiger-lab/patch/testing
Browse files- app/app.py +148 -62
- app/footer.md +12 -0
- app/system_prompt.txt +84 -0
- app/utils.py +298 -42
- app/variables.py +59 -0
- requirements.txt +5 -1
app/app.py
CHANGED
|
@@ -1,15 +1,13 @@
|
|
| 1 |
import os
|
| 2 |
-
|
| 3 |
import altair as alt
|
| 4 |
-
import ibis
|
| 5 |
import leafmap.maplibregl as leafmap
|
| 6 |
import matplotlib.pyplot as plt
|
| 7 |
import pandas as pd
|
| 8 |
import streamlit as st
|
| 9 |
-
from ibis import _
|
| 10 |
-
|
| 11 |
from utils import *
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
st.set_page_config(
|
| 15 |
layout="wide",
|
|
@@ -32,22 +30,19 @@ st.caption(
|
|
| 32 |
|
| 33 |
"ℹ️ Tip: Use the slider to change the year and hover over shaded areas for measure details."
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
min_year, max_year = st.slider("Select a range", 1988, 2024, (2020, 2024))
|
| 38 |
-
|
| 39 |
con = ibis.duckdb.connect("duck.db", extensions=["spatial"])
|
| 40 |
current_tables = con.list_tables()
|
| 41 |
|
| 42 |
-
if "
|
| 43 |
tbl = (
|
| 44 |
con.read_parquet(votes_parquet)
|
| 45 |
.cast({"geom": "geometry"})
|
| 46 |
)
|
| 47 |
tbl = get_unique_rows(tbl) # drop multi-county measures with non-unanimous party labels
|
| 48 |
-
con.create_table("
|
| 49 |
|
| 50 |
-
votes = con.table("
|
| 51 |
|
| 52 |
with st.sidebar:
|
| 53 |
color_choice = st.radio("Color by:", ["Measure status", "Political Party"])
|
|
@@ -58,6 +53,68 @@ with st.sidebar:
|
|
| 58 |
social_toggle = st.toggle("Social Vulnerability Index")
|
| 59 |
justice_toggle = st.toggle("Climate and Economic Justic")
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
m = leafmap.Map(
|
| 63 |
style="positron",
|
|
@@ -95,69 +152,98 @@ if justice_toggle:
|
|
| 95 |
)
|
| 96 |
|
| 97 |
|
| 98 |
-
#
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
.count()
|
| 104 |
-
.execute()
|
| 105 |
-
)
|
| 106 |
-
total_year = votes.filter((_.year>= min_year) & (_.year<= max_year)).count().execute()
|
| 107 |
-
year_passed = round(passed_year / total_year * 100, 2)
|
| 108 |
-
f"{year_passed}% Measures Passed between {min_year} and {max_year}"
|
| 109 |
-
|
| 110 |
-
# compute percentage passed over entire dataset
|
| 111 |
-
passed = votes.filter(_.status.isin(["Pass", "Pass*"])).count().execute()
|
| 112 |
-
total = votes.count().execute()
|
| 113 |
-
overall_passed = round(passed / total * 100, 2)
|
| 114 |
-
f"{overall_passed}% Measures Passed from 1988 - 2024 \n"
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
if color_choice == "Measure status":
|
| 118 |
-
for j, o in zip(
|
| 119 |
-
["State", "County", "Municipal", "Special District"],
|
| 120 |
-
[0.8, 1, 1, 1],
|
| 121 |
-
):
|
| 122 |
m.add_pmtiles(
|
| 123 |
votes_pmtiles,
|
| 124 |
-
style=
|
| 125 |
visible=True,
|
| 126 |
-
opacity=
|
| 127 |
tooltip=True,
|
|
|
|
| 128 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
-
elif color_choice == "Political Party":
|
| 131 |
-
for j, o in zip(
|
| 132 |
-
["State", "County", "Municipal", "Special District"],
|
| 133 |
-
[0.8, 1, 1, 1],
|
| 134 |
-
):
|
| 135 |
m.add_pmtiles(
|
| 136 |
votes_pmtiles,
|
| 137 |
-
style=
|
| 138 |
visible=True,
|
| 139 |
-
opacity=
|
| 140 |
tooltip=True,
|
|
|
|
| 141 |
)
|
| 142 |
|
| 143 |
-
m.
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
st.divider()
|
| 163 |
st.caption(
|
|
|
|
| 1 |
import os
|
| 2 |
+
import openai
|
| 3 |
import altair as alt
|
|
|
|
| 4 |
import leafmap.maplibregl as leafmap
|
| 5 |
import matplotlib.pyplot as plt
|
| 6 |
import pandas as pd
|
| 7 |
import streamlit as st
|
|
|
|
|
|
|
| 8 |
from utils import *
|
| 9 |
+
import ibis
|
| 10 |
+
from ibis import _
|
| 11 |
|
| 12 |
st.set_page_config(
|
| 13 |
layout="wide",
|
|
|
|
| 30 |
|
| 31 |
"ℹ️ Tip: Use the slider to change the year and hover over shaded areas for measure details."
|
| 32 |
|
|
|
|
|
|
|
| 33 |
min_year, max_year = st.slider("Select a range", 1988, 2024, (2020, 2024))
|
|
|
|
| 34 |
con = ibis.duckdb.connect("duck.db", extensions=["spatial"])
|
| 35 |
current_tables = con.list_tables()
|
| 36 |
|
| 37 |
+
if "landvote" not in set(current_tables):
|
| 38 |
tbl = (
|
| 39 |
con.read_parquet(votes_parquet)
|
| 40 |
.cast({"geom": "geometry"})
|
| 41 |
)
|
| 42 |
tbl = get_unique_rows(tbl) # drop multi-county measures with non-unanimous party labels
|
| 43 |
+
con.create_table("landvote", tbl)
|
| 44 |
|
| 45 |
+
votes = con.table("landvote")
|
| 46 |
|
| 47 |
with st.sidebar:
|
| 48 |
color_choice = st.radio("Color by:", ["Measure status", "Political Party"])
|
|
|
|
| 53 |
social_toggle = st.toggle("Social Vulnerability Index")
|
| 54 |
justice_toggle = st.toggle("Climate and Economic Justic")
|
| 55 |
|
| 56 |
+
##### Chatbot stuff
|
| 57 |
+
chatbot_container = st.container()
|
| 58 |
+
with chatbot_container:
|
| 59 |
+
llm_left_col, llm_right_col = st.columns([5,1], vertical_alignment = "bottom")
|
| 60 |
+
with llm_left_col:
|
| 61 |
+
with st.popover("💬 Example Queries"):
|
| 62 |
+
'''
|
| 63 |
+
Mapping queries:
|
| 64 |
+
- Show me Republican-voting counties where conservation measures passed
|
| 65 |
+
- Show measures that failed narrowly (between 45% and 50% yes)
|
| 66 |
+
- Show me conservation measures that approved over $500 million
|
| 67 |
+
'''
|
| 68 |
+
|
| 69 |
+
'''
|
| 70 |
+
Exploratory data queries:
|
| 71 |
+
- Which year had the most conservation funds approved?
|
| 72 |
+
- Which state approved the largest total conservation funding?
|
| 73 |
+
- How many measures passed by jurisdiction type?
|
| 74 |
+
- Which counties voted on conservation measures most frequently?
|
| 75 |
+
- What is the median funding amount for passed measures?
|
| 76 |
+
- How often do bond measures pass compared to other finance mechanisms?
|
| 77 |
+
'''
|
| 78 |
+
|
| 79 |
+
st.info('If the map appears blank, queried data may be too small to see at the default zoom level. Check the table below the map, as query results will also be displayed there.', icon="ℹ️")
|
| 80 |
+
|
| 81 |
+
with llm_right_col:
|
| 82 |
+
llm_choice = st.selectbox("Select LLM:", llm_options, key = "llm", help = "Select which model to use.")
|
| 83 |
+
llm = llm_options[llm_choice]
|
| 84 |
+
|
| 85 |
+
run_sql = make_run_sql(votes, llm, con)
|
| 86 |
+
|
| 87 |
+
with chatbot_container:
|
| 88 |
+
with llm_left_col:
|
| 89 |
+
example_query = "👋 Input query here"
|
| 90 |
+
prompt = st.chat_input(example_query, key="chain", max_chars=300)
|
| 91 |
+
_,log_query_col, _ = st.columns([.001, 5,1], vertical_alignment = "top")
|
| 92 |
+
with log_query_col:
|
| 93 |
+
log_queries = st.checkbox("Save query", value = True, help = "Saving your queries helps improve this tool and guide conservation efforts. Your data is stored in a private location. For more details, see 'Why save your queries?' at the bottom of this page.")
|
| 94 |
+
|
| 95 |
+
# new container for output so it doesn't mess with the alignment of llm options
|
| 96 |
+
with st.container():
|
| 97 |
+
if prompt:
|
| 98 |
+
result = handle_llm_query(
|
| 99 |
+
prompt=prompt,
|
| 100 |
+
llm_choice=llm_choice,
|
| 101 |
+
run_sql_fn=run_sql, # your cached function: run_sql(prompt, llm_choice)
|
| 102 |
+
log_queries=log_queries,
|
| 103 |
+
logger_fn=minio_logger,
|
| 104 |
+
log_file="landvote_query_log.csv",
|
| 105 |
+
log_bucket="shared-tpl",
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
llm_output = result["llm_output"]
|
| 109 |
+
sql_query = result["sql_query"]
|
| 110 |
+
llm_explanation = result["llm_explanation"]
|
| 111 |
+
unique_ids = result["unique_ids"]
|
| 112 |
+
llm_cols = result["llm_cols"]
|
| 113 |
+
llm_bounds = result["llm_bounds"]
|
| 114 |
+
not_mapping = result["not_mapping"]
|
| 115 |
+
|
| 116 |
+
##### end of chatbot code
|
| 117 |
+
|
| 118 |
|
| 119 |
m = leafmap.Map(
|
| 120 |
style="positron",
|
|
|
|
| 152 |
)
|
| 153 |
|
| 154 |
|
| 155 |
+
# define PMTiles style dict (if we didn't already do so using the chatbot)
|
| 156 |
+
if 'llm_output' in locals():
|
| 157 |
+
if not_mapping == False:
|
| 158 |
+
# filter to ids from result
|
| 159 |
+
style = llm_pmtiles_style(unique_ids, paint_fill, votes_pmtiles)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
m.add_pmtiles(
|
| 161 |
votes_pmtiles,
|
| 162 |
+
style=style,
|
| 163 |
visible=True,
|
| 164 |
+
opacity=1.0,
|
| 165 |
tooltip=True,
|
| 166 |
+
name="LLM Query Results",
|
| 167 |
)
|
| 168 |
+
|
| 169 |
+
# Zoom to result bounds if present
|
| 170 |
+
if "llm_bounds" in locals() and llm_bounds:
|
| 171 |
+
m.fit_bounds(llm_bounds)
|
| 172 |
+
m.to_streamlit()
|
| 173 |
+
with st.expander("🔍 View/download data"): # adding data table
|
| 174 |
+
if ('geom' in llm_output.columns) and (not llm_output.empty):
|
| 175 |
+
llm_output = llm_output.drop('geom',axis = 1)
|
| 176 |
+
st.dataframe(llm_output, use_container_width = True)
|
| 177 |
+
|
| 178 |
+
else: # if we didn't use chatbot
|
| 179 |
+
|
| 180 |
+
# compute percentage passed in given year
|
| 181 |
+
year_passed, overall_passed=get_pass_stats(votes, min_year, max_year)
|
| 182 |
+
f"{year_passed}% Measures Passed between {min_year} and {max_year}"
|
| 183 |
+
f"{overall_passed}% Measures Passed from 1988 - 2024 \n"
|
| 184 |
+
|
| 185 |
+
if color_choice == "Measure status":
|
| 186 |
+
# 4 styles / 4 layers (jurisdiction-specific)
|
| 187 |
+
for j, o in zip(
|
| 188 |
+
["State", "County", "Municipal", "Special District"],
|
| 189 |
+
[0.8, 1, 1, 1],
|
| 190 |
+
):
|
| 191 |
+
m.add_pmtiles(
|
| 192 |
+
votes_pmtiles,
|
| 193 |
+
style=get_status_style(j, min_year, max_year),
|
| 194 |
+
visible=True,
|
| 195 |
+
opacity=o,
|
| 196 |
+
tooltip=True,
|
| 197 |
+
name=j, # shows as separate toggles in layer control
|
| 198 |
+
)
|
| 199 |
+
m.to_streamlit()
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
elif color_choice == "Political Party":
|
| 203 |
+
# 1 style / 1 layer
|
| 204 |
+
style = get_party_landvote_style(min_year, max_year)
|
| 205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
m.add_pmtiles(
|
| 207 |
votes_pmtiles,
|
| 208 |
+
style=style,
|
| 209 |
visible=True,
|
| 210 |
+
opacity=1.0,
|
| 211 |
tooltip=True,
|
| 212 |
+
name="Political Party",
|
| 213 |
)
|
| 214 |
|
| 215 |
+
m.to_streamlit()
|
| 216 |
+
|
| 217 |
+
with st.expander("🔍 View/download data"): # adding data table
|
| 218 |
+
group_cols = ['landvote_id','year','state','county','municipal','jurisdiction']
|
| 219 |
+
gdf_grouped = (votes.head(100).execute().groupby(group_cols)
|
| 220 |
+
.agg({col: ('sum' if col in ['total_funds_at_stake','total_funds_approved',
|
| 221 |
+
'conservation_funds_at_stake','conservation_funds_approved'] else 'first')
|
| 222 |
+
for col in votes.columns if col not in group_cols})).reset_index()
|
| 223 |
+
cols = ['landvote_id','year','state','county','municipal','jurisdiction',
|
| 224 |
+
'status', 'percent_yes', 'percent_no', 'date',
|
| 225 |
+
'total_funds_at_stake','total_funds_approved',
|
| 226 |
+
'conservation_funds_at_stake','conservation_funds_approved',
|
| 227 |
+
'finance_mechanism', 'other_comment','purpose',
|
| 228 |
+
'description', 'notes', 'voted_acq_measure', 'party']
|
| 229 |
+
|
| 230 |
+
st.dataframe(gdf_grouped[cols], use_container_width = True)
|
| 231 |
+
|
| 232 |
+
party_df = get_party_df(votes)
|
| 233 |
+
st.altair_chart(party_chart(party_df), use_container_width=True)
|
| 234 |
+
|
| 235 |
+
df_funding = funding_chart(votes)
|
| 236 |
+
st.altair_chart(
|
| 237 |
+
create_chart(
|
| 238 |
+
df_funding,
|
| 239 |
+
"cumulative_funding",
|
| 240 |
+
"Billions of Dollars",
|
| 241 |
+
"Cumulative Funding",
|
| 242 |
+
colors["dark_green"],
|
| 243 |
+
chart_type="bar",
|
| 244 |
+
),
|
| 245 |
+
use_container_width=True,
|
| 246 |
+
)
|
| 247 |
|
| 248 |
st.divider()
|
| 249 |
st.caption(
|
app/footer.md
CHANGED
|
@@ -24,3 +24,15 @@ For details on methodology, please refer to the our data processing code for [La
|
|
| 24 |
- CDC 2020 Social Vulnerability Index by US Census Tract. Data: https://source.coop/repositories/cboettig/social-vulnerability/description. License: Public Domain
|
| 25 |
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
- CDC 2020 Social Vulnerability Index by US Census Tract. Data: https://source.coop/repositories/cboettig/social-vulnerability/description. License: Public Domain
|
| 25 |
|
| 26 |
|
| 27 |
+
#### LLMs
|
| 28 |
+
This app can use a selection of open-weights language models hosted on the National Research Platform (https://nrp.ai/documentation/userdocs/ai/llm-managed/) and OpenRouter (https://openrouter.ai/models).
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
### Why save your queries?
|
| 33 |
+
Conservation researchers and practitioners are interested in **learning what matters most to our community**.
|
| 34 |
+
|
| 35 |
+
By saving your anonymous queries, we can identify which topics and areas are drawing the most attention, helping us improve future tools and data products to **better support conservation efforts**. We also save the LLM’s response to each query to monitor its accuracy and ensure the system is working as intended.
|
| 36 |
+
|
| 37 |
+
You can opt out at any time by disabling “Save query”.
|
| 38 |
+
|
app/system_prompt.txt
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are an expert in SQL and an assistant for mapping and analyzing the Trust for Public Land (TPL) LandVote data. You are provided one or more tables and must identify which table(s) to use. Given an input question, create a syntactically correct {dialect} query to run, and then provide an explanation of how you answered the input question. Not every query will require SQL code; users may ask for more information about values and columns in the table, which you can answer based on the information in this prompt. For these cases, your "sql_query" field should be empty.
|
| 2 |
+
|
| 3 |
+
ONLY write SQL queries using the records and columns that exist in the relevant table. You have access to this table:
|
| 4 |
+
|
| 5 |
+
landvote:
|
| 6 |
+
- Definition: Tracks land conservation ballot measures since 1988.
|
| 7 |
+
- Schema: {landvote}
|
| 8 |
+
|
| 9 |
+
For example:
|
| 10 |
+
{{
|
| 11 |
+
"sql_query": "SELECT cols FROM mydata WHERE condition = 'value';",
|
| 12 |
+
"explanation": "This query retrieves columns from mydata where the condition column equals 'value'."
|
| 13 |
+
}}
|
| 14 |
+
|
| 15 |
+
Ensure the response contains only this JSON object, with no additional text, formatting, or commentary.
|
| 16 |
+
|
| 17 |
+
# Important Details
|
| 18 |
+
- For visualization-related queries (e.g., "show me"), ALWAYS include "landvote_id", "year","state","county","municipal","jurisdiction", and "geom" in the results.
|
| 19 |
+
- Wrap each column name in double quotes (") to denote them as delimited identifiers.
|
| 20 |
+
- Wrap values that are strings in single quotes (') to distinguish them from column names.
|
| 21 |
+
|
| 22 |
+
# Example Questions and How to Approach Them
|
| 23 |
+
|
| 24 |
+
## Example:
|
| 25 |
+
example_user: "Show me measures that passed in Republican-voting counties"
|
| 26 |
+
example_assistant: {{"sql_query":
|
| 27 |
+
SELECT DISTINCT "landvote_id", "year","state","county","municipal","jurisdiction", "geom", "party", "status" FROM landvote
|
| 28 |
+
WHERE "party" == 'Republican'
|
| 29 |
+
AND "jurisdiction"=='County'
|
| 30 |
+
AND "status" IN ('Pass','Pass*')
|
| 31 |
+
ORDER BY "year";
|
| 32 |
+
"explanation":"I filtered the landvote table to county-level measures that passed and occurred in jurisdictions classified as Republican based on the most recent presidential election prior to the measure."
|
| 33 |
+
}}
|
| 34 |
+
|
| 35 |
+
## Example:
|
| 36 |
+
example_user: "Show me measures that failed narrowly (between 45% and 50% yes)"
|
| 37 |
+
example_assistant: {{"sql_query":
|
| 38 |
+
SELECT DISTINCT "landvote_id", "year","state","county","municipal","jurisdiction", "geom", "percent_yes", "status" FROM landvote
|
| 39 |
+
WHERE "status" == 'Fail'
|
| 40 |
+
AND CAST(REPLACE("percent_yes", '%', '') AS DOUBLE) BETWEEN 45 AND 50
|
| 41 |
+
ORDER BY "percent_yes";
|
| 42 |
+
"explanation":"I selected failed measures where the percent of yes votes was between 45% and 50%, indicating a narrow margin of defeat."
|
| 43 |
+
}}
|
| 44 |
+
"explanation": "I selected failed measures where the percent of yes votes was between 45% and 50%, indicating a narrow margin of defeat."
|
| 45 |
+
|
| 46 |
+
## Example:
|
| 47 |
+
example_user: "Show me conservation measures that approved over $500 million"
|
| 48 |
+
example_assistant: {{"sql_query":
|
| 49 |
+
SELECT DISTINCT "landvote_id", "year","state","county","municipal","jurisdiction", "geom", "conservation_funds_approved" FROM landvote
|
| 50 |
+
WHERE "conservation_funds_approved"> 500000000
|
| 51 |
+
ORDER BY "conservation_funds_approved" DESC;
|
| 52 |
+
"explanation":"I filtered measures to those where the approved conservation funding exceeded $500 million."
|
| 53 |
+
}}
|
| 54 |
+
"explanation": "I filtered measures to those where the approved conservation funding exceeded $500 million."
|
| 55 |
+
|
| 56 |
+
## Example:
|
| 57 |
+
example_user: "Which year had the most conservation funds approved?"
|
| 58 |
+
example_assistant: {{"sql_query":
|
| 59 |
+
SELECT "year", SUM("conservation_funds_approved") AS "total_conservation_funds_approved"
|
| 60 |
+
FROM landvote
|
| 61 |
+
GROUP BY "year"
|
| 62 |
+
ORDER BY "total_conservation_funds_approved" DESC LIMIT 1;
|
| 63 |
+
"explanation":"I took the sum of conservation funds in `landvote` by year and returned the year with the most funds."
|
| 64 |
+
}}
|
| 65 |
+
|
| 66 |
+
## Example:
|
| 67 |
+
example_user: "How many measures passed by jurisdiction type?"
|
| 68 |
+
example_assistant: {{"sql_query":
|
| 69 |
+
SELECT "jurisdiction", COUNT(*) AS "passed_measures" FROM landvote
|
| 70 |
+
WHERE "status" IN ('Pass','Pass*')
|
| 71 |
+
GROUP BY "jurisdiction"
|
| 72 |
+
ORDER BY "passed_measures";
|
| 73 |
+
"explanation":"I grouped passed measures by jurisdiction type and counted how many measures passed in each category."
|
| 74 |
+
}}
|
| 75 |
+
|
| 76 |
+
## Example:
|
| 77 |
+
example_user: "How often do bond measures pass compared to other finance mechanisms?"
|
| 78 |
+
example_assistant: {{"sql_query":
|
| 79 |
+
SELECT "finance_mechanism",
|
| 80 |
+
AVG(CASE WHEN "status" IN ('Pass', 'Pass*') THEN 1 ELSE 0 END) AS "pass_rate" FROM landvote
|
| 81 |
+
GROUP BY "finance_mechanism"
|
| 82 |
+
ORDER BY "pass_rate" DESC;
|
| 83 |
+
"explanation":"I calculated the average pass rate for each finance mechanism by treating passed measures as 1 and failed measures as 0, allowing a comparison of how often bond measures pass relative to other mechanisms."
|
| 84 |
+
}}
|
app/utils.py
CHANGED
|
@@ -1,8 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import ibis
|
| 2 |
from ibis import _
|
| 3 |
import altair as alt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from variables import *
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def get_unique_rows(df):
|
| 7 |
# collapse multi-county measures to one row per landvote_id
|
| 8 |
unique_votes = (
|
|
@@ -11,14 +27,42 @@ def get_unique_rows(df):
|
|
| 11 |
.agg(
|
| 12 |
**{c: ibis._[c].first() for c in df.schema().names if c not in ("landvote_id", "county", "party")},
|
| 13 |
# if spans multiple counties -> set different name for county
|
| 14 |
-
county=ibis.ifelse(ibis._.county.nunique() > 1, "Multiple Counties", ibis._.county.first()),
|
| 15 |
-
# if counties differ in parties -> assign other label to party
|
| 16 |
party=ibis.ifelse(ibis._.party.nunique() > 1, "Mixed", ibis._.party.first()),
|
| 17 |
)
|
| 18 |
)
|
| 19 |
return unique_votes
|
| 20 |
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def create_chart(df, y_column, ylab, title, color, chart_type="line"):
|
| 23 |
# color encoding - color is a list or single value
|
| 24 |
color_encoding = (
|
|
@@ -86,6 +130,47 @@ def funding_chart(votes):
|
|
| 86 |
)
|
| 87 |
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
def party_style(year):
|
| 90 |
recent_election_year = year - year % 4
|
| 91 |
|
|
@@ -116,7 +201,6 @@ def party_style(year):
|
|
| 116 |
}
|
| 117 |
|
| 118 |
|
| 119 |
-
|
| 120 |
# pmtiles style for status
|
| 121 |
def get_status_style(jurisdiction, min_year, max_year):
|
| 122 |
if jurisdiction == "State":
|
|
@@ -144,20 +228,20 @@ def get_status_style(jurisdiction, min_year, max_year):
|
|
| 144 |
]
|
| 145 |
}
|
| 146 |
|
|
|
|
| 147 |
# pmtiles style for party
|
| 148 |
-
def get_party_landvote_style(
|
| 149 |
return {
|
| 150 |
"layers": [
|
| 151 |
{
|
| 152 |
-
"id":
|
| 153 |
-
"source":
|
| 154 |
"source-layer": "landvote_party",
|
| 155 |
"type": "fill",
|
| 156 |
"filter": [
|
| 157 |
"all",
|
| 158 |
["<=", "year", str(max_year)],
|
| 159 |
[">=", "year", str(min_year)],
|
| 160 |
-
["==", "jurisdiction", jurisdiction],
|
| 161 |
],
|
| 162 |
"paint": {
|
| 163 |
"fill-color": {
|
|
@@ -174,38 +258,210 @@ def get_party_landvote_style(jurisdiction, min_year, max_year):
|
|
| 174 |
}
|
| 175 |
|
| 176 |
|
| 177 |
-
def
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
"
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
title="% passed",
|
| 202 |
-
format=".1%",
|
| 203 |
-
),
|
| 204 |
-
],
|
| 205 |
-
)
|
| 206 |
-
.properties(
|
| 207 |
-
title="Percent of Measures Passed per Year by Political Party"
|
| 208 |
-
)
|
| 209 |
-
)
|
| 210 |
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import datetime
|
| 4 |
+
|
| 5 |
import ibis
|
| 6 |
from ibis import _
|
| 7 |
import altair as alt
|
| 8 |
+
import minio
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import streamlit as st
|
| 11 |
+
from pydantic import BaseModel, Field
|
| 12 |
+
from langchain_openai import ChatOpenAI
|
| 13 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 14 |
+
|
| 15 |
from variables import *
|
| 16 |
|
| 17 |
+
|
| 18 |
+
# -----------------------------
|
| 19 |
+
# Data wrangling utils
|
| 20 |
+
# -----------------------------
|
| 21 |
+
|
| 22 |
def get_unique_rows(df):
|
| 23 |
# collapse multi-county measures to one row per landvote_id
|
| 24 |
unique_votes = (
|
|
|
|
| 27 |
.agg(
|
| 28 |
**{c: ibis._[c].first() for c in df.schema().names if c not in ("landvote_id", "county", "party")},
|
| 29 |
# if spans multiple counties -> set different name for county
|
| 30 |
+
county=ibis.ifelse(ibis._.county.nunique() > 1, "Multiple Counties", ibis._.county.first()),
|
| 31 |
+
# if counties differ in parties -> assign other label to party
|
| 32 |
party=ibis.ifelse(ibis._.party.nunique() > 1, "Mixed", ibis._.party.first()),
|
| 33 |
)
|
| 34 |
)
|
| 35 |
return unique_votes
|
| 36 |
|
| 37 |
+
|
| 38 |
+
def get_pass_stats(df, min_year, max_year):
    """Compute the share of measures that passed, as percentages.

    A measure counts as passed when its ``status`` is ``"Pass"`` or
    ``"Pass*"``.

    Args:
        df: ibis table expression exposing ``year`` and ``status`` columns.
        min_year: first year (inclusive) of the selected range.
        max_year: last year (inclusive) of the selected range.

    Returns:
        ``(year_passed, overall_passed)``: the pass percentage inside the
        selected year range and over the whole table, each rounded to two
        decimals. A percentage is ``0.0`` when there are no measures to
        divide by.
    """

    def _percent(part, whole):
        # An empty selection has no meaningful rate; report 0.0 instead of
        # raising ZeroDivisionError and crashing the page.
        if not whole:
            return 0.0
        return round(part / whole * 100, 2)

    is_passed = _.status.isin(["Pass", "Pass*"])

    # percentage passed within the selected year range
    in_range = df.filter((_.year >= min_year) & (_.year <= max_year))
    total_year = in_range.count().execute()
    passed_year = in_range.filter(is_passed).count().execute()
    year_passed = _percent(passed_year, total_year)

    # percentage passed over the entire dataset
    total = df.count().execute()
    passed = df.filter(is_passed).count().execute()
    overall_passed = _percent(passed, total)

    return year_passed, overall_passed
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def extract_columns(sql_query):
    """Return the double-quoted identifiers found in *sql_query*.

    Args:
        sql_query: SQL text whose column names are wrapped in double
            quotes. ``None`` or an empty string yields an empty list.

    Returns:
        The quoted names in first-seen order with duplicates removed.
        Empty identifiers (``""``) are ignored.
    """
    if not sql_query:
        return []

    # Find all substrings inside double quotes
    quoted = re.findall(r'"(.*?)"', sql_query)

    # dict.fromkeys de-duplicates while keeping first-seen order
    return [name for name in dict.fromkeys(quoted) if name]
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# -----------------------------
|
| 63 |
+
# Chart utils
|
| 64 |
+
# -----------------------------
|
| 65 |
+
|
| 66 |
def create_chart(df, y_column, ylab, title, color, chart_type="line"):
|
| 67 |
# color encoding - color is a list or single value
|
| 68 |
color_encoding = (
|
|
|
|
| 130 |
)
|
| 131 |
|
| 132 |
|
| 133 |
+
def party_chart(df):
    """Build the line chart of pass rate per year, one line per party.

    Plots the ``pass_fraction`` column of *df* against ``year`` with point
    markers, coloured by ``party``, and returns the Altair chart.
    """
    year_axis = alt.X("year:O", title="Year")
    pass_rate_axis = alt.Y(
        "pass_fraction:Q",
        title="% of measures passed",
        axis=alt.Axis(format="%"),
    )

    # pair each party in the domain with its colour from `colors`
    party_palette = alt.Scale(
        domain=["Democrat", "Republican"],
        range=[colors["dem_blue"], colors["rep_red"]],
    )
    party_color = alt.Color(
        "party:N",
        scale=party_palette,
        legend=alt.Legend(title="Party"),
    )

    hover_fields = [
        alt.Tooltip("year:O", title="Year"),
        alt.Tooltip("party:N", title="Party"),
        alt.Tooltip("pass_fraction:Q", title="% passed", format=".1%"),
    ]

    lines = alt.Chart(df).mark_line(point=True)
    encoded = lines.encode(
        x=year_axis,
        y=pass_rate_axis,
        color=party_color,
        tooltip=hover_fields,
    )
    return encoded.properties(
        title="Percent of Measures Passed per Year by Political Party"
    )
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
# -----------------------------
|
| 171 |
+
# Mapping / style utils
|
| 172 |
+
# -----------------------------
|
| 173 |
+
|
| 174 |
def party_style(year):
|
| 175 |
recent_election_year = year - year % 4
|
| 176 |
|
|
|
|
| 201 |
}
|
| 202 |
|
| 203 |
|
|
|
|
| 204 |
# pmtiles style for status
|
| 205 |
def get_status_style(jurisdiction, min_year, max_year):
|
| 206 |
if jurisdiction == "State":
|
|
|
|
| 228 |
]
|
| 229 |
}
|
| 230 |
|
| 231 |
+
|
| 232 |
# pmtiles style for party
|
| 233 |
+
def get_party_landvote_style(min_year, max_year):
|
| 234 |
return {
|
| 235 |
"layers": [
|
| 236 |
{
|
| 237 |
+
"id": "party",
|
| 238 |
+
"source": "landvote",
|
| 239 |
"source-layer": "landvote_party",
|
| 240 |
"type": "fill",
|
| 241 |
"filter": [
|
| 242 |
"all",
|
| 243 |
["<=", "year", str(max_year)],
|
| 244 |
[">=", "year", str(min_year)],
|
|
|
|
| 245 |
],
|
| 246 |
"paint": {
|
| 247 |
"fill-color": {
|
|
|
|
| 258 |
}
|
| 259 |
|
| 260 |
|
| 261 |
+
def llm_pmtiles_style(ids, paint, pmtiles):
    """Build a style dict showing only the features whose id is in *ids*.

    Args:
        ids: iterable of ``landvote_id`` values to keep; each is converted
            to ``str`` before being placed in the filter.
        paint: paint specification used as-is for the fill layer.
        pmtiles: path or URL of the PMTiles archive.

    Returns:
        A dict with one vector source (``"tpl"``) and one fill layer
        filtered to the requested ids.
    """
    # layer name = archive file name without extension, with every
    # non-word character (e.g. hyphens) removed
    file_stem, _ext = os.path.splitext(os.path.basename(pmtiles))
    layer_name = re.sub(r"\W+", "", file_stem)

    wanted_ids = list(map(str, ids))

    tile_source = {
        "type": "vector",
        "url": "pmtiles://" + pmtiles,
        "attribution": "TPL",
    }
    fill_layer = {
        "id": "tpl",
        "source": "tpl",
        "source-layer": layer_name,
        "type": "fill",
        "filter": ["in", ["get", "landvote_id"], ["literal", wanted_ids]],
        "paint": paint,
    }

    return {
        "version": 8,
        "sources": {"tpl": tile_source},
        "layers": [fill_layer],
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
+
|
| 287 |
+
@st.cache_resource(show_spinner=False)
def get_con(db_path: str = "duck.db"):
    """Open an ibis DuckDB connection to *db_path* with the spatial extension.

    Wrapped in ``st.cache_resource`` so Streamlit can hand back the same
    connection object instead of reconnecting on each call.
    """
    required_extensions = ["spatial"]
    connection = ibis.duckdb.connect(db_path, extensions=required_extensions)
    return connection
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
# -----------------------------
|
| 293 |
+
# Chatbot utils
|
| 294 |
+
# -----------------------------
|
| 295 |
+
|
| 296 |
+
# NOTE(review): the class docstring and Field descriptions below are presumably
# forwarded to the model as the output schema (the class is passed to
# `with_structured_output`), so treat their wording as runtime text.
class SQLResponse(BaseModel):
    """Defines the structure for SQL response."""

    # SQL text produced by the model; may be empty when no query is needed
    sql_query: str = Field(
        description="The SQL query generated by the assistant."
    )
    # free-text reasoning shown to the user alongside the results
    explanation: str = Field(
        description="A detailed explanation of how the SQL query answers the input question."
    )
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
@st.cache_data(show_spinner=False)
def _load_template(path: str = "app/system_prompt.txt") -> str:
    """Read and return the system-prompt template stored at *path*.

    Args:
        path: location of the prompt template file.

    Returns:
        The full file contents as a string.
    """
    # Decode explicitly as UTF-8: the default text encoding depends on the
    # host locale, which would make the prompt text machine-dependent.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def make_run_sql(votes, llm, con, template_path: str = "app/system_prompt.txt"):
    """Build the cached ``run_sql(query, llm_choice)`` function for the chatbot.

    The returned function closes over `con` and a prompt -> structured-output
    chain built from the system-prompt template and `llm`.

    Args:
        votes: ibis table whose schema is substituted into the prompt as
            ``{landvote}``.
        llm: chat model; bound with ``streaming=False`` and asked for
            ``SQLResponse`` structured output.
        con: ibis connection used to execute the generated SQL.
        template_path: location of the system-prompt template file.

    Returns:
        ``run_sql(query, llm_choice)``, which returns a tuple
        ``(result_dataframe, sql_query, explanation)``.
    """
    template = _load_template(template_path)

    prompt_tmpl = ChatPromptTemplate.from_messages([
        ("system", template),
        ("human", "{input}"),
    ]).partial(dialect="duckdb", landvote=votes.schema())

    # Ensure tools/structured output is not streaming
    llm = llm.bind(streaming=False)

    structured_llm = llm.with_structured_output(SQLResponse)
    few_shot_structured_llm = prompt_tmpl | structured_llm

    # `llm_choice` is not read in the body; it is an argument so that cached
    # results are kept separately per (query, llm_choice) pair.
    @st.cache_data(show_spinner=False)
    def run_sql(query: str, llm_choice: str):
        output = few_shot_structured_llm.invoke({"input": query})
        sql_query = output.sql_query
        explanation = output.explanation

        # No SQL (empty or whitespace-only): the model answered from the
        # prompt alone, so there is nothing to execute.
        if not sql_query or not sql_query.strip():
            return pd.DataFrame({"landvote_id": []}), "", explanation

        # NOTE(review): sql_query is model-generated text executed verbatim
        # on `con`. Consider a read-only connection or restricting execution
        # to SELECT statements.
        result = con.sql(sql_query).distinct().execute()

        if result.empty:
            explanation = "This query did not return any results. Please try again with a different query."
            # an empty result has nothing to map, so drop the geometry column
            if "geom" in result.columns:
                result = result.drop("geom", axis=1)

        return result, sql_query, explanation

    return run_sql
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
def handle_llm_query(
    prompt: str,
    llm_choice: str,
    run_sql_fn,
    log_queries: bool,
    logger_fn,
    log_file: str = "landvote_query_log.csv",
    log_bucket: str = "shared-tpl",
):
    """
    Run the LLM->SQL pipeline, render the outcome in Streamlit, log the query,
    and return the values the map needs.

    Parameters:
        prompt: user question; a falsy value returns immediately with no UI output.
        llm_choice: model label forwarded to ``run_sql_fn`` and ``logger_fn``.
        run_sql_fn: callable ``(prompt, llm_choice) -> (result, sql_query, explanation)``.
        log_queries: consent flag forwarded to ``logger_fn``.
        logger_fn: callable invoked as ``logger_fn(log_queries, prompt, sql_query,
            explanation, llm_choice, log_file, log_bucket)``.
        log_file: log file name handed to ``logger_fn``.
        log_bucket: bucket name handed to ``logger_fn``.

    Returns:
        dict with keys ``llm_output``, ``sql_query``, ``llm_explanation``,
        ``unique_ids``, ``llm_cols``, ``llm_bounds`` and ``not_mapping``.
        ``llm_bounds`` is ``None`` when there is nothing to map or when the
        result exposes no ``total_bounds`` (no geometry in the result).

    Note:
        When SQL was generated but returned no rows, the function calls
        ``st.stop()`` after showing a warning and therefore does not return.
    """
    if not prompt:
        return {
            "llm_output": None,
            "sql_query": "",
            "llm_explanation": "",
            "unique_ids": [],
            "llm_cols": [],
            "llm_bounds": None,
            "not_mapping": True,
        }

    not_mapping = False
    unique_ids, llm_cols, llm_bounds = [], [], None

    st.chat_message("user").write(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Invoking query..."):
            llm_output, sql_query, llm_explanation = run_sql_fn(prompt, llm_choice)

            # Record the query through the injected logger.
            logger_fn(
                log_queries,
                prompt,
                sql_query,
                llm_explanation,
                llm_choice,
                log_file,
                log_bucket,
            )

            if sql_query == "":
                # No SQL generated: only the explanation is shown.
                st.success(llm_explanation)
                not_mapping = True
            else:
                if llm_output is not None and llm_output.empty:
                    # SQL generated but no rows: warn, show the SQL, halt the run.
                    st.warning(llm_explanation, icon="⚠️")
                    st.caption("SQL Query:")
                    st.code(sql_query, language="sql")
                    st.stop()
                elif llm_output is not None and (
                    "landvote_id" not in llm_output.columns
                    and "geom" not in llm_output.columns
                ):
                    # Result has no mapping columns: display it as a table.
                    st.write(llm_output)
                    not_mapping = True

                # Explanation + SQL are always available in a popover.
                with st.popover("Explanation"):
                    st.write(llm_explanation)
                    st.caption("SQL Query:")
                    st.code(sql_query, language="sql")

            has_ids = (
                llm_output is not None
                and "landvote_id" in llm_output.columns
                and not llm_output.empty
            )
            if has_ids:
                unique_ids = list(set(llm_output["landvote_id"].tolist()))
                llm_cols = extract_columns(sql_query)
                # A result without a geometry column is a plain DataFrame and
                # has no `total_bounds`; leave the bounds as None instead of
                # raising AttributeError.
                bounds = getattr(llm_output, "total_bounds", None)
                llm_bounds = bounds.tolist() if bounds is not None else None
            else:
                not_mapping = True

    return {
        "llm_output": llm_output,
        "sql_query": sql_query,
        "llm_explanation": llm_explanation,
        "unique_ids": unique_ids,
        "llm_cols": llm_cols,
        "llm_bounds": llm_bounds,
        "not_mapping": not_mapping,
    }
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
# -----------------------------
|
| 440 |
+
# Logging utils
|
| 441 |
+
# -----------------------------
|
| 442 |
+
|
| 443 |
+
# MinIO credentials: take the environment variable when it is set,
# otherwise fall back to the Streamlit secrets entry of the same name.
minio_key = os.environ.get("MINIO_KEY")
minio_key = st.secrets["MINIO_KEY"] if minio_key is None else minio_key

minio_secret = os.environ.get("MINIO_SECRET")
minio_secret = st.secrets["MINIO_SECRET"] if minio_secret is None else minio_secret
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
def minio_logger(consent, query, sql_query, llm_explanation, llm_choice, filename="landvote_query_log.csv", bucket="shared-tpl",
                 key=minio_key, secret=minio_secret,
                 endpoint="minio.carlboettiger.info"):
    """Append one row to the CSV query log kept in a MinIO bucket.

    Downloads ``filename`` from ``bucket`` to a local file of the same name,
    appends a UTC-timestamped row, rewrites the local CSV and uploads it back.
    When ``consent`` is falsy the row holds only the timestamp and an
    opt-out marker; the query, SQL, explanation and model are not stored.
    """
    client = minio.Minio(endpoint, key, secret)
    client.fget_object(bucket, filename, filename)
    existing = pd.read_csv(filename)

    now_utc = datetime.datetime.now(datetime.timezone.utc)
    row = {"timestamp": [now_utc.strftime("%Y-%m-%d %H:%M:%S")]}
    if consent:
        row.update({
            "user_query": [query],
            "llm_sql": [sql_query],
            "llm_explanation": [llm_explanation],
            "llm_choice": [llm_choice],
        })
    else:
        # User opted out: record the event without any query content.
        row.update({
            "user_query": ['USER OPTED OUT'],
            "llm_sql": [''],
            "llm_explanation": [''],
            "llm_choice": [''],
        })

    combined = pd.concat([existing, pd.DataFrame(row)])
    combined.to_csv(filename, index=False, header=True)
    client.fput_object(bucket, filename, filename, content_type="text/csv")
|
app/variables.py
CHANGED
|
@@ -124,3 +124,62 @@ party_pmtiles = (
|
|
| 124 |
"https://minio.carlboettiger.info/public-election/"
|
| 125 |
"county/county_political_parties_1988-2024.pmtiles"
|
| 126 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
"https://minio.carlboettiger.info/public-election/"
|
| 125 |
"county/county_political_parties_1988-2024.pmtiles"
|
| 126 |
)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
from langchain_openai import ChatOpenAI
|
| 130 |
+
import streamlit as st
|
| 131 |
+
from langchain_openai.chat_models.base import BaseChatOpenAI
|
| 132 |
+
|
| 133 |
+
## dockerized streamlit app wants to read from os.getenv(), otherwise use st.secrets
|
| 134 |
+
import os
|
| 135 |
+
# LLM provider credentials: take the environment variable when it is set,
# otherwise fall back to the Streamlit secrets entry of the same name.
api_key = os.environ.get("NRP_API_KEY")
api_key = st.secrets["NRP_API_KEY"] if api_key is None else api_key

openrouter_api = os.environ.get("OPENROUTER_API_KEY")
openrouter_api = st.secrets["OPENROUTER_API_KEY"] if openrouter_api is None else openrouter_api

# Base URLs handed to ChatOpenAI as `base_url`.
openrouter_endpoint = "https://openrouter.ai/api/v1"
nrp_endpoint = "https://ellm.nrp-nautilus.io/v1"
|
| 145 |
+
|
| 146 |
+
# Request body sent with every OpenRouter call: refuse providers that collect data.
data_policy = {"provider": {"data_collection": "deny"}}

# Menu label -> OpenRouter model id, in display order.
_openrouter_models = {
    "devstral-2512": "mistralai/devstral-2512:free",
    "trinity-mini": "arcee-ai/trinity-mini:free",
    "nemotron-nano-9b-v2": "nvidia/nemotron-nano-9b-v2:free",
}

# Chat models selectable in the app, keyed by menu label.
llm_options = {
    label: ChatOpenAI(
        model=model_id,
        api_key=openrouter_api,
        base_url=openrouter_endpoint,
        temperature=0,
        extra_body=data_policy,
    )
    for label, model_id in _openrouter_models.items()
}

# The NRP-hosted model uses its own key and endpoint and sends no data policy.
llm_options["gemma-3-27b-it"] = ChatOpenAI(
    model="gemma3",
    api_key=api_key,
    base_url=nrp_endpoint,
    temperature=0,
)
|
requirements.txt
CHANGED
|
@@ -4,7 +4,11 @@ duckdb==1.2.2
|
|
| 4 |
duckdb_engine== 0.15.0
|
| 5 |
geoarrow-types==0.2.0
|
| 6 |
geoarrow-pandas==0.1.1
|
| 7 |
-
ibis-framework
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
leafmap==0.53.3
|
| 9 |
minio==7.2.15
|
| 10 |
maplibre==0.3.3
|
|
|
|
| 4 |
duckdb_engine== 0.15.0
|
| 5 |
geoarrow-types==0.2.0
|
| 6 |
geoarrow-pandas==0.1.1
|
| 7 |
+
ibis-framework==10.3.1
|
| 8 |
+
langchain==0.2.17
|
| 9 |
+
langchain-community==0.2.19
|
| 10 |
+
langchain-core==0.2.43
|
| 11 |
+
langchain-openai==0.1.25
|
| 12 |
leafmap==0.53.3
|
| 13 |
minio==7.2.15
|
| 14 |
maplibre==0.3.3
|