Spaces:
Build error
Build error
| # This example does not use a langchain agent, | |
| # The langchain sql chain has knowledge of the database, but doesn't interact with it becond intialization. | |
| # The output of the sql chain is parsed seperately and passed to `duckdb.sql()` by streamlit | |
| import os | |
| os.environ["WEBSOCKET_TIMEOUT_MS"] = "300000" # no effect | |
| import streamlit as st | |
| import geopandas as gpd | |
| import pandas as pd | |
| from shapely import wkb | |
| st.set_page_config(page_title="Protected Areas Database Chat", page_icon="🦜", layout="wide") | |
| st.title("Protected Areas Database Chat") | |
| ## Database connection, reading directly from remote parquet file | |
| from sqlalchemy import create_engine | |
| from langchain.sql_database import SQLDatabase | |
| db_uri = "duckdb:///:memory:" | |
| parquet = "https://huggingface.co/datasets/boettiger-lab/pad-us-3/resolve/main/pad-stats.parquet" | |
| engine = create_engine(db_uri) #connect_args={'read_only': True}) | |
| con = engine.connect() | |
| con.execute("install spatial; load spatial;") | |
| h = con.execute(f"create or replace view pad as select * from read_parquet('{parquet}');") | |
| h.fetchall() | |
| db = SQLDatabase(engine, view_support=True) | |
| def query_database(response): | |
| # con.sql(response).to_pandas().head(25) # uses ibis connection | |
| # instead, use direct sqlAlchemy connection | |
| z = con.execute(response).fetchall() | |
| return pd.DataFrame(z).head(25) | |
| query_database("select * from pad limit 1") | |
| def get_geom(tbl): | |
| tbl['geometry'] = tbl['geometry'].apply(wkb.loads) | |
| gdf = gpd.GeoDataFrame(tbl, geometry='geometry') | |
| return gdf | |
| # Helper plotting functions | |
| import pydeck as pdk | |
| def deck_map(gdf): | |
| st.write( | |
| pdk.Deck( | |
| map_style="mapbox://styles/mapbox/light-v9", | |
| initial_view_state={ | |
| "latitude": 35, | |
| "longitude": -100, | |
| "zoom": 3, | |
| "pitch": 50, | |
| }, | |
| layers=[ | |
| pdk.Layer( | |
| "GeoJsonLayer", | |
| gdf, | |
| pickable=True, | |
| stroked=True, | |
| filled=True, | |
| extruded=True, | |
| elevation_scale=10, | |
| get_fill_color=[2, 200, 100], | |
| get_line_color=[0,0,0], | |
| line_width_min_pixels=0, | |
| ), | |
| ], | |
| ) | |
| ) | |
| import leafmap.foliumap as leafmap | |
| def leaf_map(gdf): | |
| m = leafmap.Map(center=[35, -100], zoom=4, layers_control=True) | |
| m.add_gdf(gdf) | |
| return m.to_streamlit() | |
| ## ChatGPT Connection | |
| from langchain_openai import ChatOpenAI | |
| # Requires ollama server running locally | |
| from langchain_community.llms import Ollama | |
| # # should we use ChatOllama instead? | |
| # from langchain_community.llms import ChatOllama | |
| models = {"chatgpt3.5": ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])} | |
| other_models = { | |
| "chatgpt4": ChatOpenAI(model="gpt-4", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]), | |
| "duckdb-nsql": Ollama(model="duckdb-nsql", temperature=0), | |
| "command-r-plus": Ollama(model="command-r-plus", temperature=0), | |
| "mixtral:8x22b": Ollama(model="mixtral:8x22b", temperature=0), | |
| "wizardlm2:8x22b": Ollama(model="wizardlm2:8x22b", temperature=0), | |
| "sqlcoder": Ollama(model="sqlcoder", temperature=0), | |
| "zephyr": Ollama(model="zephyr", temperature=0), | |
| "gemma:7b": Ollama(model="gemma:7b", temperature=0), | |
| "codegemma": Ollama(model="codegemma", temperature=0), | |
| "llama2": Ollama(model="llama2", temperature=0), | |
| } | |
| map_tool = {"leafmap": leaf_map, | |
| "deckgl": deck_map | |
| } | |
| with st.sidebar: | |
| choice = st.radio("Select an LLM:", models) | |
| llm = models[choice] | |
| map_choice = st.radio("Select mapping tool", map_tool) | |
| mapper = map_tool[map_choice] | |
| ## A SQL Chain | |
| from langchain.chains import create_sql_query_chain | |
| chain = create_sql_query_chain(llm, db) | |
| main = st.container() | |
| ## Does not preserve history | |
| with main: | |
| ''' | |
| The Protected Areas Database of the United States (PAD-US) is the official national inventory of | |
| America’s parks and other protected lands, and is published by the USGS Gap Analysis Project, | |
| [https://doi.org/10.5066/P9Q9LQ4B.](https://doi.org/10.5066/P9Q9LQ4B). | |
| This interactive tool allows users to explore the dataset, as well as a range of biodiversity | |
| and climate indicators associated with each protected area. These indicators are integrated into | |
| a single table format shown below. The chatbot assistant can turn natural language queries into | |
| SQL queries based on the table schema. | |
| See our [Protected Areas Explorer](https://huggingface.co/spaces/boettiger-lab/pad-us) for a companion non-chat-based tool. | |
| ##### Example Queries returning summary tables | |
| - What is the percent area in each gap code as a fraction of the total protected area? | |
| - The manager_type column indicates whether a manager is federal, state, local, private, or NGO. | |
| the manager_name column indicates the responsible agency (National Park Service, Bureau of Land Management, | |
| etc) in the case of federal manager types. Which of the federal managers manage the most land in | |
| gap_code 1 or 2, as a fraction of the total area? | |
| When queries refer to specific managed areas, the chatbot can show those areas on an interactive map. | |
| Do to software limitations, these maps will show no more than 25 polygons, even if more areas match the | |
| requested search. The chatbot sometimes requires help identifying the right columns. In order to create | |
| a map, the SQL query must also return the geometry column. Conisder the following examples: | |
| ##### Example queries returning maps + tables | |
| - Show me all the national monuments (designation_type) in Utah. Include the geometry column | |
| - Show examples of Bureau of Land Management (manager_name) with the highest species richness? Include the geometry column | |
| - Which site has the overall highest range-size-rarity? Include the geometry column, manager_name, and IUCN category. | |
| ''' | |
| st.markdown("## 🦜 Chatbot:") | |
| chatbox = st.container() | |
| with chatbox: | |
| if prompt := st.chat_input(key="chain"): | |
| st.chat_message("user").write(prompt) | |
| with st.chat_message("assistant"): | |
| response = chain.invoke({"question": prompt}) | |
| st.write(response) | |
| tbl = query_database(response) | |
| if 'geometry' in tbl: | |
| gdf = get_geom(tbl) | |
| mapper(gdf) | |
| n = len(gdf) | |
| st.write(f"matching features: {n}") | |
| st.dataframe(tbl) | |
| st.divider() | |
| with st.container(): | |
| st.text("Database schema (top 3 rows)") | |
| tbl = tbl = query_database("select * from pad limit 3") | |
| st.dataframe(tbl) | |
| st.divider() | |
| ''' | |
| Experimental prototype. | |
| - Author: [Carl Boettiger](https://carlboettiger.info) | |
| - For data sources and processing, see: https://beta.source.coop/repositories/cboettig/pad-us-3/description/ | |
| ''' | |