Spaces:
Runtime error
Runtime error
| # IMPORTING TOOLS | |
| import streamlit as st | |
| from rdflib import Graph, Literal | |
| from rdflib.plugins.sparql import prepareQuery | |
| import pandas as pd | |
| import plotly.express as px | |
| import numpy as np | |
| # SET PAGE SETTINGS | |
| st.set_page_config(page_title='Amusement Accidents', layout="centered") | |
| # METHOD TO LOAD THE RDF | |
| def importRDF(filename, format): | |
| graph = Graph().parse(filename, format) | |
| return graph | |
| # IMPORTING THE RDF | |
| with st.spinner('Loading all the stuffs...'): | |
| graph = importRDF("rdf-dataset.ttl", "ttl") | |
| # METHOD TO CONVERT THE QUERY RESULT INTO A DATAFRAME | |
| def sparql_results_to_df(results): | |
| return pd.DataFrame( | |
| data=([None if x is None else x.toPython() for x in row] for row in results), | |
| columns=[str(x) for x in results.vars], | |
| ) | |
| # METHOD TO EXECUTE A GENERIC QUERY | |
| def computeQuery(query, executor): | |
| result = executor.query(query) | |
| res_df = sparql_results_to_df(result) | |
| return res_df | |
| # METHOD TO EXECUTE A PARAMETRIC QUERY | |
| def rideAccidentDescription(ride_name, executor): | |
| ride_name = Literal(ride_name) | |
| query = """ | |
| PREFIX ride_type: <http://example.org/ride_type#> | |
| PREFIX acc: <http://example.org/accident#> | |
| PREFIX ride: <http://example.org/ride#> | |
| SELECT (?manuf AS ?Manufacturer) (?description AS ?Accident_Description) | |
| WHERE { | |
| ?instance acc:description ?description ; | |
| acc:ref-ride_id ?ride_id . | |
| ?ride_id ride:name ?name ; | |
| ride:manufacturer ?manuf . | |
| FILTER (?name = ?ride_name) | |
| } | |
| """ | |
| prep_query = prepareQuery(query) | |
| r = executor.query(prep_query, initBindings={'ride_name': ride_name}) | |
| return sparql_results_to_df(r), query | |
| # PROCESSING & DISPLAY | |
| def display(): | |
| with st.container(): | |
| st.write("#### What are the months with the highest number of accidents?") | |
| res = computeQuery(query_5, graph) | |
| fig = px.bar(res, x="mon", y="count", color="count", labels={"mon":"Month", "count":"Num. of Accidents"}, text_auto="True") | |
| fig.update_xaxes(type="category") | |
| fig.update_yaxes(showticklabels=False) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with st.expander("Show query"): | |
| st.code(query_5, language="sparql") | |
| st.markdown("---") | |
| with st.container(): | |
| st.write("#### Which cities and states have recorded the most accidents?") | |
| res = computeQuery(query_8, graph) | |
| fig = px.treemap(res, path=[px.Constant("U.S"), "state", "city"], values="count", hover_data=["state", "city","count"], | |
| color="count", | |
| color_continuous_scale='tealrose', | |
| color_continuous_midpoint=np.average(res['count'], weights=res['count'])) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with st.expander("Show query"): | |
| st.code(query_8, language="sparql") | |
| st.markdown("---") | |
| with st.container(): | |
| st.write("#### What incidents have occurred on your favorite ride?") | |
| ride_names = computeQuery(query_0, graph) | |
| option = st.selectbox("Select a Ride", options=ride_names) | |
| res, query = rideAccidentDescription(option, graph) | |
| res_count = res.count()[0] | |
| if (res_count < 3): | |
| st.table(res) | |
| else: | |
| limit = st.slider("Num. of Accidents to Visualize", 1, int(res_count), 2, 1) | |
| st.table(res[:limit]) | |
| with st.expander("Show query"): | |
| st.code(query, language="sparql") | |
| st.markdown("---") | |
| with st.container(): | |
| st.write("#### What Are the Most Common Categories of Accidents?") | |
| res = computeQuery(query_4, graph) | |
| fig = px.treemap(res, path=[px.Constant("Accident Category"), "category_name"], values="count", hover_data=["category_name","count"]) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with st.expander("Show query"): | |
| st.code(query_4, language="sparql") | |
| st.markdown("---") | |
| with st.container(): | |
| st.write("#### What are the Most Dangerous Ride Categories?") | |
| res = computeQuery(query_6, graph) | |
| fig = px.pie(res, names="amus_cat_name", values="count", hole=.4) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with st.expander("Show query"): | |
| st.code(query_6, language="sparql") | |
| st.markdown("---") | |
| with st.container(): | |
| st.write("#### What are the Most Dangerous Ride Types?") | |
| res = computeQuery(query_3, graph) | |
| fig = px.bar(res, x="type_name", y="count", labels={"type_name":"Ride Type", "count":"Num. of Accidents"}, text_auto=True) | |
| fig.update_xaxes(tickangle=45) | |
| st.plotly_chart(fig, use_container_width=True) | |
| with st.expander("Show query"): | |
| st.code(query_3, language="sparql") | |
| st.markdown("---") | |
| with st.container(): | |
| st.write("#### How many people are generally involved in an accident?") | |
| res = computeQuery(query_1, graph) | |
| fig = px.bar(res, x="num_inj", y="count", labels={"num_inj":"Injured People", "count":"Num. of Accidents"}, text_auto=True) | |
| fig.update_xaxes(type="category") | |
| st.plotly_chart(fig, use_container_width=True) | |
| with st.expander("Show query"): | |
| st.code(query_1, language="sparql") | |
| st.markdown("---") | |
| return None | |
| # ANALYTICAL QUERIES DEFINITION | |
| # get the names of all the rides | |
| query_0 = """ | |
| PREFIX ride:<http://example.org/ride#> | |
| SELECT DISTINCT ?name | |
| WHERE { | |
| ?ride ride:name ?name . | |
| } | |
| """ | |
| # num of accidents per injured people | |
| query_1 = """ | |
| PREFIX r:<http://example.org/ride#> | |
| PREFIX a:<http://example.org/accident#> | |
| SELECT ?num_inj (COUNT(?num_inj) AS ?count) | |
| WHERE { | |
| ?acc a:num_injured ?num_inj . | |
| } | |
| GROUP BY ?num_inj | |
| ORDER BY (?num_inj) | |
| """ | |
| # manufacturers of the rides subjected to most accidents | |
| query_2 = """ | |
| PREFIX acc: <http://example.org/accident#> | |
| PREFIX ride: <http://example.org/ride#> | |
| SELECT ?ride_manuf (COUNT(?ride_manuf) AS ?count) | |
| WHERE { | |
| ?instance acc:ref-ride_id ?ride_id . | |
| ?ride_id ride:manufacturer ?ride_manuf | |
| } | |
| GROUP BY ?ride_manuf | |
| ORDER BY DESC(?count) | |
| """ | |
| # Top n types of rides most subjected to accidents | |
| query_3 = """ | |
| PREFIX ride_type: <http://example.org/ride_type#> | |
| PREFIX acc: <http://example.org/accident#> | |
| PREFIX ride: <http://example.org/ride#> | |
| SELECT ?type_name (COUNT(?type_name) AS ?count) | |
| WHERE { | |
| ?instance acc:ref-ride_id ?ride_id . | |
| ?ride_id ride:ref-ride_type_id ?type_id . | |
| ?type_id ride_type:type ?type_name . | |
| } | |
| GROUP BY ?type_name | |
| ORDER BY DESC(?count) | |
| LIMIT 7 | |
| """ | |
| # Top 6 categories of rides most subjected to accidents | |
| query_6 = """ | |
| PREFIX amusement_cat: <http://example.org/amusement_category#> | |
| PREFIX ride_type: <http://example.org/ride_type#> | |
| PREFIX acc: <http://example.org/accident#> | |
| PREFIX ride: <http://example.org/ride#> | |
| SELECT ?amus_cat_name (COUNT(?amus_cat_name) AS ?count) | |
| WHERE { | |
| ?instance acc:ref-ride_id ?ride_id . | |
| ?ride_id ride:ref-ride_type_id ?type_id . | |
| ?type_id ride_type:ref-amusement_category_id ?amus_cat_id . | |
| ?amus_cat_id amusement_cat:amusement_category ?amus_cat_name . | |
| } | |
| GROUP BY ?amus_cat_name | |
| ORDER BY DESC(?count) | |
| LIMIT 6 | |
| """ | |
| # most common categories of accidents | |
| query_4 = """ | |
| PREFIX acc_cat: <http://example.org/accident_category#> | |
| PREFIX acc: <http://example.org/accident#> | |
| SELECT ?category_name (COUNT(?category_name) AS ?count) | |
| WHERE { | |
| ?instance acc:ref-accident_category_id ?category_id . | |
| ?category_id acc_cat:accident_category ?category_name . | |
| } | |
| GROUP BY ?category_name | |
| ORDER BY DESC(?count) | |
| """ | |
| # months with the ngher num of accidents | |
| query_5 = """ | |
| PREFIX acc: <http://example.org/accident#> | |
| SELECT ?mon (COUNT(?mon) AS ?count) | |
| WHERE { | |
| ?instance acc:date ?date . | |
| } | |
| GROUP BY (month(?date) AS ?mon) | |
| ORDER BY (?mon) | |
| """ | |
| # cities with the higher num of accidents | |
| query_8 = """ | |
| PREFIX location: <http://example.org/location#> | |
| PREFIX acc: <http://example.org/accident#> | |
| SELECT ?city (COUNT(?city) AS ?count) ?state | |
| WHERE { | |
| ?instance acc:ref-location_id ?location_id . | |
| ?location_id location:city ?city ; | |
| location:state ?state | |
| } | |
| GROUP BY ?city | |
| ORDER BY DESC(?count) | |
| """ | |
| # TITLE | |
| st.header("Theme Park Ride Accidents") | |
| st.markdown("""There are **thousands of amusement parks** around the world that welcome **millions of visitors** each year. | |
| Children, families, and teenagers are ready to spend days of adrenaline and fun. | |
| Unfortunately, **accidents sometimes occur**. This raises some questions: **Are amusement parks safe? Which rides are the most accident-prone? What accidents happen most often? At what time of year are accidents most common?** | |
| Let's try to find out in this **RDF data exploration** using **SPARQL** and **Plotly**.""") | |
| st.markdown("---") | |
| display() | |
| # WRITE & RUN YOUR OWN QUERY | |
| st.write("#### Write & Run your Custom Query") | |
| pers_query = st.text_area('', """ | |
| PREFIX ride:<http://example.org/ride#> | |
| SELECT ?name | |
| WHERE { | |
| ?ride ride:manufacturer "Vekoma" ; | |
| ride:name ?name | |
| } | |
| """, height=200) | |
| with st.container(): | |
| try: | |
| res = computeQuery(pers_query, graph) | |
| st.table(res) | |
| except: | |
| st.error("Ooops! Check you query syntax...") | |
| st.markdown("---") | |
| # SIDEBAR | |
| with st.sidebar: | |
| st.write(""" | |
| This App proposes some visualization about theme park ride accidents. | |
| The original dataset comes from "Saferparks", an organization that reports and collects data about theme park ride accidents in the US. | |
| The original dataset covers years from 2010 to 2017 and comes in CSV or Excel format. I used python to split the dataset and convert it into the | |
| Third Normal Form (3NF) of Database. | |
| I uploaded the data into a PostgreSQL database and I used the Ontop tool to get the final RDF dataset. | |
| Queries are expressed in SPARQL, and charts are generated with Plotly Express. | |
| """) | |
| st.markdown("---") | |
| st.markdown("## Dataset Resources:") | |
| st.markdown(""" | |
| Saferparks Original Dataset: https://ridesdatabase.org/saferparks/data/ | |
| Saferparks Dataset Description: https://ridesdatabase.org/wp-content/uploads/2020/02/Saferparks-data-description.pdf | |
| """) | |