Spaces:
Sleeping
Sleeping
Cache classes to avoid reruns / change button tags to avoid redundancy
Browse files
app.py
CHANGED
|
@@ -71,6 +71,7 @@ import streamlit as st
|
|
| 71 |
# Configure logging
|
| 72 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 73 |
|
|
|
|
| 74 |
class UAPAnalyzer:
|
| 75 |
"""
|
| 76 |
A class for analyzing and clustering textual data within a pandas DataFrame using
|
|
@@ -785,7 +786,7 @@ def plot_cramers_v_heatmap(data, significance_level=0.05):
|
|
| 785 |
plt.title(f"Heatmap of Cramér's V (p < {significance_level})")
|
| 786 |
return plt
|
| 787 |
|
| 788 |
-
|
| 789 |
class UAPVisualizer:
|
| 790 |
def __init__(self, data=None):
|
| 791 |
pass # Initialization can be added if needed
|
|
@@ -925,7 +926,7 @@ class UAPVisualizer:
|
|
| 925 |
plt.show()
|
| 926 |
|
| 927 |
|
| 928 |
-
|
| 929 |
class UAPParser:
|
| 930 |
def __init__(self, api_key, model="gpt-3.5-turbo-0125", col=None, format_long=None):
|
| 931 |
os.environ['OPENAI_API_KEY'] = api_key
|
|
@@ -1140,9 +1141,19 @@ def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 1140 |
start_date, end_date = user_date_input
|
| 1141 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
| 1142 |
else:
|
| 1143 |
-
|
| 1144 |
-
|
| 1145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1146 |
if user_text_input:
|
| 1147 |
df_ = df_[df_[column].astype(str).str.contains(user_text_input)]
|
| 1148 |
# write len of df after filtering with % of original
|
|
@@ -1273,7 +1284,7 @@ def main():
|
|
| 1273 |
st.session_state['stage'] = 1
|
| 1274 |
|
| 1275 |
# Analyze data
|
| 1276 |
-
if st.session_state.stage > 0:
|
| 1277 |
columns_to_analyze = st.multiselect(
|
| 1278 |
label='Select columns to analyze',
|
| 1279 |
options=parsed_responses.columns
|
|
@@ -1340,13 +1351,15 @@ def main():
|
|
| 1340 |
|
| 1341 |
if st.session_state['data_processed']:
|
| 1342 |
parsed2 = st.session_state.get('dataset', pd.DataFrame())
|
|
|
|
|
|
|
| 1343 |
parsed2 = filter_dataframe(parsed2)
|
| 1344 |
col1, col2 = st.columns(2)
|
| 1345 |
st.dataframe(parsed2)
|
| 1346 |
with col1:
|
| 1347 |
col_parsed2 = st.selectbox("Which column do you want to query?", parsed2.columns)
|
| 1348 |
with col2:
|
| 1349 |
-
GEMINI_KEY = st.text_input('Gemini API Key', GEMINI_KEY, type='password', help="Enter
|
| 1350 |
if col_parsed and GEMINI_KEY:
|
| 1351 |
selected_column_data2 = parsed2[col_parsed2].tolist()
|
| 1352 |
question2 = st.text_input("Ask a question / leave empty for summarization")
|
|
|
|
| 71 |
# Configure logging
|
| 72 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 73 |
|
| 74 |
+
@st.cache_data(ttl=1800, max_entries=30, show_spinner='Retrieving data...')
|
| 75 |
class UAPAnalyzer:
|
| 76 |
"""
|
| 77 |
A class for analyzing and clustering textual data within a pandas DataFrame using
|
|
|
|
| 786 |
plt.title(f"Heatmap of Cramér's V (p < {significance_level})")
|
| 787 |
return plt
|
| 788 |
|
| 789 |
+
@st.cache_data(ttl=1800, max_entries=30, show_spinner='Retrieving data...')
|
| 790 |
class UAPVisualizer:
|
| 791 |
def __init__(self, data=None):
|
| 792 |
pass # Initialization can be added if needed
|
|
|
|
| 926 |
plt.show()
|
| 927 |
|
| 928 |
|
| 929 |
+
@st.cache_data(ttl=1800, max_entries=30, show_spinner='Retrieving data...')
|
| 930 |
class UAPParser:
|
| 931 |
def __init__(self, api_key, model="gpt-3.5-turbo-0125", col=None, format_long=None):
|
| 932 |
os.environ['OPENAI_API_KEY'] = api_key
|
|
|
|
| 1141 |
start_date, end_date = user_date_input
|
| 1142 |
df_ = df_.loc[df_[column].between(start_date, end_date)]
|
| 1143 |
else:
|
| 1144 |
+
try: # To avoid multiple buttons with same ID
|
| 1145 |
+
user_text_input = right.text_input(
|
| 1146 |
+
f"Substring or regex in {column}",
|
| 1147 |
+
)
|
| 1148 |
+
except:
|
| 1149 |
+
try:
|
| 1150 |
+
user_text_input = right.text_input(
|
| 1151 |
+
f"Substring or regex {column}",
|
| 1152 |
+
)
|
| 1153 |
+
except Exception as e:
|
| 1154 |
+
print(f'Error : {e}')
|
| 1155 |
+
pass
|
| 1156 |
+
|
| 1157 |
if user_text_input:
|
| 1158 |
df_ = df_[df_[column].astype(str).str.contains(user_text_input)]
|
| 1159 |
# write len of df after filtering with % of original
|
|
|
|
| 1284 |
st.session_state['stage'] = 1
|
| 1285 |
|
| 1286 |
# Analyze data
|
| 1287 |
+
if st.session_state.stage > 0 and st.session_state.stage < 10 and parsed_responses is not None:
|
| 1288 |
columns_to_analyze = st.multiselect(
|
| 1289 |
label='Select columns to analyze',
|
| 1290 |
options=parsed_responses.columns
|
|
|
|
| 1351 |
|
| 1352 |
if st.session_state['data_processed']:
|
| 1353 |
parsed2 = st.session_state.get('dataset', pd.DataFrame())
|
| 1354 |
+
if parsed2 is not None:
|
| 1355 |
+
st.session_state['stage'] = 10
|
| 1356 |
parsed2 = filter_dataframe(parsed2)
|
| 1357 |
col1, col2 = st.columns(2)
|
| 1358 |
st.dataframe(parsed2)
|
| 1359 |
with col1:
|
| 1360 |
col_parsed2 = st.selectbox("Which column do you want to query?", parsed2.columns)
|
| 1361 |
with col2:
|
| 1362 |
+
GEMINI_KEY = st.text_input('Gemini API Key', GEMINI_KEY, type='password', help="Enter Gemini API key")
|
| 1363 |
if col_parsed and GEMINI_KEY:
|
| 1364 |
selected_column_data2 = parsed2[col_parsed2].tolist()
|
| 1365 |
question2 = st.text_input("Ask a question / leave empty for summarization")
|