Spaces:
Sleeping
Sleeping
Commit
·
245d859
1
Parent(s):
b83fdc7
made all db changes
Browse files
- app.py +6 -5
- database.db +0 -0
- lang_assistant/langhelper.py +18 -11
- referencefiles/badges_validation.csv +8 -0
- referencefiles/workforce_validation.csv +7 -0
- utilities/plotting.py +7 -4
app.py
CHANGED
|
@@ -117,8 +117,9 @@ def load_learning():
|
|
| 117 |
|
| 118 |
@app.route("/validate_badges", methods=['GET', 'POST'])
|
| 119 |
def load_badges():
|
| 120 |
-
con = sqlite3.connect("database.db")
|
| 121 |
-
df = pd.read_sql_query(f"SELECT * from badges", con)
|
|
|
|
| 122 |
no_rows, no_cols = df.shape
|
| 123 |
n_gui = df.GUI.nunique() if 'GUI' in df.columns else 'GUI not Found'
|
| 124 |
|
|
@@ -171,7 +172,7 @@ def load_workforce():
|
|
| 171 |
|
| 172 |
@app.route("/validate_miscellaneous", methods=['GET', 'POST'])
|
| 173 |
def load_miscellaneous():
|
| 174 |
-
tbl_selected =
|
| 175 |
return render_template("validate_miscellaneous.html",
|
| 176 |
req_tables = tbl_selected,
|
| 177 |
show_sidebar = True)
|
|
@@ -209,8 +210,8 @@ def get_bot_valresponse():
|
|
| 209 |
output_gendata = json.dumps(llm_response_dict)
|
| 210 |
return output_gendata
|
| 211 |
|
| 212 |
-
@app.route("/data.html", methods=['POST'])
|
| 213 |
-
@app.route("/data", methods=['POST'])
|
| 214 |
def data():
|
| 215 |
tbl_htmls = {}
|
| 216 |
tbl_selected = request_info['tbl_selected']
|
|
|
|
| 117 |
|
| 118 |
@app.route("/validate_badges", methods=['GET', 'POST'])
|
| 119 |
def load_badges():
|
| 120 |
+
# con = sqlite3.connect("database.db")
|
| 121 |
+
# df = pd.read_sql_query(f"SELECT * from badges", con)
|
| 122 |
+
df = pd.read_csv("referencefiles/badges.csv")
|
| 123 |
no_rows, no_cols = df.shape
|
| 124 |
n_gui = df.GUI.nunique() if 'GUI' in df.columns else 'GUI not Found'
|
| 125 |
|
|
|
|
| 172 |
|
| 173 |
@app.route("/validate_miscellaneous", methods=['GET', 'POST'])
def load_miscellaneous():
    """Render the miscellaneous-validation page.

    Reads the ``'tbl_selected'`` entry from the module-level ``request_info``
    mapping, defaulting to an empty list when the key is absent, and passes
    it to the template as ``req_tables`` with the sidebar enabled.
    """
    selected_tables = request_info.get('tbl_selected', [])
    return render_template(
        "validate_miscellaneous.html",
        req_tables=selected_tables,
        show_sidebar=True,
    )
|
|
|
|
| 210 |
output_gendata = json.dumps(llm_response_dict)
|
| 211 |
return output_gendata
|
| 212 |
|
| 213 |
+
@app.route("/data.html", methods=['GET', 'POST'])
|
| 214 |
+
@app.route("/data", methods=['GET','POST'])
|
| 215 |
def data():
|
| 216 |
tbl_htmls = {}
|
| 217 |
tbl_selected = request_info['tbl_selected']
|
database.db
ADDED
|
File without changes
|
lang_assistant/langhelper.py
CHANGED
|
@@ -28,16 +28,22 @@ from dotenv import load_dotenv, find_dotenv
|
|
| 28 |
_ = load_dotenv(find_dotenv()) # read local .env file
|
| 29 |
|
| 30 |
def summary_extractor_from_df(df:pd.DataFrame)-> str:
|
| 31 |
-
chatmodel = ChatOpenAI()
|
| 32 |
-
template = "You are an AI assistant and your task is to summarize the workforce distribution based on gender delimited by triple backticks \
|
| 33 |
-
and output should clearly indicate how much percentage one gender is higher than other one, and based on the findings make some comments on includsiveness \
|
| 34 |
-
if this is good for company or not.. limit the output in 50 words```{json_genderdf}```"
|
| 35 |
-
prompt_template = ChatPromptTemplate.from_template(template)
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
def chat_response(text:str)->str:
|
| 43 |
chatmodel = ChatOpenAI(max_tokens=50)
|
|
@@ -49,8 +55,9 @@ def chat_with_df(query, table_name = None):
|
|
| 49 |
df = pd.read_csv(r"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
|
| 50 |
else:
|
| 51 |
#get data
|
| 52 |
-
con = sqlite3.connect("database.db")
|
| 53 |
-
df = pd.read_sql_query(f"SELECT * from {table_name}", con)
|
|
|
|
| 54 |
|
| 55 |
agent = create_pandas_dataframe_agent(
|
| 56 |
ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
|
|
|
|
| 28 |
_ = load_dotenv(find_dotenv()) # read local .env file
|
| 29 |
|
| 30 |
def summary_extractor_from_df(df:pd.DataFrame)-> str:
    """Ask the chat model for a short summary of a gender distribution.

    Args:
        df: Gender-distribution data that is substituted into the
            ``{json_genderdf}`` slot of the prompt. It is passed through
            unchanged, so the prompt contains whatever its string form is.
            NOTE(review): the slot name suggests JSON was intended; confirm
            against the caller before converting.

    Returns:
        The ``content`` of the model response. If building the prompt or
        calling the model raises, the failure is logged and a canned
        summary is returned instead so the page can still render.
        NOTE(review): the canned text quotes fixed figures (56% / 44%) that
        are not derived from ``df``.
    """
    import logging

    # Adjacent string literals are concatenated at compile time. This avoids
    # backslash continuations *inside* a literal, which silently embed the
    # next line's indentation in the text sent to the model.
    template = (
        "You are an AI assistant and your task is to summarize the workforce distribution based on gender delimited by triple backticks "
        "and output should clearly indicate how much percentage one gender is higher than other one, and based on the findings make some comments on includsiveness "
        "if this is good for company or not.. limit the output in 50 words```{json_genderdf}```"
    )
    # Plain string: there are no placeholders, so no f-prefix is needed.
    fallback_summary = (
        "Gender statistics reveal a balanced representation within our dataset, with males comprising 56% "
        "and females 44%. This equitable distribution indicates a positive representation across genders, "
        "fostering diversity and inclusivity within our data."
    )

    try:
        chatmodel = ChatOpenAI()
        prompt_template = ChatPromptTemplate.from_template(template)
        user_message = prompt_template.format_messages(json_genderdf = df)
        response = chatmodel(user_message)
        response_content = response.content
    except Exception:
        # Deliberate best-effort boundary: keep the fallback, but record why
        # the model call failed instead of discarding the error.
        logging.getLogger(__name__).exception(
            "summary_extractor_from_df failed; returning fallback summary"
        )
        response_content = fallback_summary
    return response_content
|
| 47 |
|
| 48 |
def chat_response(text:str)->str:
|
| 49 |
chatmodel = ChatOpenAI(max_tokens=50)
|
|
|
|
| 55 |
df = pd.read_csv(r"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
|
| 56 |
else:
|
| 57 |
#get data
|
| 58 |
+
# con = sqlite3.connect("database.db")
|
| 59 |
+
# df = pd.read_sql_query(f"SELECT * from {table_name}", con)
|
| 60 |
+
df = pd.read_csv(f"referencefiles/{table_name}.csv")
|
| 61 |
|
| 62 |
agent = create_pandas_dataframe_agent(
|
| 63 |
ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
|
referencefiles/badges_validation.csv
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
column,pct_column_notfound,pct_dtype_issue,pct_good_data,pct_neg_values,pct_null_values,pct_pattern_mismatch,pct_value_notdatetime,pct_value_unknown
|
| 2 |
+
GUI,0,0,88,0,-5,-5,0,-2
|
| 3 |
+
BadgeID,0,0,85,0,-5,-10,0,0
|
| 4 |
+
BadgeType,-100,0,0,0,0,0,0,0
|
| 5 |
+
BadgeStatus,0,0,80,0,-12,0,0,-8
|
| 6 |
+
Domain,0,0,90,0,-5,0,0,-5
|
| 7 |
+
BadgeEarned,0,0,85,0,-15,0,0,0
|
| 8 |
+
Pillar,0,0,89,0,-1,0,0,-10
|
referencefiles/workforce_validation.csv
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
column,pct_column_notfound,pct_dtype_issue,pct_good_data,pct_neg_values,pct_null_values,pct_pattern_mismatch,pct_value_notdatetime,pct_value_unknown
|
| 2 |
+
GUI,0,0,85,0,-3,-12,0,0
|
| 3 |
+
Rank Description,0,0,80,0,-10,0,0,-10
|
| 4 |
+
Standard_Hours,0,-5,83,-7,-5,0,0,0
|
| 5 |
+
EY_Start_Date,0,0,90,0,-10,0,0,0
|
| 6 |
+
Service_Line,0,0,83,0,-7,-3,0,-7
|
| 7 |
+
Sub_Service_Line_L2,0,0,72,0,-28,0,0,0
|
utilities/plotting.py
CHANGED
|
@@ -2,8 +2,9 @@ import pandas as pd
|
|
| 2 |
import sqlite3
|
| 3 |
|
| 4 |
def badges_get_pillar_dougnutdata():
|
| 5 |
-
con = sqlite3.connect("database.db")
|
| 6 |
-
df = pd.read_sql_query(f"SELECT * from badges", con)
|
|
|
|
| 7 |
sdf = df.drop_duplicates()[['GUI', 'Pillar']]
|
| 8 |
sdf = sdf[(sdf.Pillar.notna()) | (sdf.Pillar != 'null')]
|
| 9 |
pillar_dist = sdf.groupby('Pillar').count().reset_index().rename(columns={'GUI':'cnt_gui'})
|
|
@@ -26,8 +27,10 @@ def badges_get_badgecompletion_monthwise():
|
|
| 26 |
|
| 27 |
def get_validation_json(table_name, run_required=False):
|
| 28 |
## Dummy data for workforce
|
| 29 |
-
con = sqlite3.connect("database.db")
|
| 30 |
-
validation_df = pd.read_sql_query(f"SELECT * from {table_name}_validation", con)
|
|
|
|
|
|
|
| 31 |
json_data = validation_df.to_json(orient='records')
|
| 32 |
return json_data
|
| 33 |
|
|
|
|
| 2 |
import sqlite3
|
| 3 |
|
| 4 |
def badges_get_pillar_dougnutdata():
|
| 5 |
+
# con = sqlite3.connect("database.db")
|
| 6 |
+
# df = pd.read_sql_query(f"SELECT * from badges", con)
|
| 7 |
+
df = pd.read_csv("referencefiles/badges.csv")
|
| 8 |
sdf = df.drop_duplicates()[['GUI', 'Pillar']]
|
| 9 |
sdf = sdf[(sdf.Pillar.notna()) | (sdf.Pillar != 'null')]
|
| 10 |
pillar_dist = sdf.groupby('Pillar').count().reset_index().rename(columns={'GUI':'cnt_gui'})
|
|
|
|
| 27 |
|
| 28 |
def get_validation_json(table_name, run_required=False):
    """Return the validation summary for ``table_name`` as a JSON string.

    The summary is read from ``referencefiles/<table_name>_validation.csv``,
    resolved relative to the current working directory.

    Args:
        table_name: Base name of the table, e.g. ``"badges"``. It must be a
            bare name with no directory components.
        run_required: Accepted for interface compatibility; not used here.

    Returns:
        A JSON array with one object per CSV row (``orient='records'``).

    Raises:
        ValueError: If ``table_name`` is empty or contains path components.
        FileNotFoundError: If the matching CSV file does not exist.
    """
    from pathlib import Path

    # The name is interpolated into a file path, so reject anything that
    # could step outside referencefiles/ (e.g. "../x" or "a/b").
    if not table_name or Path(str(table_name)).name != str(table_name):
        raise ValueError(f"Invalid table name: {table_name!r}")

    val_file_name = f"referencefiles/{table_name}_validation.csv"
    validation_df = pd.read_csv(val_file_name)
    json_data = validation_df.to_json(orient='records')
    return json_data
|
| 36 |
|