binaychandra committed on
Commit
245d859
·
1 Parent(s): b83fdc7

made all db changes (replaced SQLite reads with CSV files under referencefiles/)

Browse files
app.py CHANGED
@@ -117,8 +117,9 @@ def load_learning():
117
 
118
  @app.route("/validate_badges", methods=['GET', 'POST'])
119
  def load_badges():
120
- con = sqlite3.connect("database.db")
121
- df = pd.read_sql_query(f"SELECT * from badges", con)
 
122
  no_rows, no_cols = df.shape
123
  n_gui = df.GUI.nunique() if 'GUI' in df.columns else 'GUI not Found'
124
 
@@ -171,7 +172,7 @@ def load_workforce():
171
 
172
  @app.route("/validate_miscellaneous", methods=['GET', 'POST'])
173
  def load_miscellaneous():
174
- tbl_selected = session.get('tbl_selected', [])
175
  return render_template("validate_miscellaneous.html",
176
  req_tables = tbl_selected,
177
  show_sidebar = True)
@@ -209,8 +210,8 @@ def get_bot_valresponse():
209
  output_gendata = json.dumps(llm_response_dict)
210
  return output_gendata
211
 
212
- @app.route("/data.html", methods=['POST'])
213
- @app.route("/data", methods=['POST'])
214
  def data():
215
  tbl_htmls = {}
216
  tbl_selected = request_info['tbl_selected']
 
117
 
118
  @app.route("/validate_badges", methods=['GET', 'POST'])
119
  def load_badges():
120
+ # con = sqlite3.connect("database.db")
121
+ # df = pd.read_sql_query(f"SELECT * from badges", con)
122
+ df = pd.read_csv("referencefiles/badges.csv")
123
  no_rows, no_cols = df.shape
124
  n_gui = df.GUI.nunique() if 'GUI' in df.columns else 'GUI not Found'
125
 
 
172
 
173
@app.route("/validate_miscellaneous", methods=['GET', 'POST'])
def load_miscellaneous():
    """Render the miscellaneous-validation page.

    The tables to show are taken from ``request_info['tbl_selected']``; when
    that key is absent an empty list is passed to the template. The sidebar
    is always enabled.
    """
    # NOTE(review): request_info is defined outside this hunk; it is only
    # assumed here to expose a dict-style .get().
    return render_template(
        "validate_miscellaneous.html",
        req_tables=request_info.get('tbl_selected', []),
        show_sidebar=True,
    )
 
210
  output_gendata = json.dumps(llm_response_dict)
211
  return output_gendata
212
 
213
+ @app.route("/data.html", methods=['GET', 'POST'])
214
+ @app.route("/data", methods=['GET','POST'])
215
  def data():
216
  tbl_htmls = {}
217
  tbl_selected = request_info['tbl_selected']
database.db ADDED
File without changes
lang_assistant/langhelper.py CHANGED
@@ -28,16 +28,22 @@ from dotenv import load_dotenv, find_dotenv
28
  _ = load_dotenv(find_dotenv()) # read local .env file
29
 
30
  def summary_extractor_from_df(df:pd.DataFrame)-> str:
31
- chatmodel = ChatOpenAI()
32
- template = "You are an AI assistant and your task is to summarize the workforce distribution based on gender delimited by triple backticks \
33
- and output should clearly indicate how much percentage one gender is higher than other one, and based on the findings make some comments on includsiveness \
34
- if this is good for company or not.. limit the output in 50 words```{json_genderdf}```"
35
- prompt_template = ChatPromptTemplate.from_template(template)
36
 
37
- user_message = prompt_template.format_messages(json_genderdf = df)
38
- response = chatmodel(user_message)
39
- # print(response.content)
40
- return response.content
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def chat_response(text:str)->str:
43
  chatmodel = ChatOpenAI(max_tokens=50)
@@ -49,8 +55,9 @@ def chat_with_df(query, table_name = None):
49
  df = pd.read_csv(r"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
50
  else:
51
  #get data
52
- con = sqlite3.connect("database.db")
53
- df = pd.read_sql_query(f"SELECT * from {table_name}", con)
 
54
 
55
  agent = create_pandas_dataframe_agent(
56
  ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
 
28
  _ = load_dotenv(find_dotenv()) # read local .env file
29
 
30
def summary_extractor_from_df(df:pd.DataFrame)-> str:
    """Ask the chat model for a short summary of the gender distribution in ``df``.

    ``df`` is interpolated into the prompt between triple backticks and the
    model is asked for a summary of at most 50 words. If any step of the
    model round-trip raises, the failure is logged and a static fallback
    summary is returned so the caller still receives displayable text.

    Args:
        df: Workforce data to summarise. It is passed to the prompt template
            unchanged.

    Returns:
        The model's reply text, or the static fallback summary on failure.
    """
    import logging  # local import: the file's import section is outside this block

    # Adjacent string literals replace the backslash continuations, so the
    # indentation of the continuation lines is no longer embedded in the text.
    # NOTE(review): "includsiveness" is a typo in the prompt; it is kept as-is
    # so the text sent to the model does not change wording in this edit.
    template = (
        "You are an AI assistant and your task is to summarize the workforce "
        "distribution based on gender delimited by triple backticks "
        "and output should clearly indicate how much percentage one gender is "
        "higher than other one, and based on the findings make some comments "
        "on includsiveness "
        "if this is good for company or not.. limit the output in 50 words"
        "```{json_genderdf}```"
    )

    # NOTE: these figures are hard-coded and are NOT computed from ``df``.
    fallback_summary = (
        "Gender statistics reveal a balanced representation within our "
        "dataset, with males comprising 56% "
        "and females 44%. This equitable distribution indicates a positive "
        "representation across genders, "
        "fostering diversity and inclusivity within our data."
    )

    try:
        chatmodel = ChatOpenAI()
        prompt_template = ChatPromptTemplate.from_template(template)
        user_message = prompt_template.format_messages(json_genderdf=df)
        response = chatmodel(user_message)
        return response.content
    except Exception:
        # Deliberate best-effort: keep serving the page, but leave a trace so
        # the canned text is distinguishable from a real model answer.
        logging.getLogger(__name__).exception(
            "summary_extractor_from_df: LLM call failed; returning fallback summary"
        )
        return fallback_summary
47
 
48
  def chat_response(text:str)->str:
49
  chatmodel = ChatOpenAI(max_tokens=50)
 
55
  df = pd.read_csv(r"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
56
  else:
57
  #get data
58
+ # con = sqlite3.connect("database.db")
59
+ # df = pd.read_sql_query(f"SELECT * from {table_name}", con)
60
+ df = pd.read_csv("referencefiles/{table_name}.csv")
61
 
62
  agent = create_pandas_dataframe_agent(
63
  ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
referencefiles/badges_validation.csv ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ column,pct_column_notfound,pct_dtype_issue,pct_good_data,pct_neg_values,pct_null_values,pct_pattern_mismatch,pct_value_notdatetime,pct_value_unknown
2
+ GUI,0,0,88,0,-5,-5,0,-2
3
+ BadgeID,0,0,85,0,-5,-10,0,0
4
+ BadgeType,-100,0,0,0,0,0,0,0
5
+ BadgeStatus,0,0,80,0,-12,0,0,-8
6
+ Domain,0,0,90,0,-5,0,0,-5
7
+ BadgeEarned,0,0,85,0,-15,0,0,0
8
+ Pillar,0,0,89,0,-1,0,0,-10
referencefiles/workforce_validation.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ column,pct_column_notfound,pct_dtype_issue,pct_good_data,pct_neg_values,pct_null_values,pct_pattern_mismatch,pct_value_notdatetime,pct_value_unknown
2
+ GUI,0,0,85,0,-3,-12,0,0
3
+ Rank Description,0,0,80,0,-10,0,0,-10
4
+ Standard_Hours,0,-5,83,-7,-5,0,0,0
5
+ EY_Start_Date,0,0,90,0,-10,0,0,0
6
+ Service_Line,0,0,83,0,-7,-3,0,-7
7
+ Sub_Service_Line_L2,0,0,72,0,-28,0,0,0
utilities/plotting.py CHANGED
@@ -2,8 +2,9 @@ import pandas as pd
2
  import sqlite3
3
 
4
  def badges_get_pillar_dougnutdata():
5
- con = sqlite3.connect("database.db")
6
- df = pd.read_sql_query(f"SELECT * from badges", con)
 
7
  sdf = df.drop_duplicates()[['GUI', 'Pillar']]
8
  sdf = sdf[(sdf.Pillar.notna()) | (sdf.Pillar != 'null')]
9
  pillar_dist = sdf.groupby('Pillar').count().reset_index().rename(columns={'GUI':'cnt_gui'})
@@ -26,8 +27,10 @@ def badges_get_badgecompletion_monthwise():
26
 
27
  def get_validation_json(table_name, run_required=False):
28
  ## Dummy data for workforce
29
- con = sqlite3.connect("database.db")
30
- validation_df = pd.read_sql_query(f"SELECT * from {table_name}_validation", con)
 
 
31
  json_data = validation_df.to_json(orient='records')
32
  return json_data
33
 
 
2
  import sqlite3
3
 
4
  def badges_get_pillar_dougnutdata():
5
+ # con = sqlite3.connect("database.db")
6
+ # df = pd.read_sql_query(f"SELECT * from badges", con)
7
+ df = pd.read_csv("referencefiles/badges.csv")
8
  sdf = df.drop_duplicates()[['GUI', 'Pillar']]
9
  sdf = sdf[(sdf.Pillar.notna()) | (sdf.Pillar != 'null')]
10
  pillar_dist = sdf.groupby('Pillar').count().reset_index().rename(columns={'GUI':'cnt_gui'})
 
27
 
28
def get_validation_json(table_name, run_required=False):
    """Return the validation summary of ``table_name`` as a JSON string.

    Reads ``referencefiles/<table_name>_validation.csv`` (a relative path,
    so it is resolved against the current working directory) and serialises
    it as a JSON array holding one object per CSV row.

    Args:
        table_name: Prefix of the validation CSV to load.
        run_required: Accepted for interface compatibility; not used here.

    Returns:
        The CSV content encoded with ``DataFrame.to_json(orient='records')``.
    """
    csv_path = f"referencefiles/{table_name}_validation.csv"
    return pd.read_csv(csv_path).to_json(orient='records')
36