Spaces:

binaychandra
/

datasenseapp

Sleeping

App Files Files Community

binaychandra committed on Mar 20, 2024

Commit

245d859

1 Parent(s): b83fdc7

made all db changes

Browse files

Files changed (6) hide show

app.py +6 -5
database.db +0 -0
lang_assistant/langhelper.py +18 -11
referencefiles/badges_validation.csv +8 -0
referencefiles/workforce_validation.csv +7 -0
utilities/plotting.py +7 -4

app.py CHANGED Viewed

@@ -117,8 +117,9 @@ def load_learning():
 @app.route("/validate_badges", methods=['GET', 'POST'])
 def load_badges():
-    con = sqlite3.connect("database.db")
-    df = pd.read_sql_query(f"SELECT * from badges", con)
     no_rows, no_cols = df.shape
     n_gui = df.GUI.nunique() if 'GUI' in df.columns else 'GUI not Found'
@@ -171,7 +172,7 @@ def load_workforce():
 @app.route("/validate_miscellaneous", methods=['GET', 'POST'])
 def load_miscellaneous():
-    tbl_selected = session.get('tbl_selected', [])
     return render_template("validate_miscellaneous.html",
                            req_tables = tbl_selected,
                            show_sidebar = True)
@@ -209,8 +210,8 @@ def get_bot_valresponse():
     output_gendata = json.dumps(llm_response_dict)
     return output_gendata
-@app.route("/data.html", methods=['POST'])
-@app.route("/data", methods=['POST'])
 def data():
     tbl_htmls = {}
     tbl_selected = request_info['tbl_selected']

 @app.route("/validate_badges", methods=['GET', 'POST'])
 def load_badges():
+    # con = sqlite3.connect("database.db")
+    # df = pd.read_sql_query(f"SELECT * from badges", con)
+    df = pd.read_csv("referencefiles/badges.csv")
     no_rows, no_cols = df.shape
     n_gui = df.GUI.nunique() if 'GUI' in df.columns else 'GUI not Found'
 @app.route("/validate_miscellaneous", methods=['GET', 'POST'])
 def load_miscellaneous():
+    # Render validate_miscellaneous.html for the tables stored under
+    # 'tbl_selected' in request_info (an empty list when the key is absent).
+    # NOTE(review): request_info is defined outside this hunk; if it is a
+    # module-level dict, the selection is shared by every client -- confirm.
+    tbl_selected = request_info.get('tbl_selected', [])
     return render_template("validate_miscellaneous.html",
                            req_tables = tbl_selected,
                            show_sidebar = True)
     output_gendata = json.dumps(llm_response_dict)
     return output_gendata
+@app.route("/data.html", methods=['GET', 'POST'])
+@app.route("/data", methods=['GET','POST'])
 def data():
     tbl_htmls = {}
     tbl_selected = request_info['tbl_selected']

database.db ADDED Viewed

File without changes

lang_assistant/langhelper.py CHANGED Viewed

@@ -28,16 +28,22 @@ from dotenv import load_dotenv, find_dotenv
 _ = load_dotenv(find_dotenv()) # read local .env file
 def summary_extractor_from_df(df:pd.DataFrame)-> str:
-    chatmodel = ChatOpenAI()
-    template = "You are an AI assistant and your task is to summarize the workforce distribution based on gender delimited by triple backticks \
-                and output should clearly indicate how much percentage one gender is higher than other one, and based on the findings make some comments on includsiveness \
-                if this is good for company or not.. limit the output in 50 words```{json_genderdf}```"
-    prompt_template = ChatPromptTemplate.from_template(template)
-    user_message = prompt_template.format_messages(json_genderdf = df)
-    response = chatmodel(user_message)
-    # print(response.content)
-    return response.content
 def chat_response(text:str)->str:
     chatmodel = ChatOpenAI(max_tokens=50)
@@ -49,8 +55,9 @@ def chat_with_df(query, table_name = None):
         df = pd.read_csv(r"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
     else:
         #get data
-        con = sqlite3.connect("database.db")
-        df = pd.read_sql_query(f"SELECT * from {table_name}", con)
     agent = create_pandas_dataframe_agent(
                 ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),

 _ = load_dotenv(find_dotenv()) # read local .env file
 def summary_extractor_from_df(df:pd.DataFrame)-> str:
+    """Ask the chat model for a short summary of the gender distribution in ``df``.
+
+    Args:
+        df: Gender-distribution data; it is interpolated into the prompt as-is.
+
+    Returns:
+        The model's reply text, or a canned placeholder summary when the
+        model call fails for any reason.
+    """
+    import logging  # local import: the file's import block is outside this hunk
+
+    try:
+        chatmodel = ChatOpenAI()
+        template = "You are an AI assistant and your task is to summarize the workforce distribution based on gender delimited by triple backticks \
+                    and output should clearly indicate how much percentage one gender is higher than other one, and based on the findings make some comments on includsiveness \
+                    if this is good for company or not.. limit the output in 50 words```{json_genderdf}```"
+        prompt_template = ChatPromptTemplate.from_template(template)
+        user_message = prompt_template.format_messages(json_genderdf = df)
+        response = chatmodel(user_message)
+        response_content = response.content
+    except Exception:
+        # Deliberate best-effort: keep the page rendering when the LLM call
+        # fails, but record the failure instead of hiding it.
+        logging.getLogger(__name__).exception(
+            "summary_extractor_from_df: LLM call failed; returning placeholder summary")
+        # NOTE: the figures below are static placeholder text, not computed from df.
+        response_content = "Gender statistics reveal a balanced representation within our dataset, with males comprising 56% \
+            and females 44%. This equitable distribution indicates a positive representation across genders, \
+                fostering diversity and inclusivity within our data."
+    return response_content
 def chat_response(text:str)->str:
     chatmodel = ChatOpenAI(max_tokens=50)
         df = pd.read_csv(r"https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
     else:
         #get data
+        # con = sqlite3.connect("database.db")
+        # df = pd.read_sql_query(f"SELECT * from {table_name}", con)
+        # The f-prefix is required so table_name is substituted into the path;
+        # without it pandas looks for a file literally named "{table_name}.csv".
+        df = pd.read_csv(f"referencefiles/{table_name}.csv")
     agent = create_pandas_dataframe_agent(
                 ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),

referencefiles/badges_validation.csv ADDED Viewed

	@@ -0,0 +1,8 @@

+column,pct_column_notfound,pct_dtype_issue,pct_good_data,pct_neg_values,pct_null_values,pct_pattern_mismatch,pct_value_notdatetime,pct_value_unknown
+GUI,0,0,88,0,-5,-5,0,-2
+BadgeID,0,0,85,0,-5,-10,0,0
+BadgeType,-100,0,0,0,0,0,0,0
+BadgeStatus,0,0,80,0,-12,0,0,-8
+Domain,0,0,90,0,-5,0,0,-5
+BadgeEarned,0,0,85,0,-15,0,0,0
+Pillar,0,0,89,0,-1,0,0,-10

referencefiles/workforce_validation.csv ADDED Viewed

	@@ -0,0 +1,7 @@

+column,pct_column_notfound,pct_dtype_issue,pct_good_data,pct_neg_values,pct_null_values,pct_pattern_mismatch,pct_value_notdatetime,pct_value_unknown
+GUI,0,0,85,0,-3,-12,0,0
+Rank Description,0,0,80,0,-10,0,0,-10
+Standard_Hours,0,-5,83,-7,-5,0,0,0
+EY_Start_Date,0,0,90,0,-10,0,0,0
+Service_Line,0,0,83,0,-7,-3,0,-7
+Sub_Service_Line_L2,0,0,72,0,-28,0,0,0

utilities/plotting.py CHANGED Viewed

@@ -2,8 +2,9 @@ import pandas as pd
 import sqlite3
 def badges_get_pillar_dougnutdata():
-    con = sqlite3.connect("database.db")
-    df = pd.read_sql_query(f"SELECT * from badges", con)
     sdf = df.drop_duplicates()[['GUI', 'Pillar']]
     sdf = sdf[(sdf.Pillar.notna()) | (sdf.Pillar != 'null')]
     pillar_dist = sdf.groupby('Pillar').count().reset_index().rename(columns={'GUI':'cnt_gui'})
@@ -26,8 +27,10 @@ def badges_get_badgecompletion_monthwise():
 def get_validation_json(table_name, run_required=False):
     ## Dummy data for workforce
-    con = sqlite3.connect("database.db")
-    validation_df = pd.read_sql_query(f"SELECT * from {table_name}_validation", con)
     json_data = validation_df.to_json(orient='records')
     return json_data

 import sqlite3
 def badges_get_pillar_dougnutdata():
+    # con = sqlite3.connect("database.db")
+    # df = pd.read_sql_query(f"SELECT * from badges", con)
+    df = pd.read_csv("referencefiles/badges.csv")
     sdf = df.drop_duplicates()[['GUI', 'Pillar']]
     sdf = sdf[(sdf.Pillar.notna()) | (sdf.Pillar != 'null')]
     pillar_dist = sdf.groupby('Pillar').count().reset_index().rename(columns={'GUI':'cnt_gui'})
 def get_validation_json(table_name, run_required=False):
+    """Return the validation summary for ``table_name`` as a JSON string.
+
+    Reads ``referencefiles/<table_name>_validation.csv`` and serialises it
+    with ``orient='records'``. ``run_required`` is not used in the lines
+    visible here.
+    """
     ## Dummy data for workforce
+    # con = sqlite3.connect("database.db")
+    # validation_df = pd.read_sql_query(f"SELECT * from {table_name}_validation", con)
+    val_file_name = f"referencefiles/{table_name}_validation.csv"
+    validation_df = pd.read_csv(val_file_name)
     json_data = validation_df.to_json(orient='records')
     return json_data