ArchiMathur committed on
Commit
2f3b171
·
verified ·
1 Parent(s): 0a309f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +229 -28
app.py CHANGED
@@ -1,6 +1,206 @@
1
 
2
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import gradio as gr
5
  import pandas as pd
6
  import numpy as np
@@ -8,7 +208,7 @@ import pickle
8
  import sklearn
9
  from datasets import load_dataset
10
  import joblib
11
-
12
 
13
  # Read the data
14
  data = pd.read_csv("mldata.csv")
@@ -24,15 +224,15 @@ def load_model(model_choice):
24
  elif model_choice == "Sequential Model":
25
  try:
26
  # Try loading the Sequential model saved using joblib
27
- with open('my_seq_model_second.pkl', 'rb') as pickleFile:
28
- return joblib.load(pickleFile)
29
  except:
30
  # If joblib loading fails, fallback to TensorFlow loading
31
- return load_model('my_seq_model (1)')
 
32
  else:
33
  raise ValueError("Invalid model selection")
34
 
35
- # Prepare categorical data (same as original code)
36
  categorical_cols = data[[
37
  'certifications',
38
  'workshops',
@@ -47,7 +247,7 @@ for i in categorical_cols:
47
  data[i] = data[i].astype('category')
48
  data[i] = data[i].cat.codes
49
 
50
- # Create reference dictionaries for embeddings (same as original code)
51
  def create_embedding_dict(column):
52
  unique_names = list(categorical_cols[column].unique())
53
  unique_codes = list(data[column].unique())
@@ -60,7 +260,7 @@ career_interest_references = create_embedding_dict('interested career area ')
60
  company_intends_references = create_embedding_dict('Type of company want to settle in?')
61
  book_interest_references = create_embedding_dict('Interested Type of Books')
62
 
63
- # Prediction function (modified to accept model choice)
64
  def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
65
  self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill, memory_capability,
66
  subject_interest, career_interest, company_intend, senior_elder_advise, book_interest, introvert_extro,
@@ -68,7 +268,7 @@ def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_
68
  # Load the selected model
69
  rfmodel = load_model(model_choice)
70
 
71
- # Create DataFrame (same as original code)
72
  df = pd.DataFrame.from_dict(
73
  {
74
  "logical_thinking": [logical_thinking],
@@ -107,7 +307,7 @@ def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_
107
  "book_interest": book_interest_references
108
  })
109
 
110
- # Dummy encoding (same as original code)
111
  userdata_list = df.values.tolist()
112
 
113
  # Management-Technical dummy encoding
@@ -134,27 +334,29 @@ def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_
134
  else:
135
  return "Error in Smart-Hard worker encoding"
136
 
137
- # Prediction
138
- prediction_result_all = rfmodel.predict_proba(userdata_list)
 
 
 
139
 
140
  # Create result dictionary
141
- result_list = {
142
- "Applications Developer": float(prediction_result_all[0][0]),
143
- "CRM Technical Developer": float(prediction_result_all[0][1]),
144
- "Database Developer": float(prediction_result_all[0][2]),
145
- "Mobile Applications Developer": float(prediction_result_all[0][3]),
146
- "Network Security Engineer": float(prediction_result_all[0][4]),
147
- "Software Developer": float(prediction_result_all[0][5]),
148
- "Software Engineer": float(prediction_result_all[0][6]),
149
- "Software Quality Assurance (QA)/ Testing": float(prediction_result_all[0][7]),
150
- "Systems Security Administrator": float(prediction_result_all[0][8]),
151
- "Technical Support": float(prediction_result_all[0][9]),
152
- "UX Designer": float(prediction_result_all[0][10]),
153
- "Web Developer": float(prediction_result_all[0][11]),
154
- }
155
  return result_list
156
 
157
- # Lists for dropdown menus (same as original code)
158
  cert_list = ["app development", "distro making", "full stack", "hadoop", "information security", "machine learning", "python", "r programming", "shell programming"]
159
  workshop_list = ["cloud computing", "data science", "database security", "game development", "hacking", "system designing", "testing", "web technologies"]
160
  skill = ["excellent", "medium", "poor"]
@@ -165,7 +367,7 @@ book_list = ["Action and Adventure", "Anthology", "Art", "Autobiographies", "Bio
165
  Choice_list = ["Management", "Technical"]
166
  worker_list = ["hard worker", "smart worker"]
167
 
168
- # Create Gradio interface (modified to include model selection)
169
  demo = gr.Interface(
170
  fn=rfprediction,
171
  inputs=[
@@ -199,4 +401,3 @@ demo = gr.Interface(
199
  # Main execution
200
  if __name__ == "__main__":
201
  demo.launch(share=True)
202
-
 
1
 
2
 
3
 
4
+ # import gradio as gr
5
+ # import pandas as pd
6
+ # import numpy as np
7
+ # import pickle
8
+ # import sklearn
9
+ # from datasets import load_dataset
10
+ # import joblib
11
+
12
+
13
+ # # Read the data
14
+ # data = pd.read_csv("mldata.csv")
15
+
16
+ # # Function to load model based on selection
17
+ # def load_model(model_choice):
18
+ # if model_choice == "Random Forest":
19
+ # with open('rfweights (1).pkl', 'rb') as pickleFile:
20
+ # return pickle.load(pickleFile)
21
+ # elif model_choice == "Decision Tree":
22
+ # with open('dtreeweights.pkl', 'rb') as pickleFile:
23
+ # return pickle.load(pickleFile)
24
+ # elif model_choice == "Sequential Model":
25
+ # try:
26
+ # # Try loading the Sequential model saved using joblib
27
+ # with open('my_seq_model_second.pkl', 'rb') as pickleFile:
28
+ # return joblib.load(pickleFile)
29
+ # except:
30
+ # # If joblib loading fails, fallback to TensorFlow loading
31
+ # return load_model('my_seq_model (1)')
32
+ # else:
33
+ # raise ValueError("Invalid model selection")
34
+
35
+ # # Prepare categorical data (same as original code)
36
+ # categorical_cols = data[[
37
+ # 'certifications',
38
+ # 'workshops',
39
+ # 'Interested subjects',
40
+ # 'interested career area ',
41
+ # 'Type of company want to settle in?',
42
+ # 'Interested Type of Books'
43
+ # ]]
44
+
45
+ # # Assign category codes
46
+ # for i in categorical_cols:
47
+ # data[i] = data[i].astype('category')
48
+ # data[i] = data[i].cat.codes
49
+
50
+ # # Create reference dictionaries for embeddings (same as original code)
51
+ # def create_embedding_dict(column):
52
+ # unique_names = list(categorical_cols[column].unique())
53
+ # unique_codes = list(data[column].unique())
54
+ # return dict(zip(unique_names, unique_codes))
55
+
56
+ # certificates_references = create_embedding_dict('certifications')
57
+ # workshop_references = create_embedding_dict('workshops')
58
+ # subjects_interest_references = create_embedding_dict('Interested subjects')
59
+ # career_interest_references = create_embedding_dict('interested career area ')
60
+ # company_intends_references = create_embedding_dict('Type of company want to settle in?')
61
+ # book_interest_references = create_embedding_dict('Interested Type of Books')
62
+
63
+ # # Prediction function (modified to accept model choice)
64
+ # def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
65
+ # self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill, memory_capability,
66
+ # subject_interest, career_interest, company_intend, senior_elder_advise, book_interest, introvert_extro,
67
+ # team_player, management_technical, smart_hardworker):
68
+ # # Load the selected model
69
+ # rfmodel = load_model(model_choice)
70
+
71
+ # # Create DataFrame (same as original code)
72
+ # df = pd.DataFrame.from_dict(
73
+ # {
74
+ # "logical_thinking": [logical_thinking],
75
+ # "hackathon_attend": [hackathon_attend],
76
+ # "coding_skills": [coding_skills],
77
+ # "public_speaking_skills": [public_speaking_skills],
78
+ # "self_learning": [self_learning],
79
+ # "extra_course": [extra_course],
80
+ # "certificate": [certificate_code],
81
+ # "workshop": [worskhop_code],
82
+ # "read_writing_skills": [
83
+ # (0 if "poor" in read_writing_skill else 1 if "medium" in read_writing_skill else 2)
84
+ # ],
85
+ # "memory_capability": [
86
+ # (0 if "poor" in memory_capability else 1 if "medium" in memory_capability else 2)
87
+ # ],
88
+ # "subject_interest": [subject_interest],
89
+ # "career_interest": [career_interest],
90
+ # "company_intend": [company_intend],
91
+ # "senior_elder_advise": [senior_elder_advise],
92
+ # "book_interest": [book_interest],
93
+ # "introvert_extro": [introvert_extro],
94
+ # "team_player": [team_player],
95
+ # "management_technical":[management_technical],
96
+ # "smart_hardworker": [smart_hardworker]
97
+ # }
98
+ # )
99
+
100
+ # # Replace string values with numeric representations
101
+ # df = df.replace({
102
+ # "certificate": certificates_references,
103
+ # "workshop": workshop_references,
104
+ # "subject_interest": subjects_interest_references,
105
+ # "career_interest": career_interest_references,
106
+ # "company_intend": company_intends_references,
107
+ # "book_interest": book_interest_references
108
+ # })
109
+
110
+ # # Dummy encoding (same as original code)
111
+ # userdata_list = df.values.tolist()
112
+
113
+ # # Management-Technical dummy encoding
114
+ # if(df["management_technical"].values == "Management"):
115
+ # userdata_list[0].extend([1])
116
+ # userdata_list[0].extend([0])
117
+ # userdata_list[0].remove('Management')
118
+ # elif(df["management_technical"].values == "Technical"):
119
+ # userdata_list[0].extend([0])
120
+ # userdata_list[0].extend([1])
121
+ # userdata_list[0].remove('Technical')
122
+ # else:
123
+ # return "Error in Management-Technical encoding"
124
+
125
+ # # Smart-Hard worker dummy encoding
126
+ # if(df["smart_hardworker"].values == "smart worker"):
127
+ # userdata_list[0].extend([1])
128
+ # userdata_list[0].extend([0])
129
+ # userdata_list[0].remove('smart worker')
130
+ # elif(df["smart_hardworker"].values == "hard worker"):
131
+ # userdata_list[0].extend([0])
132
+ # userdata_list[0].extend([1])
133
+ # userdata_list[0].remove('hard worker')
134
+ # else:
135
+ # return "Error in Smart-Hard worker encoding"
136
+
137
+ # # Prediction
138
+ # prediction_result_all = rfmodel.predict_proba(userdata_list)
139
+
140
+ # # Create result dictionary
141
+ # result_list = {
142
+ # "Applications Developer": float(prediction_result_all[0][0]),
143
+ # "CRM Technical Developer": float(prediction_result_all[0][1]),
144
+ # "Database Developer": float(prediction_result_all[0][2]),
145
+ # "Mobile Applications Developer": float(prediction_result_all[0][3]),
146
+ # "Network Security Engineer": float(prediction_result_all[0][4]),
147
+ # "Software Developer": float(prediction_result_all[0][5]),
148
+ # "Software Engineer": float(prediction_result_all[0][6]),
149
+ # "Software Quality Assurance (QA)/ Testing": float(prediction_result_all[0][7]),
150
+ # "Systems Security Administrator": float(prediction_result_all[0][8]),
151
+ # "Technical Support": float(prediction_result_all[0][9]),
152
+ # "UX Designer": float(prediction_result_all[0][10]),
153
+ # "Web Developer": float(prediction_result_all[0][11]),
154
+ # }
155
+ # return result_list
156
+
157
+ # # Lists for dropdown menus (same as original code)
158
+ # cert_list = ["app development", "distro making", "full stack", "hadoop", "information security", "machine learning", "python", "r programming", "shell programming"]
159
+ # workshop_list = ["cloud computing", "data science", "database security", "game development", "hacking", "system designing", "testing", "web technologies"]
160
+ # skill = ["excellent", "medium", "poor"]
161
+ # subject_list = ["cloud computing", "Computer Architecture", "data engineering", "hacking", "IOT", "Management", "networks", "parallel computing", "programming", "Software Engineering"]
162
+ # career_list = ["Business process analyst", "cloud computing", "developer", "security", "system developer", "testing"]
163
+ # company_list = ["BPA", "Cloud Services", "Finance", "Product based", "product development", "SAaS services", "Sales and Marketing", "Service Based", "Testing and Maintainance Services", "Web Services"]
164
+ # book_list = ["Action and Adventure", "Anthology", "Art", "Autobiographies", "Biographies", "Childrens", "Comics","Cookbooks","Diaries","Dictionaries","Drama","Encyclopedias","Fantasy","Guide","Health","History","Horror","Journals","Math","Mystery","Poetry","Prayer books","Religion-Spirituality","Romance","Satire","Science","Science fiction","Self help","Series","Travel","Trilogy"]
165
+ # Choice_list = ["Management", "Technical"]
166
+ # worker_list = ["hard worker", "smart worker"]
167
+
168
+ # # Create Gradio interface (modified to include model selection)
169
+ # demo = gr.Interface(
170
+ # fn=rfprediction,
171
+ # inputs=[
172
+ # gr.Dropdown(["Random Forest", "Decision Tree","Sequential Model"], label="Select Machine Learning Model"),
173
+ # gr.Textbox(placeholder="What is your name?", label="Name"),
174
+ # gr.Slider(minimum=1, maximum=9, value=3, step=1, label="Are you a logical thinking person?", info="Scale: 1 - 9"),
175
+ # gr.Slider(minimum=0, maximum=6, value=0, step=1, label="Do you attend any Hackathons?", info="Scale: 0 - 6 | 0 - if not attended any"),
176
+ # gr.Slider(minimum=1, maximum=9, value=5, step=1, label="How do you rate your coding skills?", info="Scale: 1 - 9"),
177
+ # gr.Slider(minimum=1, maximum=9, value=3, step=1, label="How do you rate your public speaking skills/confidency?", info="Scale: 1 - 9"),
178
+ # gr.Radio({"Yes", "No"}, type="index", label="Are you a self-learning person? *"),
179
+ # gr.Radio({"Yes", "No"}, type="index", label="Do you take extra courses in uni (other than IT)? *"),
180
+ # gr.Dropdown(cert_list, label="Select a certificate you took!"),
181
+ # gr.Dropdown(workshop_list, label="Select a workshop you attended!"),
182
+ # gr.Dropdown(skill, label="Select your read and writing skill"),
183
+ # gr.Dropdown(skill, label="Is your memory capability good?"),
184
+ # gr.Dropdown(subject_list, label="What subject you are interested in?"),
185
+ # gr.Dropdown(career_list, label="Which IT-Career do you have interests in?"),
186
+ # gr.Dropdown(company_list, label="Do you have any interested company that you intend to settle in?"),
187
+ # gr.Radio({"Yes", "No"}, type="index", label="Do you ever seek any advices from senior or elders? *"),
188
+ # gr.Dropdown(book_list, label="Select your interested genre of book!"),
189
+ # gr.Radio({"Yes", "No"}, type="index", label="Are you an Introvert?| No - extrovert *"),
190
+ # gr.Radio({"Yes", "No"}, type="index", label="Ever worked in a team? *"),
191
+ # gr.Dropdown(Choice_list, label="Which area do you prefer: Management or Technical?"),
192
+ # gr.Dropdown(worker_list, label="Are you a Smart worker or Hard worker?")
193
+ # ],
194
+ # outputs=gr.Label(num_top_classes=5),
195
+ # title="IT-Career Recommendation System: TMI4033 Colletive Intelligence, Group 12",
196
+ # description="Members: Derrick Lim Kin Yeap 74597, Jason Jong Sheng Tat 75125, Jason Ng Yong Xing 75127, Muhamad Hazrie Bin Suhkery 73555 "
197
+ # )
198
+
199
+ # # Main execution
200
+ # if __name__ == "__main__":
201
+ # demo.launch(share=True)
202
+
203
+
204
  import gradio as gr
205
  import pandas as pd
206
  import numpy as np
 
208
  import sklearn
209
  from datasets import load_dataset
210
  import joblib
211
+ import tensorflow as tf
212
 
213
  # Read the data
214
  data = pd.read_csv("mldata.csv")
 
224
  elif model_choice == "Sequential Model":
225
  try:
226
  # Try loading the Sequential model saved using joblib
227
+ model = joblib.load('my_seq_model_second.pkl')
 
228
  except:
229
  # If joblib loading fails, fallback to TensorFlow loading
230
+ model = tf.keras.models.load_model('my_seq_model (1)')
231
+ return model
232
  else:
233
  raise ValueError("Invalid model selection")
234
 
235
+ # Prepare categorical data
236
  categorical_cols = data[[
237
  'certifications',
238
  'workshops',
 
247
  data[i] = data[i].astype('category')
248
  data[i] = data[i].cat.codes
249
 
250
+ # Create reference dictionaries for embeddings
251
  def create_embedding_dict(column):
252
  unique_names = list(categorical_cols[column].unique())
253
  unique_codes = list(data[column].unique())
 
260
  company_intends_references = create_embedding_dict('Type of company want to settle in?')
261
  book_interest_references = create_embedding_dict('Interested Type of Books')
262
 
263
+ # Prediction function
264
  def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
265
  self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill, memory_capability,
266
  subject_interest, career_interest, company_intend, senior_elder_advise, book_interest, introvert_extro,
 
268
  # Load the selected model
269
  rfmodel = load_model(model_choice)
270
 
271
+ # Create DataFrame
272
  df = pd.DataFrame.from_dict(
273
  {
274
  "logical_thinking": [logical_thinking],
 
307
  "book_interest": book_interest_references
308
  })
309
 
310
+ # Dummy encoding
311
  userdata_list = df.values.tolist()
312
 
313
  # Management-Technical dummy encoding
 
334
  else:
335
  return "Error in Smart-Hard worker encoding"
336
 
337
+ # Prediction handling for different model types
338
+ if model_choice in ["Random Forest", "Decision Tree"]:
339
+ prediction_result_all = rfmodel.predict_proba(userdata_list)
340
+ else: # Sequential Model (Keras)
341
+ prediction_result_all = rfmodel.predict(userdata_list)
342
 
343
  # Create result dictionary
344
+ careers = [
345
+ "Applications Developer", "CRM Technical Developer", "Database Developer",
346
+ "Mobile Applications Developer", "Network Security Engineer", "Software Developer",
347
+ "Software Engineer", "Software Quality Assurance (QA)/ Testing",
348
+ "Systems Security Administrator", "Technical Support", "UX Designer", "Web Developer"
349
+ ]
350
+
351
+ # Handle probability extraction based on model type
352
+ if model_choice in ["Random Forest", "Decision Tree"]:
353
+ result_list = {career: float(prediction_result_all[0][i]) for i, career in enumerate(careers)}
354
+ else: # Sequential Model
355
+ result_list = {career: float(prediction_result_all[0][i]) for i, career in enumerate(careers)}
356
+
 
357
  return result_list
358
 
359
+ # Lists for dropdown menus
360
  cert_list = ["app development", "distro making", "full stack", "hadoop", "information security", "machine learning", "python", "r programming", "shell programming"]
361
  workshop_list = ["cloud computing", "data science", "database security", "game development", "hacking", "system designing", "testing", "web technologies"]
362
  skill = ["excellent", "medium", "poor"]
 
367
  Choice_list = ["Management", "Technical"]
368
  worker_list = ["hard worker", "smart worker"]
369
 
370
+ # Create Gradio interface
371
  demo = gr.Interface(
372
  fn=rfprediction,
373
  inputs=[
 
401
  # Main execution
402
  if __name__ == "__main__":
403
  demo.launch(share=True)