ArchiMathur committed on
Commit
3289837
·
verified ·
1 Parent(s): 2f3b171

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -235
app.py CHANGED
@@ -1,206 +1,6 @@
1
 
2
 
3
 
4
- # import gradio as gr
5
- # import pandas as pd
6
- # import numpy as np
7
- # import pickle
8
- # import sklearn
9
- # from datasets import load_dataset
10
- # import joblib
11
-
12
-
13
- # # Read the data
14
- # data = pd.read_csv("mldata.csv")
15
-
16
- # # Function to load model based on selection
17
- # def load_model(model_choice):
18
- # if model_choice == "Random Forest":
19
- # with open('rfweights (1).pkl', 'rb') as pickleFile:
20
- # return pickle.load(pickleFile)
21
- # elif model_choice == "Decision Tree":
22
- # with open('dtreeweights.pkl', 'rb') as pickleFile:
23
- # return pickle.load(pickleFile)
24
- # elif model_choice == "Sequential Model":
25
- # try:
26
- # # Try loading the Sequential model saved using joblib
27
- # with open('my_seq_model_second.pkl', 'rb') as pickleFile:
28
- # return joblib.load(pickleFile)
29
- # except:
30
- # # If joblib loading fails, fallback to TensorFlow loading
31
- # return load_model('my_seq_model (1)')
32
- # else:
33
- # raise ValueError("Invalid model selection")
34
-
35
- # # Prepare categorical data (same as original code)
36
- # categorical_cols = data[[
37
- # 'certifications',
38
- # 'workshops',
39
- # 'Interested subjects',
40
- # 'interested career area ',
41
- # 'Type of company want to settle in?',
42
- # 'Interested Type of Books'
43
- # ]]
44
-
45
- # # Assign category codes
46
- # for i in categorical_cols:
47
- # data[i] = data[i].astype('category')
48
- # data[i] = data[i].cat.codes
49
-
50
- # # Create reference dictionaries for embeddings (same as original code)
51
- # def create_embedding_dict(column):
52
- # unique_names = list(categorical_cols[column].unique())
53
- # unique_codes = list(data[column].unique())
54
- # return dict(zip(unique_names, unique_codes))
55
-
56
- # certificates_references = create_embedding_dict('certifications')
57
- # workshop_references = create_embedding_dict('workshops')
58
- # subjects_interest_references = create_embedding_dict('Interested subjects')
59
- # career_interest_references = create_embedding_dict('interested career area ')
60
- # company_intends_references = create_embedding_dict('Type of company want to settle in?')
61
- # book_interest_references = create_embedding_dict('Interested Type of Books')
62
-
63
- # # Prediction function (modified to accept model choice)
64
- # def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
65
- # self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill, memory_capability,
66
- # subject_interest, career_interest, company_intend, senior_elder_advise, book_interest, introvert_extro,
67
- # team_player, management_technical, smart_hardworker):
68
- # # Load the selected model
69
- # rfmodel = load_model(model_choice)
70
-
71
- # # Create DataFrame (same as original code)
72
- # df = pd.DataFrame.from_dict(
73
- # {
74
- # "logical_thinking": [logical_thinking],
75
- # "hackathon_attend": [hackathon_attend],
76
- # "coding_skills": [coding_skills],
77
- # "public_speaking_skills": [public_speaking_skills],
78
- # "self_learning": [self_learning],
79
- # "extra_course": [extra_course],
80
- # "certificate": [certificate_code],
81
- # "workshop": [worskhop_code],
82
- # "read_writing_skills": [
83
- # (0 if "poor" in read_writing_skill else 1 if "medium" in read_writing_skill else 2)
84
- # ],
85
- # "memory_capability": [
86
- # (0 if "poor" in memory_capability else 1 if "medium" in memory_capability else 2)
87
- # ],
88
- # "subject_interest": [subject_interest],
89
- # "career_interest": [career_interest],
90
- # "company_intend": [company_intend],
91
- # "senior_elder_advise": [senior_elder_advise],
92
- # "book_interest": [book_interest],
93
- # "introvert_extro": [introvert_extro],
94
- # "team_player": [team_player],
95
- # "management_technical":[management_technical],
96
- # "smart_hardworker": [smart_hardworker]
97
- # }
98
- # )
99
-
100
- # # Replace string values with numeric representations
101
- # df = df.replace({
102
- # "certificate": certificates_references,
103
- # "workshop": workshop_references,
104
- # "subject_interest": subjects_interest_references,
105
- # "career_interest": career_interest_references,
106
- # "company_intend": company_intends_references,
107
- # "book_interest": book_interest_references
108
- # })
109
-
110
- # # Dummy encoding (same as original code)
111
- # userdata_list = df.values.tolist()
112
-
113
- # # Management-Technical dummy encoding
114
- # if(df["management_technical"].values == "Management"):
115
- # userdata_list[0].extend([1])
116
- # userdata_list[0].extend([0])
117
- # userdata_list[0].remove('Management')
118
- # elif(df["management_technical"].values == "Technical"):
119
- # userdata_list[0].extend([0])
120
- # userdata_list[0].extend([1])
121
- # userdata_list[0].remove('Technical')
122
- # else:
123
- # return "Error in Management-Technical encoding"
124
-
125
- # # Smart-Hard worker dummy encoding
126
- # if(df["smart_hardworker"].values == "smart worker"):
127
- # userdata_list[0].extend([1])
128
- # userdata_list[0].extend([0])
129
- # userdata_list[0].remove('smart worker')
130
- # elif(df["smart_hardworker"].values == "hard worker"):
131
- # userdata_list[0].extend([0])
132
- # userdata_list[0].extend([1])
133
- # userdata_list[0].remove('hard worker')
134
- # else:
135
- # return "Error in Smart-Hard worker encoding"
136
-
137
- # # Prediction
138
- # prediction_result_all = rfmodel.predict_proba(userdata_list)
139
-
140
- # # Create result dictionary
141
- # result_list = {
142
- # "Applications Developer": float(prediction_result_all[0][0]),
143
- # "CRM Technical Developer": float(prediction_result_all[0][1]),
144
- # "Database Developer": float(prediction_result_all[0][2]),
145
- # "Mobile Applications Developer": float(prediction_result_all[0][3]),
146
- # "Network Security Engineer": float(prediction_result_all[0][4]),
147
- # "Software Developer": float(prediction_result_all[0][5]),
148
- # "Software Engineer": float(prediction_result_all[0][6]),
149
- # "Software Quality Assurance (QA)/ Testing": float(prediction_result_all[0][7]),
150
- # "Systems Security Administrator": float(prediction_result_all[0][8]),
151
- # "Technical Support": float(prediction_result_all[0][9]),
152
- # "UX Designer": float(prediction_result_all[0][10]),
153
- # "Web Developer": float(prediction_result_all[0][11]),
154
- # }
155
- # return result_list
156
-
157
- # # Lists for dropdown menus (same as original code)
158
- # cert_list = ["app development", "distro making", "full stack", "hadoop", "information security", "machine learning", "python", "r programming", "shell programming"]
159
- # workshop_list = ["cloud computing", "data science", "database security", "game development", "hacking", "system designing", "testing", "web technologies"]
160
- # skill = ["excellent", "medium", "poor"]
161
- # subject_list = ["cloud computing", "Computer Architecture", "data engineering", "hacking", "IOT", "Management", "networks", "parallel computing", "programming", "Software Engineering"]
162
- # career_list = ["Business process analyst", "cloud computing", "developer", "security", "system developer", "testing"]
163
- # company_list = ["BPA", "Cloud Services", "Finance", "Product based", "product development", "SAaS services", "Sales and Marketing", "Service Based", "Testing and Maintainance Services", "Web Services"]
164
- # book_list = ["Action and Adventure", "Anthology", "Art", "Autobiographies", "Biographies", "Childrens", "Comics","Cookbooks","Diaries","Dictionaries","Drama","Encyclopedias","Fantasy","Guide","Health","History","Horror","Journals","Math","Mystery","Poetry","Prayer books","Religion-Spirituality","Romance","Satire","Science","Science fiction","Self help","Series","Travel","Trilogy"]
165
- # Choice_list = ["Management", "Technical"]
166
- # worker_list = ["hard worker", "smart worker"]
167
-
168
- # # Create Gradio interface (modified to include model selection)
169
- # demo = gr.Interface(
170
- # fn=rfprediction,
171
- # inputs=[
172
- # gr.Dropdown(["Random Forest", "Decision Tree","Sequential Model"], label="Select Machine Learning Model"),
173
- # gr.Textbox(placeholder="What is your name?", label="Name"),
174
- # gr.Slider(minimum=1, maximum=9, value=3, step=1, label="Are you a logical thinking person?", info="Scale: 1 - 9"),
175
- # gr.Slider(minimum=0, maximum=6, value=0, step=1, label="Do you attend any Hackathons?", info="Scale: 0 - 6 | 0 - if not attended any"),
176
- # gr.Slider(minimum=1, maximum=9, value=5, step=1, label="How do you rate your coding skills?", info="Scale: 1 - 9"),
177
- # gr.Slider(minimum=1, maximum=9, value=3, step=1, label="How do you rate your public speaking skills/confidency?", info="Scale: 1 - 9"),
178
- # gr.Radio({"Yes", "No"}, type="index", label="Are you a self-learning person? *"),
179
- # gr.Radio({"Yes", "No"}, type="index", label="Do you take extra courses in uni (other than IT)? *"),
180
- # gr.Dropdown(cert_list, label="Select a certificate you took!"),
181
- # gr.Dropdown(workshop_list, label="Select a workshop you attended!"),
182
- # gr.Dropdown(skill, label="Select your read and writing skill"),
183
- # gr.Dropdown(skill, label="Is your memory capability good?"),
184
- # gr.Dropdown(subject_list, label="What subject you are interested in?"),
185
- # gr.Dropdown(career_list, label="Which IT-Career do you have interests in?"),
186
- # gr.Dropdown(company_list, label="Do you have any interested company that you intend to settle in?"),
187
- # gr.Radio({"Yes", "No"}, type="index", label="Do you ever seek any advices from senior or elders? *"),
188
- # gr.Dropdown(book_list, label="Select your interested genre of book!"),
189
- # gr.Radio({"Yes", "No"}, type="index", label="Are you an Introvert?| No - extrovert *"),
190
- # gr.Radio({"Yes", "No"}, type="index", label="Ever worked in a team? *"),
191
- # gr.Dropdown(Choice_list, label="Which area do you prefer: Management or Technical?"),
192
- # gr.Dropdown(worker_list, label="Are you a Smart worker or Hard worker?")
193
- # ],
194
- # outputs=gr.Label(num_top_classes=5),
195
- # title="IT-Career Recommendation System: TMI4033 Colletive Intelligence, Group 12",
196
- # description="Members: Derrick Lim Kin Yeap 74597, Jason Jong Sheng Tat 75125, Jason Ng Yong Xing 75127, Muhamad Hazrie Bin Suhkery 73555 "
197
- # )
198
-
199
- # # Main execution
200
- # if __name__ == "__main__":
201
- # demo.launch(share=True)
202
-
203
-
204
  import gradio as gr
205
  import pandas as pd
206
  import numpy as np
@@ -208,7 +8,7 @@ import pickle
208
  import sklearn
209
  from datasets import load_dataset
210
  import joblib
211
- import tensorflow as tf
212
 
213
  # Read the data
214
  data = pd.read_csv("mldata.csv")
@@ -221,18 +21,11 @@ def load_model(model_choice):
221
  elif model_choice == "Decision Tree":
222
  with open('dtreeweights.pkl', 'rb') as pickleFile:
223
  return pickle.load(pickleFile)
224
- elif model_choice == "Sequential Model":
225
- try:
226
- # Try loading the Sequential model saved using joblib
227
- model = joblib.load('my_seq_model_second.pkl')
228
- except:
229
- # If joblib loading fails, fallback to TensorFlow loading
230
- model = tf.keras.models.load_model('my_seq_model (1)')
231
- return model
232
  else:
233
  raise ValueError("Invalid model selection")
234
 
235
- # Prepare categorical data
236
  categorical_cols = data[[
237
  'certifications',
238
  'workshops',
@@ -247,7 +40,7 @@ for i in categorical_cols:
247
  data[i] = data[i].astype('category')
248
  data[i] = data[i].cat.codes
249
 
250
- # Create reference dictionaries for embeddings
251
  def create_embedding_dict(column):
252
  unique_names = list(categorical_cols[column].unique())
253
  unique_codes = list(data[column].unique())
@@ -260,7 +53,7 @@ career_interest_references = create_embedding_dict('interested career area ')
260
  company_intends_references = create_embedding_dict('Type of company want to settle in?')
261
  book_interest_references = create_embedding_dict('Interested Type of Books')
262
 
263
- # Prediction function
264
  def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
265
  self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill, memory_capability,
266
  subject_interest, career_interest, company_intend, senior_elder_advise, book_interest, introvert_extro,
@@ -268,7 +61,7 @@ def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_
268
  # Load the selected model
269
  rfmodel = load_model(model_choice)
270
 
271
- # Create DataFrame
272
  df = pd.DataFrame.from_dict(
273
  {
274
  "logical_thinking": [logical_thinking],
@@ -307,7 +100,7 @@ def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_
307
  "book_interest": book_interest_references
308
  })
309
 
310
- # Dummy encoding
311
  userdata_list = df.values.tolist()
312
 
313
  # Management-Technical dummy encoding
@@ -334,29 +127,27 @@ def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_
334
  else:
335
  return "Error in Smart-Hard worker encoding"
336
 
337
- # Prediction handling for different model types
338
- if model_choice in ["Random Forest", "Decision Tree"]:
339
- prediction_result_all = rfmodel.predict_proba(userdata_list)
340
- else: # Sequential Model (Keras)
341
- prediction_result_all = rfmodel.predict(userdata_list)
342
 
343
  # Create result dictionary
344
- careers = [
345
- "Applications Developer", "CRM Technical Developer", "Database Developer",
346
- "Mobile Applications Developer", "Network Security Engineer", "Software Developer",
347
- "Software Engineer", "Software Quality Assurance (QA)/ Testing",
348
- "Systems Security Administrator", "Technical Support", "UX Designer", "Web Developer"
349
- ]
350
-
351
- # Handle probability extraction based on model type
352
- if model_choice in ["Random Forest", "Decision Tree"]:
353
- result_list = {career: float(prediction_result_all[0][i]) for i, career in enumerate(careers)}
354
- else: # Sequential Model
355
- result_list = {career: float(prediction_result_all[0][i]) for i, career in enumerate(careers)}
356
-
 
357
  return result_list
358
 
359
- # Lists for dropdown menus
360
  cert_list = ["app development", "distro making", "full stack", "hadoop", "information security", "machine learning", "python", "r programming", "shell programming"]
361
  workshop_list = ["cloud computing", "data science", "database security", "game development", "hacking", "system designing", "testing", "web technologies"]
362
  skill = ["excellent", "medium", "poor"]
@@ -367,11 +158,11 @@ book_list = ["Action and Adventure", "Anthology", "Art", "Autobiographies", "Bio
367
  Choice_list = ["Management", "Technical"]
368
  worker_list = ["hard worker", "smart worker"]
369
 
370
- # Create Gradio interface
371
  demo = gr.Interface(
372
  fn=rfprediction,
373
  inputs=[
374
- gr.Dropdown(["Random Forest", "Decision Tree","Sequential Model"], label="Select Machine Learning Model"),
375
  gr.Textbox(placeholder="What is your name?", label="Name"),
376
  gr.Slider(minimum=1, maximum=9, value=3, step=1, label="Are you a logical thinking person?", info="Scale: 1 - 9"),
377
  gr.Slider(minimum=0, maximum=6, value=0, step=1, label="Do you attend any Hackathons?", info="Scale: 0 - 6 | 0 - if not attended any"),
@@ -401,3 +192,5 @@ demo = gr.Interface(
401
  # Main execution
402
  if __name__ == "__main__":
403
  demo.launch(share=True)
 
 
 
1
 
2
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import gradio as gr
5
  import pandas as pd
6
  import numpy as np
 
8
  import sklearn
9
  from datasets import load_dataset
10
  import joblib
11
+
12
 
13
  # Read the data
14
  data = pd.read_csv("mldata.csv")
 
21
  elif model_choice == "Decision Tree":
22
  with open('dtreeweights.pkl', 'rb') as pickleFile:
23
  return pickle.load(pickleFile)
24
+
 
 
 
 
 
 
 
25
  else:
26
  raise ValueError("Invalid model selection")
27
 
28
+ # Prepare categorical data (same as original code)
29
  categorical_cols = data[[
30
  'certifications',
31
  'workshops',
 
40
  data[i] = data[i].astype('category')
41
  data[i] = data[i].cat.codes
42
 
43
+ # Create reference dictionaries for embeddings (same as original code)
44
  def create_embedding_dict(column):
45
  unique_names = list(categorical_cols[column].unique())
46
  unique_codes = list(data[column].unique())
 
53
  company_intends_references = create_embedding_dict('Type of company want to settle in?')
54
  book_interest_references = create_embedding_dict('Interested Type of Books')
55
 
56
+ # Prediction function (modified to accept model choice)
57
  def rfprediction(model_choice, name, logical_thinking, hackathon_attend, coding_skills, public_speaking_skills,
58
  self_learning, extra_course, certificate_code, worskhop_code, read_writing_skill, memory_capability,
59
  subject_interest, career_interest, company_intend, senior_elder_advise, book_interest, introvert_extro,
 
61
  # Load the selected model
62
  rfmodel = load_model(model_choice)
63
 
64
+ # Create DataFrame (same as original code)
65
  df = pd.DataFrame.from_dict(
66
  {
67
  "logical_thinking": [logical_thinking],
 
100
  "book_interest": book_interest_references
101
  })
102
 
103
+ # Dummy encoding (same as original code)
104
  userdata_list = df.values.tolist()
105
 
106
  # Management-Technical dummy encoding
 
127
  else:
128
  return "Error in Smart-Hard worker encoding"
129
 
130
+ # Prediction
131
+ prediction_result_all = rfmodel.predict_proba(userdata_list)
 
 
 
132
 
133
  # Create result dictionary
134
+ result_list = {
135
+ "Applications Developer": float(prediction_result_all[0][0]),
136
+ "CRM Technical Developer": float(prediction_result_all[0][1]),
137
+ "Database Developer": float(prediction_result_all[0][2]),
138
+ "Mobile Applications Developer": float(prediction_result_all[0][3]),
139
+ "Network Security Engineer": float(prediction_result_all[0][4]),
140
+ "Software Developer": float(prediction_result_all[0][5]),
141
+ "Software Engineer": float(prediction_result_all[0][6]),
142
+ "Software Quality Assurance (QA)/ Testing": float(prediction_result_all[0][7]),
143
+ "Systems Security Administrator": float(prediction_result_all[0][8]),
144
+ "Technical Support": float(prediction_result_all[0][9]),
145
+ "UX Designer": float(prediction_result_all[0][10]),
146
+ "Web Developer": float(prediction_result_all[0][11]),
147
+ }
148
  return result_list
149
 
150
+ # Lists for dropdown menus (same as original code)
151
  cert_list = ["app development", "distro making", "full stack", "hadoop", "information security", "machine learning", "python", "r programming", "shell programming"]
152
  workshop_list = ["cloud computing", "data science", "database security", "game development", "hacking", "system designing", "testing", "web technologies"]
153
  skill = ["excellent", "medium", "poor"]
 
158
  Choice_list = ["Management", "Technical"]
159
  worker_list = ["hard worker", "smart worker"]
160
 
161
+ # Create Gradio interface (modified to include model selection)
162
  demo = gr.Interface(
163
  fn=rfprediction,
164
  inputs=[
165
+ gr.Dropdown(["Random Forest", "Decision Tree"], label="Select Machine Learning Model"),
166
  gr.Textbox(placeholder="What is your name?", label="Name"),
167
  gr.Slider(minimum=1, maximum=9, value=3, step=1, label="Are you a logical thinking person?", info="Scale: 1 - 9"),
168
  gr.Slider(minimum=0, maximum=6, value=0, step=1, label="Do you attend any Hackathons?", info="Scale: 0 - 6 | 0 - if not attended any"),
 
192
  # Main execution
193
  if __name__ == "__main__":
194
  demo.launch(share=True)
195
+
196
+