Spaces:

IAUAI
/

drop_prediction

Runtime error

App Files Files Community

Mustafa Al Hamad committed on Aug 15, 2022

Commit

d30b88c

1 Parent(s): b21b9f6

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -19

app.py CHANGED Viewed

@@ -29,6 +29,7 @@ from sklearn.feature_selection import SequentialFeatureSelector
 from sklearn.model_selection import GridSearchCV, StratifiedKFold
 import docx
 from docx.enum.dml import MSO_THEME_COLOR_INDEX
 def add_hyperlink(paragraph, text, url):
     # This gets access to the document.xml.rels file and gets a new relation id value
     part = paragraph.part
@@ -71,20 +72,10 @@ def savedoc(document,name):
 from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve, cohen_kappa_score, f1_score, recall_score, precision_score
 def measures(predicted, y_test):
     accuracy = accuracy_score(y_test, predicted)
-    #print('Accuracy: %f' % accuracy)
     precision = precision_score(y_test, predicted)
-    #print('Precision: %f' % precision)
     recall = recall_score(y_test, predicted)
-    #print('Recall: %f' % recall)
     f1 = f1_score(y_test, predicted)
-    #print('F1 score: %f' % f1)
-    kappa = cohen_kappa_score(y_test, predicted)
-    #print('Cohens kappa: %f' % kappa)
-    auc = roc_auc_score(y_test, predicted)
-    #print('ROC AUC: %f' % auc)
     matrix = confusion_matrix(y_test, predicted)
-    #print('Confusion Matrix')
-    #print(matrix)
     return accuracy
 def greet(operation,filer):
@@ -94,8 +85,13 @@ def greet(operation,filer):
     coset = pd.read_csv(filer.name)
     coset = coset.dropna(how='any')
     document = Document('temp.docx')
     if operation == "retrain":
-      wanted = coset.drop(columns=['SUBJECT','SID','CRSE_ID','COURSE','ROLE','GPA','INPUT','STATUS','GRADUATION TERM','CLASS #','COLLEGE','COLLEGE.1'])
       def termize(x):
           if str(x)[-1] == "1":
             return 0
@@ -173,7 +169,6 @@ def greet(operation,filer):
       document.add_paragraph('This report consists of the models retraining information on the new dataset with ('+str(len(coset))+') records')
       records = []
-      #print(majors,catalog,acad_prog,instructor)
       X = wanted.drop(columns=['GRADE'])
       y = wanted['GRADE']
       smote = BorderlineSMOTE(random_state = 11)
@@ -190,8 +185,6 @@ def greet(operation,filer):
       for model in models1:
           model.fit(X_smote,y_smote)
           preds = cross_val_predict(model, X_smote.values,y_smote.values, cv=kf, n_jobs=-1,);
-          #print("------------AFTER SMOTE------------")
-          #print(model);
           metrics[model] = measures(preds,y_smote.values)
           records.append(((str(type(model).__name__),str(metrics[model]))))
       document.add_paragraph(' ')
@@ -219,6 +212,16 @@ def greet(operation,filer):
       model = max(metrics, key=metrics.get)
       number = number + 1
       filename = str(number)+"="+type(model).__name__+'='+str(acc)+'.sav'
       document.add_paragraph(" ")
       document.add_paragraph(type(model).__name__+' has been chosen as the prediction model for achieving an accuracy of '+str(acc)+'%')
       pickle.dump(model, open(filename, 'wb'))
@@ -228,10 +231,13 @@ def greet(operation,filer):
       savedoc(document,'retraining_report.docx')
       #document.save('retraining_report.docx')
       return 'retraining_report.docx',str(type(model).__name__+' has been chosen as the prediction model for achieving an accuracy of '+str(acc)+'%')
-    majors = ['CIS', 'CS', 'CYS', 'AI']
-    catalog = ['222','223','220','251','252','311','313','314','301','413','315','321','322','325','290','310','411','412','414','415','425','421','422','423','424','444','511','512','513','516','524','521','522','517','162','102','132','122P','211','212','151','152','221','207','401','526','273','274','515','262','272','232','302','312','341','352','333','525','523','112','231','241','202','242','514','142','201','111','131','121','141','271','203','205','208','122','101','402','403','404','407','408','410','409','406','507','534','433','501','502','503','529','506','508','416','417','326','320','426','518','101N','207N','162N','142N','111N','351','341R','311R','361R','322R','332R','342R','312R','551','204','206','210','504','560','209']
-    acad_prog = ['CISFD', 'CSCFD', 'CSC2D', 'CSCMD', 'CIS2D', 'CISMD', 'CIS1D', 'CDFFD', 'CIS6D', 'CIS5D', 'AICFD', 'CDFMD', 'AICMD']
-    instructor = [2235186531, 2165408699, 2145689266, 2135152745, 2194771260, 4883031977, 4920301432, 4913765767, 4920307238, 2139595378, 4920306734, 4890602415, 2231227000, 2129908065, 4920014122, 2130269265, 4852562299, 4956513533, 4992319499, 5021161517, 5001698233, 2156812428, 4822287888, 4920021772, 2185381526, 2136566892, 2198726144, 2136354172, 4921298316, 4953758866, 2129220549, 4954412399, 2137607920, 2273533376, 5003490316, 4925121550, 2137442755, 5007956497, 5032808504, 5043174000, 5012205890, 5048802701, 5007934582, 4965553369, 2193108841, 5034375380, 5018036932, 4775083712, 2177540220, 2190446772, 2281161940, 5042841852, 2194950255, 2253073385, 2279818469, 2280775317, 5037751586, 2226897545, 2220612254, 2270461075, 5002842423, 2174788639, 2201033491, 4998734283, 5048351213, 2184587834, 2241953656, 2247975430, 2279031534, 2280417257, 5048798913, 4843544176, 4840600404, 4880228474, 4844692118, 4810116057, 4841018244, 4857639249, 4875844031, 4308059859, 4955997289, 4565364037, 4920304031, 4935399429, 4964818982, 4960094672, 4956759643, 5006769072, 5003282600, 5034374855, 4965383274, 5048176530, 2251581958, 2177906213, 5042839625, 4823432797, 2234984479, 4878860232, 5089909144, 5111504685, 2292511023, 5147608189, 5102823255, 4471442437, 4603727617, 2198695213, 4624163190, 4312363731, 4718537496, 4883071390, 4741891814, 4603562365, 4623599851, 4808444621, 2175746189, 5042827891, 2299645941, 2139937238, 2159825301, 2193768846, 2244083117, 2204670572, 5148738831, 2132105991, 2140252153, 2191852492, 5149923257, 2133982707, 2188005486, 4912215897, 4948758684, 2145585515, 2211124646, 5046795282, 4744286056, 2248064163, 2245249731, 2190958765, 2286796460, 2305826215, 2141977896, 4723166278, 4742067952, 2144199492, 4701573536, 1385090162, 2245184895, 4721381658, 4808446202, 2152399610, 2159483449, 5106671984, 2157858618, 2287537336, 5205981215, 5131991031, 2232494366, 2242364376, 5091334876, 2248831606, 2259498731, 4960068758, 4964159309, 2274397962, 5003281896, 
4917452237, 4955999572, 5003283211, 4980137016, 2170441656, 2259290137, 2272914009, 5149823043, 2129319034, 2190674916, 2303226693, 4959653529, 4955997870, 4962271367, 4965557457, 2317276819, 5043313315, 2343812773, 2225658147, 4959653691, 5106519327, 4967680674, 5005320761, 5005325659, 4960818335, 4959654127, 5002482818, 2226462785, 4958613075, 2153112707, 2306001696, 4925123179, 4916649578, 5149821042, 5154843441, 4351078655, 5227475626, 2187240364, 4962564241, 4975046019, 4959653674, 2176022109, 5020110780, 5006958981, 2225580216, 4987694365, 2267908548, 5191887278, 5192897851, 2310970719, 5006959475, 4958853885, 5009301759, 4962923880, 2202947881, 2266040432, 2244188415, 4960824601, 4958615055, 5096908433, 4414909094, 5106126173, 4880689344, 5125473603, 4638090996, 5190881427, 2260909364, 2177690056, 2164903703, 4987812801, 4958336415, 5003281669, 2334889939, 2187509735, 5225189005, 5005319730, 5007932166, 5008877090, 5003282196, 5014498902, 2138244238, 4659359826, 5148593633, 4604742805, 5010986266, 2273306536, 5011501597, 5150339186, 2247605157, 5008876391, 2301035418, 4958859692, 5009299880, 5003282356, 2135224245, 5164774288, 2355004193, 5149822626, 4962270246, 4954819395, 4958337068, 5094362869, 5005518168, 2315957042, 5006712625, 4318114771, 4978669968, 2267320079, 2328816035, 4471894179, 4931513752, 2254309220, 2190729464, 2273848680, 2132597263, 4712730199, 2279529530, 4585763023, 5045439236, 5039097585, 4914790972, 4998903923, 5010722011, 5033545452, 2189136713, 4925386944, 2246753860, 4967484918, 5010380619, 4918583022, 5167824846, 5112272457, 2256071967, 4551315881, 471199705, 4841645734, 4989400226, 4782685963, 4303177714, 4333866027, 5019192050, 5001246698, 2280149457, 4828320414, 5002953984, 2247295594, 5049403173, 2297916846, 4981949621, 4915644607, 4886002071, 4909665968, 4964343999, 2282147596, 5025351391, 5150333690, 5046499865, 2313480256, 2306057218, 2141728698, 4652689317, 5049210369, 4962261171, 5002946072, 4962925881, 5048801917, 
4968282016, 5049210318, 4861313860, 2133940646, 5001245645, 5006151762, 5046795038, 5177326457, 4982758753, 4969506927, 4992345875, 5005319130, 4998344325, 4609286316, 4678011207, 5005205144, 2321632258, 2226184565, 4958335677, 4917071087, 4972185195, 5006955685, 5048800825, 5050532324, 4961680022, 5010706396, 2172272804, 4810952038, 5043134411, 5005320504, 5102169082, 4720186163, 2231386959, 5113419929, 4780192735, 4522246666, 4920006733, 2224456244, 5091183270, 4961080679, 5002293420, 4962576371, 4565120209, 4625705830, 2248884319, 5002574919, 5097813375, 2146039868, 2217503978, 2127283768, 4498752851, 4962267858, 2134885018, 4878546760, 5150325161, 2153866729, 5000093139, 5004630616, 5010746243, 5003488865, 2252281795, 2225864599, 4964158067, 5147734960, 2136924210, 2190891107, 5161862823, 5004177905, 5010382114, 5181025497, 8003128720, 4960708620, 2142197596, 5002952387, 4956934653, 5062687326, 4954413711, 2262570713, 4954411712, 5046642204, 5046666453, 4919033187, 5112369898, 5054270529, 4954413452, 5050400268, 5095301781, 4998935139, 5156370627, 5119649578, 5117621773, 5071143450, 5004457883, 5025389815, 5005396372, 5162503573, 4880588500, 5094691051, 5099020291, 5093233278, 2185589491, 5047982533, 5059111438, 2167629104, 4741380648, 5106051264, 5119649157, 2213909697, 2276903510, 2164183756, 5113457583, 2261109173, 2275834231, 2243662237, 5094930687, 5113432424, 5098166924, 4619333478, 4997070887, 5034154184, 2281560215, 5096891323, 5046868507, 4985889119, 5056524567, 2131337464, 5149322579, 2243161140, 2174268801, 5117622577, 5096413053, 4951938565, 5001073672, 5111264347, 4920306153, 2186186819, 2305272809, 2314082251, 2246745963, 2165982585, 2196609271, 5117620771, 5018036721, 2207999756, 4956759098, 4924472053, 2187506928, 2312861157, 5016856032, 5016062760, 5097052002, 4953337390, 4850384272, 5087918026, 2183432395, 4924662737, 5122552887, 5000354599, 5090641357, 5053036866, 5150260241, 5156293806, 9166701499, 9166702293, 9166702380, 9166701959, 
10170573509, 5148549128, 4972189772, 9166702346, 5115224618, 5151869175, 4961300697, 4907504397, 5151867702, 5054099271, 5115225352, 5117733787, 4982588324, 5113932629, 5028111707, 4954415645, 5168247358, 2175811405, 5115721557, 4921756814, 2218059533, 5098369463, 2140645483, 9166702486, 10026817957, 9174045369, 5090631374, 8954014242, 9166702048, 5195588103, 2258310937, 9166701361, 5149311575, 2127935283, 5146997917, 2145436765, 5191893070, 4924374442, 5148739656, 5059485811, 2141132522, 5042827342, 5227234698, 5128730319, 4923774008, 5113613805, 5115668709, 5148977378, 2174892273, 2193369730, 2285982919, 2169514451, 4769948794, 5205480490, 5151841575, 5150156332, 5149047110, 2244450075, 5117212718, 5175749131, 2225895520, 4918655685, 2282522276, 2137380690, 2229108753, 5192202684, 9703033479, 5099381161, 5156055821, 2320154877, 4770708459, 5103716144, 5104706271, 9173405212, 9173407755, 5159965699, 4977637758, 2277344763, 9166701904, 5149892443, 5161204780, 9175509385, 5010991124, 8966158996, 2280181075, 8764107066, 8666975530, 5191869593, 5192287506, 5192050287, 5122244642, 5191145822, 2161462170, 5145277752, 2132778192, 5149825397, 5153349509, 5204551227, 5190942345, 2232498178, 5191122098, 5191560083, 9166692133, 5191211749, 5153146583, 2198539253, 5261973429, 5191145311, 5163880700, 11297416012, 4808648447, 5163880731, 2233894352, 5190771693, 5207104056, 5152070193, 5099381057, 9405541718, 5171208320, 5111946478, 8849524903]
     dir_name = str(os.getcwd())
     test = os.listdir(dir_name)
     modelname = ""
@@ -242,6 +248,20 @@ def greet(operation,filer):
             modelname = item
     if maxnum == 0:
         return None,"No model found, please use retrain operation to build one"
     #modelname = "VotingClassifier=0.95756598831352.sav"
     loaded_model = pickle.load(open(modelname, 'rb'))
     droppers = 0
@@ -261,7 +281,7 @@ def greet(operation,filer):
         elif row['TERM'][-1] == 5:
             semester = 3
         c_id = catalog.index(str(row['CATALOG_NBR']))
-        in_id = instructor.index(row['INSTRUCTOR_ID'])
         p_id = acad_prog.index(row['PROGRAM'])
         major = 0
         x = row['PROGRAM.1']

 from sklearn.model_selection import GridSearchCV, StratifiedKFold
 import docx
 from docx.enum.dml import MSO_THEME_COLOR_INDEX
 def add_hyperlink(paragraph, text, url):
     # This gets access to the document.xml.rels file and gets a new relation id value
     part = paragraph.part
 from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve, cohen_kappa_score, f1_score, recall_score, precision_score
 def measures(predicted, y_test):
     """Return the accuracy of ``predicted`` measured against ``y_test``.

     Args:
         predicted: Labels produced by a model.
         y_test: Ground-truth labels, aligned element-wise with ``predicted``.

     Returns:
         The value of ``accuracy_score(y_test, predicted)``.
     """
     # Accuracy is the only value handed back to the caller, so it is the only
     # metric computed. Evaluating precision/recall/F1 here and discarding them
     # was wasted work and, with scikit-learn's default ``average='binary'``,
     # would raise ValueError on non-binary labels for results nobody reads.
     return accuracy_score(y_test, predicted)
 def greet(operation,filer):
     coset = pd.read_csv(filer.name)
     coset = coset.dropna(how='any')
     document = Document('temp.docx')
+    allowedcols = ['SID', 'TERM', 'CATALOG_NBR', 'INSTRUCTOR_ID', 'GRADE', 'CGPA', 'PROGRAM', 'PROGRAM.1']
     if operation == "retrain":
+      allowedcols = allowedcols[1:]
+      for col in coset.columns:
+        if col not in allowedcols:
+          return None,str(col)+" is undefined column name, allowed columns for training are "+str(allowedcols)
+      wanted = coset#.drop(columns=['SUBJECT','SID','CRSE_ID','COURSE','ROLE','GPA','INPUT','STATUS','GRADUATION TERM','CLASS #','COLLEGE','COLLEGE.1'])
       def termize(x):
           if str(x)[-1] == "1":
             return 0
       document.add_paragraph('This report consists of the models retraining information on the new dataset with ('+str(len(coset))+') records')
       records = []
       X = wanted.drop(columns=['GRADE'])
       y = wanted['GRADE']
       smote = BorderlineSMOTE(random_state = 11)
       for model in models1:
           model.fit(X_smote,y_smote)
           preds = cross_val_predict(model, X_smote.values,y_smote.values, cv=kf, n_jobs=-1,);
           metrics[model] = measures(preds,y_smote.values)
           records.append(((str(type(model).__name__),str(metrics[model]))))
       document.add_paragraph(' ')
       model = max(metrics, key=metrics.get)
       number = number + 1
       filename = str(number)+"="+type(model).__name__+'='+str(acc)+'.sav'
+      datavalues = {"majors":str(majors),
+      'acad_prog':str(acad_prog),
+      'catalog':str(catalog),
+      'instructor':str(instructor)
+      }
+      dfv = pd.DataFrame(datavalues,index=[0])
+      dfv.to_csv(str(number)+"="+"values.csv")
       document.add_paragraph(" ")
       document.add_paragraph(type(model).__name__+' has been chosen as the prediction model for achieving an accuracy of '+str(acc)+'%')
       pickle.dump(model, open(filename, 'wb'))
       savedoc(document,'retraining_report.docx')
       #document.save('retraining_report.docx')
       return 'retraining_report.docx',str(type(model).__name__+' has been chosen as the prediction model for achieving an accuracy of '+str(acc)+'%')
+    for col in coset.columns:
+      if col not in allowedcols:
+        return None,str(col)+" is undefined column name, allowed columns for prediction are "+str(allowedcols)
+    majors = []
+    catalog = []
+    acad_prog = []
+    instructor = []
     dir_name = str(os.getcwd())
     test = os.listdir(dir_name)
     modelname = ""
             modelname = item
     if maxnum == 0:
         return None,"No model found, please use retrain operation to build one"
+    dfv = pd.read_csv(str(maxnum)+"=values.csv")
+    cols = [majors,acad_prog,catalog,instructor]
+    indexc = 0
+    for column in dfv.columns:
+        if "[" in str(dfv[column][0]):
+          l = dfv[column][0].replace("'",'')
+          cols[indexc][:] = str(l).strip('][').split(', ')
+          for i,e in enumerate(cols[indexc]):
+            cols[indexc][i] = e.replace(' ','')
+          print(cols[indexc])
+          indexc = indexc + 1
     #modelname = "VotingClassifier=0.95756598831352.sav"
     loaded_model = pickle.load(open(modelname, 'rb'))
     droppers = 0
         elif row['TERM'][-1] == 5:
             semester = 3
         c_id = catalog.index(str(row['CATALOG_NBR']))
+        in_id = instructor.index(str(row['INSTRUCTOR_ID']))
         p_id = acad_prog.index(row['PROGRAM'])
         major = 0
         x = row['PROGRAM.1']