Spaces:
Build error
Build error
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import copy | |
| from codeScripts.Dependencies.SentenceTransformer2 import * | |
| from codeScripts.utils import create_file_path, clean_words, save_json | |
| #Done | |
| class SemanticOutput(): | |
| """ | |
| Class to store the semantic processing and extract results | |
| """ | |
| def __init__(self, settings): | |
| self.nota_spacy = dict() | |
| self.nota_spacy["spacy"] = dict() | |
| self.nota_spacy["bert"] = dict() | |
| self.nota_spacy_experimento = dict() | |
| self.nota_spacy_experimento["spacy"] = dict() | |
| self.nota_spacy_experimento["bert"] = dict() | |
| self.identifyLineofResponse = dict() | |
| self.identifyLineofResponse["spacy"] = dict() | |
| self.identifyLineofResponse["bert"] = dict() | |
| self.identifyLineofResponse_toexcel = [] | |
| self.notas_calculadas = dict() | |
| self.notas_calculadas["spacy"] = dict() | |
| self.notas_calculadas["bert"] = dict() | |
| self.min_umbral = [] | |
| self.max_umbral = [] | |
| r= settings.UmbralesSimilitud.split(",") | |
| for i in r: | |
| c_w= clean_words(i) | |
| self.min_umbral.append(float(c_w[0]+'.'+c_w[1])) | |
| self.max_umbral.append(float(c_w[2]+'.'+c_w[3])) | |
| self.notas_calculadas["spacy"]['Umbral ' + c_w[0]+'.'+c_w[1] + ' - ' + c_w[2]+'.'+c_w[3]] = [] | |
| self.notas_calculadas["bert"]['Umbral ' + c_w[0]+'.'+c_w[1] + ' - ' + c_w[2]+'.'+c_w[3]] = [] | |
| #variables taken from the settings | |
| self.answersDF_json2 = dict() | |
| self.answersDF_json2["spacy"] = dict() | |
| self.answersDF_json2["bert"] = dict() | |
| self.indiceMinipreguntas = settings.indice_minipreguntas | |
| print("AAAAAAAAAAAA") | |
| print(self.indiceMinipreguntas) | |
| self.LofRespThreshold = settings.LofRespThreshold | |
| self.indx = 1 | |
| def __createDict__(self, nota_spacy:dict(), studentID, minipregunta, similarity_type, type = 0): | |
| if studentID not in nota_spacy[similarity_type].keys(): | |
| nota_spacy[similarity_type][studentID] = dict() | |
| if type == 0: | |
| nota_spacy[similarity_type][studentID][minipregunta]= [] | |
| else: | |
| nota_spacy[similarity_type][studentID][minipregunta]= dict() | |
| return nota_spacy | |
| def __plotHistogram__(self, save_file, x): | |
| """ | |
| Generates an histogram of the given data. | |
| Inputs: | |
| save_file: The path where the histogram is to be generated. | |
| x: The data to be represented. | |
| """ | |
| ax= sns.histplot( | |
| data = x, | |
| stat = "count", | |
| kde = True, | |
| color = "black" | |
| ) | |
| ax.set(xlabel='Deviation', ylabel='Count') | |
| figure = ax.get_figure() | |
| figure.savefig(create_file_path(save_file,3)) | |
| del figure | |
| ax.cla() | |
| def initInforms(self, studentID, minipregunta, similarity_type): | |
| """ | |
| This function is for initializing the variables where data is to be stored. | |
| Inputs: | |
| studentID: The id of the student | |
| minipregunta: The minipregunta that is being studied | |
| """ | |
| #identificar donde está la respuesta por minipreguta | |
| self.identifyLineofResponse = self.__createDict__(self.identifyLineofResponse, studentID, minipregunta, similarity_type, 1) | |
| #almacenar notas del evaluation process | |
| #self.nota_spacy_experimento = self.__createDict__(self.nota_spacy_experimento, studentID, similarity_type, 1) | |
| self.nota_spacy_experimento[similarity_type][studentID] = dict() | |
| #Almacenar similitudes por minipregunta | |
| self.nota_spacy = self.__createDict__(self.nota_spacy, studentID, minipregunta, similarity_type) | |
| #separar y almacenar una a una las lineas de la respuesta | |
| self.answersDF_json2[similarity_type][studentID] = dict() | |
| self.answersDF_json2[similarity_type][studentID]["respuesta"] = dict() | |
| def updateInformsBucle(self, studentID, minipregunta, response, response_label, numberOfSentences, similarity, similarity_type, isMaxSimil): | |
| """ | |
| This function is the previous needed step before using updateInforms. Stores the important iterative-generated information | |
| Inputs: | |
| -studentID: The id of the student | |
| -minipregunta: The minipregunta that is being studied | |
| -response: The student's response | |
| -response_label: The generated label that indicates the sentence number of the extracted response in the text. | |
| -numberOfSentences: The number of splitted sentences. | |
| -similarity: The obtained similarity score. | |
| -isMaxSimil: If the similarity score is the highest obtained at the moment or not. | |
| """ | |
| #Storing the similarity score obtained for only one sentence | |
| if numberOfSentences == 1: | |
| self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)] = dict() | |
| self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)]["Similitud"] = similarity | |
| self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)]["Frase"] = response | |
| self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)]["Lineas"] = response_label | |
| self.answersDF_json2[similarity_type][studentID]["respuesta"][self.indx] = response | |
| self.indx+=1 | |
| else: | |
| self.indx = 1 | |
| #storing the maximum similarity for each set of sentences length | |
| if isMaxSimil: | |
| self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)] = dict() | |
| self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)]["MaxSimilitud"] = similarity | |
| self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)]["Frase"] = response | |
| self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)]["Lineas"] = response_label | |
| #storing the similarity in every case | |
| self.nota_spacy[similarity_type][studentID][minipregunta].append([response, None, None] if response == "" else [response, similarity, response_label]) | |
| def updateInforms(self, studentID, umbralL, umbralH, calculatedMark, similarity_type, response = ""): | |
| """ | |
| This function is to store the obtained results from the processing of one response. | |
| Inputs: | |
| -studentID: The id of the student | |
| -umbralL: The fixed low threshold (config json) | |
| -umbralH: The fixed high threshold (config json) | |
| -calculatedMark: The calculated mark. | |
| -response: The student's response | |
| """ | |
| print("ZZZZZ") | |
| print(similarity_type) | |
| #storing calculated marks | |
| self.notas_calculadas[similarity_type]['Umbral ' + str(umbralL) + ' - ' + str(umbralH)].append(0 if response == "" else calculatedMark/len(self.indiceMinipreguntas)) | |
| #storing where the model thought the answer was | |
| for minipregunta in self.indiceMinipreguntas: | |
| print("EEEEE") | |
| print(self.identifyLineofResponse) | |
| aux = copy.deepcopy(self.identifyLineofResponse) | |
| for indx in aux[similarity_type][studentID][minipregunta].keys(): | |
| if abs(self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] - self.nota_spacy_experimento[similarity_type][studentID]["1"]["MaxSimilitud"]) > 0.075: | |
| del self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx] | |
| #Getting the number of the guess | |
| if response == "": | |
| self.identifyLineofResponse_toexcel.append([minipregunta, ""]) | |
| else: | |
| max_n = -999999 | |
| indx_queue = 0 | |
| queue = [] | |
| highlightedrows = "" | |
| highlightedmarks = "" | |
| for iter in self.identifyLineofResponse[similarity_type][studentID][minipregunta].keys(): | |
| for indx in self.identifyLineofResponse[similarity_type][studentID][minipregunta].keys(): | |
| if self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] > max_n and not indx in queue and self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"]>self.LofRespThreshold: | |
| max_n = self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] | |
| indx_queue = indx | |
| queue.append(indx_queue) | |
| highlightedrows = highlightedrows + str(indx_queue) + " " | |
| highlightedmarks = highlightedmarks + str(max_n) + " " | |
| max_n = -999999 | |
| indx_queue = 0 | |
| self.identifyLineofResponse_toexcel.append([minipregunta, highlightedrows, highlightedmarks]) | |
| highlightedrows = "" | |
| highlightedmarks = "" | |
| queue = [] | |
| def saveSimilarityResults(self, settings, similarity_type): | |
| """ | |
| Saves the recopiled data in the corresponding format and path differentiating the types of semantic calculation. | |
| Inputs: | |
| -settings: system settings. | |
| -similarity_type: "spacy" if similarity is being calculated from Spacy (if it is not, bert is selected) | |
| """ | |
| savePrefix = "Spacy - " if similarity_type == "spacy" else str(settings.modelr) + str(settings.epochr) + " - " | |
| #previous name - "AnalisisSemantico.json" | |
| save_json(create_file_path(savePrefix + "SimilitudPorConjunto.json",2), self.nota_spacy[similarity_type]) | |
| save_json(create_file_path(savePrefix + "MaxSimilitudPorConjunto.json",2), self.nota_spacy_experimento[similarity_type]) | |
| save_json(create_file_path(savePrefix + "LineaRespuesta.json",2), self.identifyLineofResponse[similarity_type]) | |
| save_json(create_file_path(savePrefix + "RespuestaSeparadaPorFrases.json",2), self.answersDF_json2[similarity_type]) | |
| Notasdf = pd.DataFrame() | |
| for intervaloUmbral in self.notas_calculadas[similarity_type]: | |
| Notasdf[intervaloUmbral] = self.notas_calculadas[similarity_type][intervaloUmbral] | |
| Notasdf.to_excel(create_file_path(savePrefix +'NotasCalculadas.xlsx',2), sheet_name='notas') | |
| #self.__plotHistogram__(savePrefix + "HistogramaNotasGeneradas.png", self.notas_calculadas[similarity_type]) | |
| class SintacticOutput(): | |
| """ | |
| Class to store the sintactic processing | |
| """ | |
| def __init__(self): | |
| self.leg_FH =[] | |
| self.leg_mu = [] | |
| def saveLegibilityResults(self): | |
| """ | |
| Saves the recopiled data in the corresponding format. | |
| """ | |
| save_json(create_file_path("FH-Readability.json",2), self.leg_FH, False) | |
| save_json(create_file_path("mu-Readability.json",2), self.leg_mu, False) | |
| x = [] | |
| for i in range(len(self.leg_FH)): | |
| x.append(i) | |
| plt.figure(figsize=(15,7)) | |
| plt.plot(x, self.leg_FH, label = "FH", color = (0.1,0.1,0.1)) | |
| plt.plot(x, self.leg_mu, '--', label = "mu", color = (0.5,0.5,0.5)) | |
| plt.xlabel("Student") | |
| plt.ylabel("Legibility (0-100)") | |
| plt.legend(loc=1) | |
| plt.title("FH vs mu") | |
| plt.xticks(rotation=-45) | |
| plt.grid() | |
| plt.savefig(create_file_path("Img_FHvsMu.png",3)) | |
| plt.cla() | |
| class OrtographicOutput(): | |
| """ | |
| Class to store the ortographic processing | |
| """ | |
| def __init__(self): | |
| self.notaOrtografia = [] | |
| self.mistakes = [] | |
| self.number_mistakes = [] | |
| def saveOrtographicResults(self): | |
| """ | |
| Saves the ortographic generated marks. | |
| """ | |
| save_json(create_file_path("NotasOrtografia.json",2), self.notaOrtografia, False) | |