# (removed paste residue: "Spaces:" / "Runtime error" header lines were not Python source)
# Standard library
import pickle
# import os

# Third-party
import numpy as np
import torch
from flask import Flask, request, render_template, url_for, current_app, abort
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer
from xgboost import XGBClassifier
# import nbformat
# from nbconvert import PythonExporter
app = Flask(__name__)


def load_model():
    """Load the pickled classifier into ``current_app.clf`` at startup.

    Aborts the request with HTTP 500 if the pickle file is missing or
    unreadable, since the app cannot serve predictions without a model.
    """
    try:
        with open('static/ipynbFiles/classifier2.pkl', 'rb') as file:
            current_app.clf = pickle.load(file)
    except Exception as e:
        # Best-effort diagnostic before failing hard.
        print(f"Error loading model: {str(e)}")
        abort(500)  # Internal Server Error


# Flask removed ``before_first_request`` in 2.3, which makes the original
# registration raise AttributeError at import time on modern Flask.  Keep the
# old behavior where available and fall back to an eager load otherwise.
if hasattr(app, "before_first_request"):
    app.before_first_request(load_model)
else:
    with app.app_context():
        load_model()
def model_extract(input_string):
    """Convert raw text into padded token-id tensors for the classifier.

    Tokenizes *input_string* with the ai4bharat/indic-bert tokenizer, adds
    BERT boundary markers, truncates/pads to 256 ids, and returns the ids as
    a ``torch.LongTensor`` of shape (1, 256).
    """
    param = {'maxLen': 256}
    # Cache the tokenizer on the function object so the slow HuggingFace
    # download/load happens once per process instead of once per call.
    if not hasattr(model_extract, "_tokenizer"):
        model_extract._tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")
    tokenizer = model_extract._tokenizer
    # NOTE(review): the original also loaded AutoModel("ai4bharat/indic-bert")
    # here but never used it; the load was dropped to save memory and time.

    def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre',
                      truncating='pre', value=0.0):
        """Truncate then pad each sequence to exactly *maxlen* entries."""
        if padding not in ('pre', 'post'):
            raise ValueError("Padding should be 'pre' or 'post'.")
        if truncating not in ('pre', 'post'):
            raise ValueError("Truncating should be 'pre' or 'post'.")
        padded_sequences = []
        for seq in sequences:
            # Truncate FIRST: the original padded first, which produced a
            # negative pad width (np.pad raises ValueError) whenever a
            # sequence was longer than maxlen.
            if len(seq) > maxlen:
                seq = seq[-maxlen:] if truncating == 'pre' else seq[:maxlen]
            pad_width = (maxlen - len(seq), 0) if padding == 'pre' else (0, maxlen - len(seq))
            padded_sequences.append(
                np.pad(seq, pad_width, 'constant', constant_values=value))
        return np.array(padded_sequences, dtype=dtype)

    def create_attention_masks(input_ids):
        """Mask is 1.0 where a real token (id > 0) sits, else 0.0 (padding)."""
        return np.array([[float(tok > 0) for tok in seq] for seq in tqdm(input_ids)])

    def getFeaturesandLabel(single_string, label):
        # Wrap the single string in a list and add BERT boundary markers.
        sentences = ["[CLS] " + single_string + " [SEP]"]
        # Tokenize (hard cap at 512 tokens, BERT's absolute limit).
        tokenizer_texts = list(map(lambda t: tokenizer.tokenize(t)[:512], tqdm(sentences)))
        input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tqdm(tokenizer_texts)]
        # Pad/truncate to the model's configured length and build masks.
        input_ids = pad_sequences(sequences=input_ids, maxlen=param['maxLen'],
                                  dtype='long', padding='post', truncating='post')
        attention_masks_data = create_attention_masks(input_ids)
        # Convert to torch tensors.
        X_data = torch.tensor(input_ids)
        attention_masks_data = torch.tensor(attention_masks_data)
        y_data = torch.tensor(label)
        return X_data, attention_masks_data, y_data

    # The attention masks and dummy label are computed but not needed by the
    # caller, which only consumes the token-id tensor.
    X_data, _attention_masks, _y_data = getFeaturesandLabel(input_string, [0])
    return X_data
| # def model_heart(): | |
| # # Path to the notebook file | |
| # notebook_path = os.path.join('static', 'ipynbFiles', 'trail.ipynb') | |
| # # Read the notebook content | |
| # with open(notebook_path, 'r', encoding='utf-8') as notebook_file: | |
| # notebook_content = nbformat.read(notebook_file, as_version=4) | |
| # # Create a PythonExporter | |
| # python_exporter = PythonExporter() | |
| # # Convert the notebook to a Python script | |
| # python_script, _ = python_exporter.from_notebook_node(notebook_content) | |
| # print(python_script) | |
| # # Execute the Python script | |
| # exec(python_script) | |
| # model_heart() | |
| # Now you can use the variables and functions defined in the notebook in your app.py | |
| from tempCodeRunnerFile import match | |
# NOTE(review): no @app.route decorator survived in the pasted source; without
# one the app serves no URLs at all.  '/' reconstructed — confirm against the
# original file.
@app.route('/')
def index():
    """Serve the landing page with the text-input form."""
    return render_template('index.html')
# NOTE(review): route decorator reconstructed (none survived in the pasted
# source); request.form implies POST — confirm the original URL rule.
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the submitted text as cyberbullying or not.

    Reads the form field 'text'.  Texts containing any character from the
    ``match`` blocklist are flagged directly; otherwise the pickled XGBoost
    classifier is run on indic-bert token ids produced by ``model_extract``.
    Renders index.html with the verdict and the echoed input.
    """
    input_string = request.form['text']
    print('text: ', input_string)
    if any(c in input_string for c in match):
        # Blocklisted character present -> cyberbullying outright.
        prediction = [0]
    else:
        # Load the classifier lazily so the blocklist fast path never touches
        # the (large) pickle file; the original loaded it unconditionally.
        with open('static/ipynbFiles/classifier_10epochs_updated.pkl', 'rb') as file:
            clf = pickle.load(file)
        ans = model_extract(input_string)
        print('torch.tensor variable: ', ans)
        prediction = clf.predict(ans)
    print('prediction=', prediction)
    # prediction is either a plain list or a numpy array; index element 0 so
    # the comparison behaves identically for both (the original's
    # ``prediction == [0]`` relied on ndarray-vs-list comparison quirks).
    if int(prediction[0]) == 0:
        return render_template('index.html', pred='Cyberbullying Text',
                               question='వాక్యం - ' + input_string)
    return render_template('index.html', pred='Non-Cyberbullying Text',
                           question='వాక్యం - ' + input_string)
if __name__ == "__main__":
    # Development server only — use a production WSGI server when deploying.
    app.run(debug=True, port=8001)

# To (re)create the pickle file for the classifier:
# with open('classifier.pkl', 'wb') as file:
#     pickle.dump(xgb, file)