from flask import Flask, request, jsonify, render_template
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import string
import pickle
import os

# Initialize the app
app = Flask(__name__)

# Add the NLTK data directory to NLTK's search path. This does not download
# anything: the resources used below must already exist in that directory.
nltk.data.path.append(os.getenv("NLTK_DATA", "/app/nltk_data"))

# Load pre-trained model and vectorizer.
# NOTE(review): pickle.load can execute arbitrary code from the file it reads;
# these paths must only ever point at trusted artifacts.
with open('model.pkl', 'rb') as file:
    model = pickle.load(file)

with open('vectorizer.pkl', 'rb') as file:
    vectorizer = pickle.load(file)

# Initialize stop words and stemmer
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

# Translation table that deletes every ASCII punctuation character; built once
# at import time instead of on every preprocess_text call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def preprocess_text(input_text: str) -> str:
    """Normalize raw text into the space-joined token string fed to the vectorizer.

    Steps, in order: lowercase, strip ASCII punctuation, tokenize with
    ``nltk.word_tokenize``, drop English stop words, Porter-stem each
    remaining token.

    Args:
        input_text: The raw text to normalize.

    Returns:
        The stemmed tokens joined by single spaces ("" when no tokens remain).
    """
    cleaned_text = input_text.lower().translate(_PUNCTUATION_TABLE)
    tokens = nltk.word_tokenize(cleaned_text)
    return ' '.join(
        stemmer.stem(word) for word in tokens if word not in stop_words
    )


@app.route('/')
def home():
    """Serve the HTML form (the ``index.html`` template)."""
    return render_template('index.html')


@app.route('/predict', methods=['POST'])
def predict():
    """Classify the ``text`` field of a JSON request body.

    Expects a JSON object; the optional ``"text"`` key defaults to "".

    Returns:
        ``{"prediction": <label>}`` on success, or ``{"error": <message>}``
        with HTTP 400 when the body is not a JSON object or ``text`` is not
        a string.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so every malformed request gets the same JSON 400 response.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    input_text = data.get("text", "")
    if not isinstance(input_text, str):
        return jsonify({"error": "'text' must be a string."}), 400

    preprocessed = preprocess_text(input_text)
    prediction = model.predict(vectorizer.transform([preprocessed]))

    label = prediction[0]
    # The model presumably returns a NumPy array; NumPy scalars such as int64
    # are not JSON serializable, so unwrap them to native Python values.
    if hasattr(label, "item"):
        label = label.item()
    return jsonify({"prediction": label})


# Run the app
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)