File size: 1,537 Bytes
912ab32
 
 
 
 
 
4b0f4ba
 
912ab32
 
 
 
 
4b0f4ba
912ab32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271340a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from flask import Flask, request, jsonify, render_template
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import string
import pickle
import os


# Initialize the app
app = Flask(__name__)

# Point NLTK at the pre-fetched resource directory; NLTK_DATA env var
# overrides the default /app/nltk_data (container layout). This only adds
# a search path — the resources must already exist there.
nltk.data.path.append(os.getenv("NLTK_DATA", "/app/nltk_data"))

# Load pre-trained model and vectorizer at import time so every request
# shares one in-memory copy.
# NOTE(review): pickle.load executes arbitrary code from the file — these
# artifacts must come from a trusted build step, never from user uploads.
with open('model.pkl', 'rb') as file:
    model = pickle.load(file)

with open('vectorizer.pkl', 'rb') as file:
    vectorizer = pickle.load(file)

# Initialize stop words and stemmer once; set() gives O(1) membership
# tests during per-request preprocessing.
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

# Preprocessing function
def preprocess_text(input_text):
    """Normalize raw text for the vectorizer.

    Lowercases, strips all punctuation, tokenizes, drops English stop
    words, stems each remaining token, and re-joins with single spaces.
    """
    # Lowercase and remove punctuation in one C-level translate pass.
    no_punct = input_text.lower().translate(
        str.maketrans('', '', string.punctuation))
    tokens = nltk.word_tokenize(no_punct)
    # Stem every token that is not a stop word.
    kept = (stemmer.stem(tok) for tok in tokens if tok not in stop_words)
    return ' '.join(kept)

# Route for the HTML form
@app.route('/')
def home():
    """Serve the single-page UI from templates/index.html."""
    page = render_template('index.html')
    return page

# Prediction API
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the posted text.

    Expects a JSON body like {"text": "..."}; returns
    {"prediction": <label>} on success or a 400 error payload when the
    request body is not a JSON object.
    """
    # request.json raises (or yields None) on non-JSON bodies, which made
    # the original crash with an AttributeError on .get(); validate instead.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object with a 'text' field"}), 400
    input_text = data.get("text", "")
    preprocessed = preprocess_text(input_text)
    prediction = model.predict(vectorizer.transform([preprocessed]))
    label = prediction[0]
    # sklearn models typically return numpy scalars, which jsonify cannot
    # serialize; unwrap to a native Python value when possible.
    if hasattr(label, "item"):
        label = label.item()
    return jsonify({"prediction": label})

# Run the app with Flask's built-in development server when executed
# directly. 0.0.0.0 binds all interfaces (required inside a container);
# port 7860 is presumably for Hugging Face Spaces — confirm with deploy config.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)