Spaces: add files

Files changed:
- classifier.pkl +0 -0
- classifier.py +20 -0
- main.py +35 -0
- process_text.py +33 -0
- requirements.txt +24 -0
- vectorizer.pkl +0 -0
classifier.pkl
ADDED
Binary file (11.6 kB).
classifier.py
ADDED
@@ -0,0 +1,20 @@
+import pickle
+from process_text import Preprocessor
+
+class Classifier:
+    def __init__(self) -> None:
+        # Load the pickled model and vectorizer from the working directory
+        self.model = pickle.load(open("classifier.pkl", "rb"))
+        self.vectorizer = pickle.load(open("vectorizer.pkl", "rb"))
+
+    def classify(self, text):
+        # Preprocess the given string
+        sentences = [text]
+        preprocessor = Preprocessor()
+        processed_text = preprocessor.preprocess(sentences)
+
+        # Vectorize the text and infer the output
+        vectorized_text = self.vectorizer.transform(processed_text)
+        prediction = self.model.predict(vectorized_text)
+
+        return prediction[0]
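
A minimal usage sketch (not part of the commit): assuming classifier.pkl and vectorizer.pkl sit in the working directory, the class can be driven directly; the exact label returned depends on how the pickled model was trained.

    from classifier import Classifier

    clf = Classifier()  # loads both pickles from the working directory
    print(clf.classify("I really enjoyed this movie"))  # e.g. "positive" -- the label set depends on the training data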
main.py
ADDED
@@ -0,0 +1,35 @@
+from fastapi import FastAPI
+from fastapi.openapi.utils import get_openapi
+from pydantic import BaseModel
+from classifier import Classifier
+
+class SentimentText(BaseModel):
+    text: str
+
+app = FastAPI()
+
+@app.post("/")  # POST, since the endpoint reads a JSON request body
+def root(request: SentimentText):
+
+    classifier = Classifier()
+    prediction = classifier.classify(request.text)
+
+    return {"sentiment": prediction}
+
+def endpoint_openapi():
+    if app.openapi_schema:
+        return app.openapi_schema
+    openapi_schema = get_openapi(
+        title="Sentiment Analysis",
+        version="2.5.0",
+        description="This is the OpenAPI schema for the app",
+        routes=app.routes,
+    )
+    openapi_schema["info"]["x-logo"] = {
+        "url": "https://fastapi.tiangolo.com/img/logo-margin/logo-teal.png"
+    }
+    app.openapi_schema = openapi_schema
+    return app.openapi_schema
+
+
+app.openapi = endpoint_openapi
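
A hedged client-side sketch (not part of the commit): assuming the app is served locally, e.g. with `uvicorn main:app --port 8000`, the endpoint can be exercised from Python. Note that `requests` is an extra dependency not pinned in requirements.txt.

    import requests

    # POST a JSON body matching the SentimentText model
    resp = requests.post("http://localhost:8000/",
                         json={"text": "great product, would buy again"})
    print(resp.json())  # e.g. {"sentiment": "positive"}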
process_text.py
ADDED
@@ -0,0 +1,33 @@
+import re
+import nltk
+from nltk import WordNetLemmatizer
+from nltk.corpus import stopwords
+
+class Preprocessor:
+    def __init__(self) -> None:
+        nltk.download('stopwords')  # stopword list used below
+        nltk.download('wordnet')    # lemmatizer lexicon
+        nltk.download('omw-1.4')    # wordnet data needed by newer NLTK versions
+
+    def tokenize_and_remove_stopwords(self, text):
+        tweet_list = text.split()
+        clean_tokens = [t for t in tweet_list if re.match(r'[^\W\d]*$', t)]  # keep purely alphabetic tokens
+        clean_s = ' '.join(clean_tokens)
+        clean_mess = [word for word in clean_s.split() if word.lower() not in stopwords.words('english')]
+        return clean_mess
+
+    def normalization(self, text):
+        lem = WordNetLemmatizer()
+        normalized_text = ""
+        for word in text:
+            normalized_word = lem.lemmatize(word, 'v')  # lemmatize as a verb
+            normalized_text += normalized_word + " "
+        return normalized_text.strip()
+
+    def preprocess(self, textlist):
+        preprocessed_text = []
+        for text in textlist:
+            text = self.tokenize_and_remove_stopwords(text)
+            text = self.normalization(text)
+            preprocessed_text.append(text)
+        return preprocessed_text
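
A quick demonstration sketch (not part of the commit) of what the pipeline does to a sentence; the Preprocessor() call also triggers the three NLTK downloads on first use.

    from process_text import Preprocessor

    pre = Preprocessor()
    print(pre.preprocess(["I loved the new update"]))
    # e.g. ['love new update'] -- stop words dropped, "loved" lemmatized as a verb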
requirements.txt
ADDED
@@ -0,0 +1,24 @@
+anyio==3.6.1
+click==8.1.3
+fastapi==0.82.0
+h11==0.13.0
+httptools==0.4.0
+idna==3.3
+joblib==1.1.0
+nltk==3.7
+numpy==1.23.2
+pydantic==1.10.2
+python-dotenv==0.21.0
+PyYAML==6.0
+regex==2022.8.17
+scikit-learn==1.1.2
+scipy==1.9.1
+sniffio==1.3.0
+starlette==0.19.1
+threadpoolctl==3.1.0
+tqdm==4.64.1
+typing_extensions==4.3.0
+uvicorn==0.18.3
+uvloop==0.16.0
+watchfiles==0.16.1
+websockets==10.3
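
One caveat worth noting: pickled scikit-learn estimators are sensitive to the library version, so the scikit-learn==1.1.2 pin should match the version used to train classifier.pkl and vectorizer.pkl. The environment is reproduced with `pip install -r requirements.txt`.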
vectorizer.pkl
ADDED
Binary file (116 kB).