hazardous committed on
Commit
94e649c
·
1 Parent(s): b446438
Files changed (6) hide show
  1. classifier.pkl +0 -0
  2. classifier.py +20 -0
  3. main.py +35 -0
  4. process_text.py +33 -0
  5. requirements.txt +24 -0
  6. vectorizer.pkl +0 -0
classifier.pkl ADDED
Binary file (11.6 kB). View file
 
classifier.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from process_text import Preprocessor
3
+
4
class Classifier:
    """Sentiment classifier backed by a pickled model and vectorizer.

    Loads ``classifier.pkl`` (the fitted model) and ``vectorizer.pkl`` (the
    fitted text vectorizer) from the working directory at construction time.
    """

    def __init__(self) -> None:
        # Load model and vectorizer with context managers so the file
        # handles are closed (the original `pickle.load(open(...))` form
        # leaked both handles).
        # NOTE(review): pickle deserialization is unsafe on untrusted data;
        # these are assumed to be trusted artifacts shipped with the app.
        with open("classifier.pkl", "rb") as model_file:
            self.model = pickle.load(model_file)
        with open("vectorizer.pkl", "rb") as vectorizer_file:
            self.vectorizer = pickle.load(vectorizer_file)

    def classify(self, text):
        """Return the predicted sentiment label for a single input string.

        The text is cleaned by ``Preprocessor`` (tokenize, drop stopwords,
        lemmatize), vectorized, and passed to the model.
        """
        # Preprocessor.preprocess expects a list of sentences.
        preprocessor = Preprocessor()
        processed_text = preprocessor.preprocess([text])

        # Vectorize the processed text and infer the output.
        vectorized_text = self.vectorizer.transform(processed_text)
        prediction = self.model.predict(vectorized_text)

        # `predict` returns an array-like; one input yields one label.
        return prediction[0]
main.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.openapi.utils import get_openapi
3
+ from pydantic import BaseModel
4
+ from classifier import Classifier
5
+
6
class SentimentText(BaseModel):
    # Request payload: the raw text to classify.
    text: str


app = FastAPI()


@app.get("/")
def root(request: SentimentText):
    """Classify the sentiment of the submitted text.

    Returns ``{"sentiment": <label>}`` where the label comes from the
    pickled model wrapped by ``Classifier``.

    NOTE(review): this route reads a request *body* on a GET endpoint,
    which violates HTTP semantics and is unsupported by many clients —
    consider switching to POST (left unchanged to avoid breaking callers).
    NOTE(review): constructing a Classifier per request re-reads both
    pickle files on every call; consider loading it once at startup.
    """
    classifier = Classifier()  # fixed typo: was "classfier"
    prediction = classifier.classify(request.text)

    return {"sentiment": prediction}
18
+
19
def endpoint_openapi():
    """Build (and cache) a customized OpenAPI schema for the app.

    Standard FastAPI custom-OpenAPI pattern: generate the schema once,
    attach a logo, and memoize it on ``app.openapi_schema``.
    """
    # Serve the cached schema when one was already generated.
    if app.openapi_schema:
        return app.openapi_schema

    schema = get_openapi(
        title="Sentiment Analysis",
        version="2.5.0",
        description="This is the OpenAPI schema for the app",
        routes=app.routes,
    )
    # Embed a custom logo via the ReDoc "x-logo" vendor extension.
    schema["info"]["x-logo"] = {
        "url": "https://fastapi.tiangolo.com/img/logo-margin/logo-teal.png"
    }

    app.openapi_schema = schema
    return app.openapi_schema


# Replace the default schema generator with the customized one.
app.openapi = endpoint_openapi
process_text.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import nltk
3
+ from nltk import WordNetLemmatizer
4
+ from nltk.corpus import stopwords
5
+
6
class Preprocessor:
    """Cleans raw sentences: tokenize, drop stopwords, lemmatize (verb POS)."""

    # Keep only tokens made solely of non-digit word characters.
    # (re.match anchors at the start; `$` anchors the end.)
    _TOKEN_PATTERN = re.compile(r'[^\W\d]*$')

    def __init__(self) -> None:
        # Fetch the NLTK resources the pipeline needs (no-op once cached
        # locally, but still prints/downloads on first use).
        nltk.download('stopwords')
        nltk.download('wordnet')
        nltk.download('omw-1.4')
        # Build the stopword set once: O(1) membership tests instead of
        # re-reading the corpus list for every word (original was O(n*m)).
        self._stopwords = set(stopwords.words('english'))
        # One lemmatizer instance reused across calls.
        self._lemmatizer = WordNetLemmatizer()

    def tokenize_and_remove_stopwords(self, text):
        """Split *text* on whitespace, keep word-only tokens, drop stopwords.

        Returns a list of surviving tokens (original casing preserved;
        the stopword comparison is case-insensitive).
        """
        # The original copied split()'s list, re-joined the clean tokens
        # and split again — all no-ops since tokens contain no whitespace.
        clean_tokens = [t for t in text.split() if self._TOKEN_PATTERN.match(t)]
        return [w for w in clean_tokens if w.lower() not in self._stopwords]

    def normalization(self, text):
        """Lemmatize each token in *text* (verb POS) and rejoin with spaces.

        *text* is an iterable of tokens; returns a single string.
        """
        # str.join replaces the original quadratic `+=` accumulation.
        return " ".join(self._lemmatizer.lemmatize(word, 'v') for word in text)

    def preprocess(self, textlist):
        """Run the full pipeline over a list of sentences.

        Returns a list of cleaned strings, one per input sentence.
        """
        return [
            self.normalization(self.tokenize_and_remove_stopwords(text))
            for text in textlist
        ]
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ anyio==3.6.1
2
+ click==8.1.3
3
+ fastapi==0.82.0
4
+ h11==0.13.0
5
+ httptools==0.4.0
6
+ idna==3.3
7
+ joblib==1.1.0
8
+ nltk==3.7
9
+ numpy==1.23.2
10
+ pydantic==1.10.2
11
+ python-dotenv==0.21.0
12
+ PyYAML==6.0
13
+ regex==2022.8.17
14
+ scikit-learn==1.1.2
15
+ scipy==1.9.1
16
+ sniffio==1.3.0
17
+ starlette==0.19.1
18
+ threadpoolctl==3.1.0
19
+ tqdm==4.64.1
20
+ typing_extensions==4.3.0
21
+ uvicorn==0.18.3
22
+ uvloop==0.16.0
23
+ watchfiles==0.16.1
24
+ websockets==10.3
vectorizer.pkl ADDED
Binary file (116 kB). View file