hazardous committed on
Commit
94e649c
·
1 Parent(s): b446438
Files changed (6) hide show
  1. classifier.pkl +0 -0
  2. classifier.py +20 -0
  3. main.py +35 -0
  4. process_text.py +33 -0
  5. requirements.txt +24 -0
  6. vectorizer.pkl +0 -0
classifier.pkl ADDED
Binary file (11.6 kB). View file
 
classifier.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from process_text import Preprocessor
3
+
4
class Classifier:
    """Sentiment classifier backed by a pickled model and vectorizer.

    Loads ``classifier.pkl`` (the fitted model) and ``vectorizer.pkl`` (the
    fitted text vectorizer) from the working directory at construction time.
    """

    def __init__(self) -> None:
        # Load model and vectorizer with context managers so the file
        # handles are closed (the original `pickle.load(open(...))` form
        # leaked both handles).
        # NOTE(review): pickle deserialization is unsafe on untrusted data;
        # these are assumed to be trusted artifacts shipped with the app.
        with open("classifier.pkl", "rb") as model_file:
            self.model = pickle.load(model_file)
        with open("vectorizer.pkl", "rb") as vectorizer_file:
            self.vectorizer = pickle.load(vectorizer_file)

    def classify(self, text):
        """Return the predicted sentiment label for a single input string.

        The text is cleaned by ``Preprocessor`` (tokenize, drop stopwords,
        lemmatize), vectorized, and passed to the model.
        """
        # Preprocessor.preprocess expects a list of sentences.
        preprocessor = Preprocessor()
        processed_text = preprocessor.preprocess([text])

        # Vectorize the processed text and infer the output.
        vectorized_text = self.vectorizer.transform(processed_text)
        prediction = self.model.predict(vectorized_text)

        # `predict` returns an array-like; one input yields one label.
        return prediction[0]
main.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.openapi.utils import get_openapi
3
+ from pydantic import BaseModel
4
+ from classifier import Classifier
5
+
6
class SentimentText(BaseModel):
    # Request payload: the raw text to classify.
    text: str


app = FastAPI()


@app.get("/")
def root(request: SentimentText):
    """Classify the sentiment of the submitted text.

    Returns ``{"sentiment": <label>}`` where the label comes from the
    pickled model wrapped by ``Classifier``.

    NOTE(review): this route reads a request *body* on a GET endpoint,
    which violates HTTP semantics and is unsupported by many clients —
    consider switching to POST (left unchanged to avoid breaking callers).
    NOTE(review): constructing a Classifier per request re-reads both
    pickle files on every call; consider loading it once at startup.
    """
    classifier = Classifier()  # fixed typo: was "classfier"
    prediction = classifier.classify(request.text)

    return {"sentiment": prediction}
18
+
19
def endpoint_openapi():
    """Build (and cache) a customized OpenAPI schema for the app.

    Standard FastAPI custom-OpenAPI pattern: generate the schema once,
    attach a logo, and memoize it on ``app.openapi_schema``.
    """
    # Serve the cached schema when one was already generated.
    if app.openapi_schema:
        return app.openapi_schema

    schema = get_openapi(
        title="Sentiment Analysis",
        version="2.5.0",
        description="This is the OpenAPI schema for the app",
        routes=app.routes,
    )
    # Embed a custom logo via the ReDoc "x-logo" vendor extension.
    schema["info"]["x-logo"] = {
        "url": "https://fastapi.tiangolo.com/img/logo-margin/logo-teal.png"
    }

    app.openapi_schema = schema
    return app.openapi_schema


# Replace the default schema generator with the customized one.
app.openapi = endpoint_openapi
process_text.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import nltk
3
+ from nltk import WordNetLemmatizer
4
+ from nltk.corpus import stopwords
5
+
6
class Preprocessor:
    """Cleans raw sentences: tokenize, drop stopwords, lemmatize (verb POS)."""

    # Keep only tokens made solely of non-digit word characters.
    # (re.match anchors at the start; `$` anchors the end.)
    _TOKEN_PATTERN = re.compile(r'[^\W\d]*$')

    def __init__(self) -> None:
        # Fetch the NLTK resources the pipeline needs (no-op once cached
        # locally, but still prints/downloads on first use).
        nltk.download('stopwords')
        nltk.download('wordnet')
        nltk.download('omw-1.4')
        # Build the stopword set once: O(1) membership tests instead of
        # re-reading the corpus list for every word (original was O(n*m)).
        self._stopwords = set(stopwords.words('english'))
        # One lemmatizer instance reused across calls.
        self._lemmatizer = WordNetLemmatizer()

    def tokenize_and_remove_stopwords(self, text):
        """Split *text* on whitespace, keep word-only tokens, drop stopwords.

        Returns a list of surviving tokens (original casing preserved;
        the stopword comparison is case-insensitive).
        """
        # The original copied split()'s list, re-joined the clean tokens
        # and split again — all no-ops since tokens contain no whitespace.
        clean_tokens = [t for t in text.split() if self._TOKEN_PATTERN.match(t)]
        return [w for w in clean_tokens if w.lower() not in self._stopwords]

    def normalization(self, text):
        """Lemmatize each token in *text* (verb POS) and rejoin with spaces.

        *text* is an iterable of tokens; returns a single string.
        """
        # str.join replaces the original quadratic `+=` accumulation.
        return " ".join(self._lemmatizer.lemmatize(word, 'v') for word in text)

    def preprocess(self, textlist):
        """Run the full pipeline over a list of sentences.

        Returns a list of cleaned strings, one per input sentence.
        """
        return [
            self.normalization(self.tokenize_and_remove_stopwords(text))
            for text in textlist
        ]
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ anyio==3.6.1
2
+ click==8.1.3
3
+ fastapi==0.82.0
4
+ h11==0.13.0
5
+ httptools==0.4.0
6
+ idna==3.3
7
+ joblib==1.1.0
8
+ nltk==3.7
9
+ numpy==1.23.2
10
+ pydantic==1.10.2
11
+ python-dotenv==0.21.0
12
+ PyYAML==6.0
13
+ regex==2022.8.17
14
+ scikit-learn==1.1.2
15
+ scipy==1.9.1
16
+ sniffio==1.3.0
17
+ starlette==0.19.1
18
+ threadpoolctl==3.1.0
19
+ tqdm==4.64.1
20
+ typing_extensions==4.3.0
21
+ uvicorn==0.18.3
22
+ uvloop==0.16.0
23
+ watchfiles==0.16.1
24
+ websockets==10.3
vectorizer.pkl ADDED
Binary file (116 kB). View file