Dineflow committed on
Commit
b61e8ad
·
verified ·
1 Parent(s): f01947b

Upload 5 files

Browse files
handler.py CHANGED
@@ -1,56 +1,55 @@
1
- import nltk
2
- # Ensure NLTK data is downloaded
3
- nltk.download('punkt')
4
-
5
- import joblib
6
- import os
7
- import json
8
-
9
-
10
-
11
- class EndpointHandler:
12
- def __init__(self, model_dir):
13
- self.model_dir = model_dir
14
- self.vectorizer = joblib.load(os.path.join(model_dir, 'vectorizer.joblib'))
15
- self.model = joblib.load(os.path.join(model_dir, 'logistic_classifier.joblib'))
16
-
17
- # Define the custom tokenizer function
18
- self.tokenizer = nltk.word_tokenize
19
-
20
- # Verify that the tokenizer configuration is correct
21
- with open(os.path.join(model_dir, "tokenizer.json"), "r") as file:
22
- tokenizer_config = json.load(file)
23
- if tokenizer_config['tokenizer'] != 'nltk.word_tokenize':
24
- raise ValueError("Tokenizer configuration does not match the expected tokenizer.")
25
-
26
- def predict_rating(self, review):
27
- review_tfidf = self.vectorizer.transform([review])
28
- predicted_rating = self.model.predict(review_tfidf)[0]
29
- return int(predicted_rating)
30
-
31
- def __call__(self, inputs):
32
- try:
33
- inputs_dict = json.loads(inputs)
34
-
35
- if 'review' not in inputs_dict:
36
- return json.dumps({"error": "No valid review provided"})
37
-
38
- review = inputs_dict['review']
39
-
40
- if not review:
41
- return json.dumps({"error": "No valid review provided"})
42
-
43
- predicted_rating = self.predict_rating(review)
44
-
45
- response = {
46
- "review": review,
47
- "predicted_rating": predicted_rating
48
- }
49
-
50
- return json.dumps(response)
51
-
52
- except json.JSONDecodeError:
53
- return json.dumps({"error": "Invalid JSON format"})
54
-
55
- except Exception as e:
56
- return json.dumps({"error": str(e)})
 
1
+ import joblib
2
+ import os
3
+ import json
4
+
5
+ class EndpointHandler:
6
+ def __init__(self, model_dir):
7
+ self.model_dir = model_dir
8
+ self.vectorizer = joblib.load(os.path.join(model_dir, 'vectorizer.joblib'))
9
+ self.model = joblib.load(os.path.join(model_dir, 'logistic_classifier.joblib'))
10
+
11
+ # Define the custom tokenizer function
12
+ def custom_tokenizer(text):
13
+ return text.split()
14
+
15
+ self.tokenizer = custom_tokenizer
16
+
17
+ # Verify that the tokenizer configuration is correct
18
+ with open(os.path.join(model_dir, "tokenizer.json"), "r") as file:
19
+ tokenizer_config = json.load(file)
20
+ if tokenizer_config['tokenizer'] != 'split':
21
+ raise ValueError("Tokenizer configuration does not match the expected tokenizer.")
22
+
23
+ def predict_rating(self, review):
24
+ review_tfidf = self.vectorizer.transform([review])
25
+ predicted_rating = self.model.predict(review_tfidf)[0]
26
+ return int(predicted_rating)
27
+
28
+ def __call__(self, inputs):
29
+ try:
30
+ inputs_dict = json.loads(inputs)
31
+
32
+ if 'review' not in inputs_dict:
33
+ return json.dumps({"error": "No valid review provided"})
34
+
35
+ review = inputs_dict['review']
36
+
37
+ if not review:
38
+ return json.dumps({"error": "No valid review provided"})
39
+
40
+ predicted_rating = self.predict_rating(review)
41
+
42
+ response = {
43
+ "review": review,
44
+ "predicted_rating": predicted_rating
45
+ }
46
+
47
+ return json.dumps(response)
48
+
49
+ except json.JSONDecodeError:
50
+ return json.dumps({"error": "Invalid JSON format"})
51
+
52
+ except Exception as e:
53
+ return json.dumps({"error": str(e)})
54
+
55
+
 
logistic_classifier.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66ca534a1485c744bb11cfb827f4af94e1e8d46182267d5e2fe0d33b67d8df3e
3
  size 200927
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1320450110eb5efffddbb80edd4c1ffbad72753b2529db2f3ab79aac637883dc
3
  size 200927
requirement.txt CHANGED
@@ -1,5 +1,4 @@
1
- joblib
2
- scikit-learn
3
- pandas
4
- nlp
5
- nltk
 
1
+ joblib
2
+ scikit-learn
3
+ pandas
4
+ nlp
 
tokenizer.json CHANGED
@@ -1 +1 @@
1
- {"tokenizer": "nltk.word_tokenize"}
 
1
+ {"tokenizer": "split"}
vectorizer.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32f170a717c19ff2a6fc4480f609d6fd3252669cc88a1b4e38842d6dbdb7bd4a
3
- size 161885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3533c6722416bede62ff5d61e33be74a6f6691fc077c228a5edbd78ed018d1db
3
+ size 162076