sharshar1 commited on
Commit
eb2e1fc
·
verified ·
1 Parent(s): 068a8f1

Upload 6 files

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +18 -0
  3. README_HuggingFace.md +73 -0
  4. app.py +212 -0
  5. glove.6B.50d.txt +3 -0
  6. model.weights.h5 +3 -0
  7. requirements.txt +6 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ glove.6B.50d.txt filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base keeps the image small; TensorFlow still dominates final size.
FROM python:3.10-slim

WORKDIR /app

# Install dependencies
# Copy requirements first so this layer is cached when only code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files (model + embeddings are large Git-LFS files)
COPY app.py .
COPY glove.6B.50d.txt .
COPY model.weights.h5 .

# Expose port
# 7860 is the conventional Hugging Face Spaces port.
EXPOSE 7860

# Run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README_HuggingFace.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Emoji Predictor API
3
+ emoji: 😄
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ # 🎯 Emoji Predictor API
12
+
13
+ Predict the appropriate emoji for any English sentence using an LSTM deep learning model.
14
+
15
+ ## 🚀 API Endpoints
16
+
17
+ ### Health Check
18
+ ```
19
+ GET /
20
+ GET /health
21
+ ```
22
+
23
+ ### Predict Single Text
24
+ ```
25
+ POST /predict
26
+ Content-Type: application/json
27
+
28
+ {
29
+ "text": "I love you"
30
+ }
31
+ ```
32
+
33
+ **Response:**
34
+ ```json
35
+ {
36
+ "text": "I love you",
37
+ "emoji": "❤️",
38
+ "emoji_meaning": "love",
39
+ "confidence": 0.95,
40
+ "all_predictions": {
41
+ "❤️": {"probability": 0.95, "meaning": "love"},
42
+ "⚾": {"probability": 0.01, "meaning": "sports"},
43
+ "😄": {"probability": 0.02, "meaning": "happy"},
44
+ "😞": {"probability": 0.01, "meaning": "sad"},
45
+ "🍴": {"probability": 0.01, "meaning": "food"}
46
+ }
47
+ }
48
+ ```
49
+
50
+ ### Predict Batch
51
+ ```
52
+ POST /predict/batch
53
+ Content-Type: application/json
54
+
55
+ ["I love you", "I am hungry", "Let's play baseball"]
56
+ ```
57
+
58
+ ## 📊 Supported Emojis
59
+
60
+ | Emoji | Meaning |
61
+ |-------|---------|
62
+ | ❤️ | Love |
63
+ | ⚾ | Sports |
64
+ | 😄 | Happy |
65
+ | 😞 | Sad |
66
+ | 🍴 | Food |
67
+
68
+ ## 🔧 Technology Stack
69
+
70
+ - FastAPI
71
+ - TensorFlow/Keras
72
+ - LSTM Neural Network
73
+ - GloVe Word Embeddings
app.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ from fastapi import FastAPI, HTTPException
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from pydantic import BaseModel
6
+ from keras.models import Model
7
+ from keras.layers import Dense, Input, Dropout, LSTM, Activation, Embedding
8
+
9
# Initialize FastAPI
# Single application instance; served by uvicorn on port 7860 (see Dockerfile CMD).
app = FastAPI(
    title="Emoji Predictor API",
    description="Predict emoji from text using LSTM model",
    version="1.0.0"
)
15
+
16
# Enable CORS for Flutter app
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# maximally permissive; consider pinning origins before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
24
+
25
# Global variables
# All three are populated once by the startup hook (load_model); None until then.
model = None            # compiled Keras model, loaded at startup
word_to_index = None    # word -> 1-based vocabulary index (0 reserved for padding)
word_to_vec_map = None  # word -> GloVe embedding vector
maxLen = 10             # maximum words per sentence fed to the model
30
+
31
# Model class index -> emoji character (5-way classifier output).
emoji_dictionary = {
    0: "❤️",
    1: "⚾",
    2: "😄",
    3: "😞",
    4: "🍴"
}
38
+
39
# Model class index -> human-readable label; must stay aligned with emoji_dictionary.
emoji_meanings = {
    0: "love",
    1: "sports",
    2: "happy",
    3: "sad",
    4: "food"
}
46
+
47
# Request/Response models
class PredictRequest(BaseModel):
    """Request body for POST /predict."""
    text: str  # sentence to classify; whitespace is stripped by the endpoint
50
+
51
class PredictResponse(BaseModel):
    """Response body for POST /predict."""
    text: str              # echoed input (stripped)
    emoji: str             # predicted emoji character
    emoji_meaning: str     # human-readable label of the predicted class
    confidence: float      # probability of the predicted class
    all_predictions: dict  # emoji -> {"probability": float, "meaning": str}
57
+
58
class HealthResponse(BaseModel):
    """Response body for GET / and GET /health."""
    status: str         # "running" (root) or "healthy" (/health)
    model_loaded: bool  # True once the startup hook has finished
61
+
62
# Helper functions
def read_glove_vecs(glove_file):
    """Parse a GloVe text file of "word v1 v2 ..." lines.

    Returns:
        tuple: (words_to_index, word_to_vec_map) where words_to_index maps
        each word to a 1-based index assigned in alphabetical order (0 is
        reserved for padding), and word_to_vec_map maps each word to its
        embedding as a float64 numpy array.
    """
    word_to_vec_map = {}
    with open(glove_file, 'r', encoding="utf8") as fh:
        for raw_line in fh:
            parts = raw_line.strip().split()
            word_to_vec_map[parts[0]] = np.array(parts[1:], dtype=np.float64)
    # Indices start at 1 so index 0 stays free as the padding slot.
    words_to_index = {w: i for i, w in enumerate(sorted(word_to_vec_map), start=1)}
    return words_to_index, word_to_vec_map
79
+
80
def sentences_to_indices(X, word_to_index, max_len):
    """Convert an array of sentences to a zero-padded matrix of word indices.

    Args:
        X: numpy array of sentence strings, shape (m,).
        word_to_index: dict mapping word -> 1-based vocabulary index.
        max_len: number of columns in the output; extra words are dropped.

    Returns:
        numpy array of shape (m, max_len); out-of-vocabulary words are
        skipped and unused trailing slots stay 0 (the padding index).
    """
    X_indices = np.zeros((X.shape[0], max_len))
    for row, sentence in enumerate(X):
        col = 0
        for word in sentence.lower().split():
            if col >= max_len:
                break
            if word in word_to_index:
                X_indices[row, col] = word_to_index[word]
                col += 1
    return X_indices
93
+
94
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """Create a Keras Embedding layer initialized with 50-d GloVe vectors.

    Row 0 is left as zeros for the padding index; rows whose word has no
    GloVe vector also stay zero.
    """
    emb_dim = 50                         # glove.6B.50d embedding size
    vocab_len = len(word_to_index) + 1   # +1 for the 0 padding row
    weights = np.zeros((vocab_len, emb_dim))
    for word, idx in word_to_index.items():
        vector = word_to_vec_map.get(word)
        if vector is not None:
            weights[idx, :] = vector
    layer = Embedding(vocab_len, emb_dim)
    layer.build((None,))                 # build before injecting weights
    layer.set_weights([weights])
    return layer
105
+
106
def build_model(input_shape, word_to_vec_map, word_to_index):
    """Build the two-layer LSTM emoji classifier.

    Args:
        input_shape: shape of the integer-index input, e.g. (maxLen,).
        word_to_vec_map: dict mapping word -> GloVe embedding vector.
        word_to_index: dict mapping word -> 1-based vocabulary index.

    Returns:
        An uncompiled Keras Model mapping word-index sequences to a 5-way
        softmax over emoji classes.
    """
    sentence_indices = Input(shape=input_shape, dtype=np.int32)
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
    embeddings = embedding_layer(sentence_indices)
    X = LSTM(128, return_sequences=True)(embeddings)
    X = Dropout(0.5)(X)
    X = LSTM(128)(X)
    X = Dropout(0.5)(X)
    # BUG FIX: the original applied softmax twice — Dense(5, activation='softmax')
    # followed by Activation('softmax'). A double softmax flattens the output
    # distribution (argmax is unchanged, but reported confidences are wrong).
    # Keep the Dense-then-Activation structure so saved weights still load, and
    # let only the Activation layer apply softmax.
    X = Dense(5)(X)
    X = Activation('softmax')(X)
    model = Model(sentence_indices, X)
    return model
118
+
119
+
120
@app.on_event("startup")
async def load_model():
    """Startup hook: load GloVe vectors, build the model, restore weights.

    Populates the module-level model, word_to_index and word_to_vec_map
    globals; endpoints return 503 until this has run.
    """
    global model, word_to_index, word_to_vec_map

    print("Loading GloVe vectors...")
    word_to_index, word_to_vec_map = read_glove_vecs('glove.6B.50d.txt')

    print("Building model...")
    model = build_model((maxLen,), word_to_vec_map, word_to_index)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Load weights if exists — without them predictions are effectively random.
    weights_path = 'model.weights.h5'
    if os.path.exists(weights_path):
        print("Loading trained weights...")
        model.load_weights(weights_path)
    else:
        print("Warning: No trained weights found. Model will use random weights.")

    print("Model loaded successfully!")
139
+
140
@app.get("/", response_model=HealthResponse)
async def root():
    """Liveness probe at the API root; reports whether the model is loaded."""
    loaded = model is not None
    return HealthResponse(status="running", model_loaded=loaded)
146
+
147
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health endpoint; reports whether the model is loaded."""
    loaded = model is not None
    return HealthResponse(status="healthy", model_loaded=loaded)
153
+
154
@app.post("/predict", response_model=PredictResponse)
async def predict_emoji(request: PredictRequest):
    """Predict the best-matching emoji for a single sentence.

    Raises:
        HTTPException 503: model not yet loaded by the startup hook.
        HTTPException 400: the (stripped) input text is empty.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    text = request.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    # Encode the sentence as padded word indices and run the model.
    indices = sentences_to_indices(np.array([text]), word_to_index, maxLen)
    probs = model.predict(indices, verbose=0)[0]
    best = int(np.argmax(probs))

    # Full per-class breakdown alongside the top prediction.
    all_preds = {}
    for cls in range(5):
        all_preds[emoji_dictionary[cls]] = {
            "probability": float(probs[cls]),
            "meaning": emoji_meanings[cls],
        }

    return PredictResponse(
        text=text,
        emoji=emoji_dictionary[best],
        emoji_meaning=emoji_meanings[best],
        confidence=float(probs[best]),
        all_predictions=all_preds,
    )
188
+
189
@app.post("/predict/batch")
async def predict_batch(texts: list[str]):
    """Predict emojis for several sentences in one model call.

    Args:
        texts: list of sentences; each is stripped before encoding.

    Returns:
        {"predictions": [...]} with one entry per input, in order.

    Raises:
        HTTPException 503: model not yet loaded by the startup hook.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Robustness: an empty list would otherwise feed a zero-row array to the model.
    if not texts:
        return {"predictions": []}

    # PERF: encode all sentences at once and run a single predict() call
    # instead of one model invocation per text.
    x_test = np.array([text.strip() for text in texts])
    X_test_indices = sentences_to_indices(x_test, word_to_index, maxLen)
    predictions = model.predict(X_test_indices, verbose=0)

    results = []
    for text, probs in zip(texts, predictions):
        predicted_class = int(np.argmax(probs))
        results.append({
            "text": text,
            "emoji": emoji_dictionary[predicted_class],
            "emoji_meaning": emoji_meanings[predicted_class],
            "confidence": float(probs[predicted_class])
        })

    return {"predictions": results}
209
+
210
if __name__ == "__main__":
    # Local development entry point; in Docker, uvicorn is launched by the CMD.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
glove.6B.50d.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8f717f8dd4b545cb7f418ef9f3d0c3e6e68a6f48b97d32f8b7aae40cb31f96f
3
+ size 171350079
model.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21c1f170e39b6a9ecd6378c3183b3c75a16f9e2d81ee002a0c8380841be3c76e
3
+ size 242727480
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ tensorflow==2.15.0
4
+ numpy==1.26.2
5
+ pydantic==2.5.2
6
+ python-multipart==0.0.6