yezdata committed on
Commit
2592d48
·
0 Parent(s):

initial commit

Browse files
Files changed (5) hide show
  1. .gitignore +10 -0
  2. .python-version +1 -0
  3. main.py +221 -0
  4. pyproject.toml +14 -0
  5. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.13
main.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from fastapi import FastAPI
4
+ import uvicorn
5
+ from pydantic import BaseModel
6
+ from transformers import AutoTokenizer, AutoModel
7
+
8
# FastAPI application object; the /predict and /health routes below attach to it.
app = FastAPI(title="EmCoder API & UI")

# Hugging Face Hub repository that supplies both the tokenizer and the model.
repo_id = "yezdata/EmCoder"

# NOTE: trust_remote_code=True runs Python code shipped inside the repository,
# so repo_id must always point at a trusted source.
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)

# Put the freshly loaded model into evaluation mode.
model.eval()

# Number of labels exposed by the model config; 28 is used when the config
# has no num_labels attribute.
max_labels = getattr(model.config, "num_labels", 28)
18
+
19
+
20
def compute_binary_entropy(p: torch.Tensor, eps: float = 1e-9) -> torch.Tensor:
    """Return the element-wise binary (Bernoulli) entropy of ``p`` in nats.

    Args:
        p: Tensor of probabilities, any shape.
        eps: Clamp margin; values are clamped to ``[eps, 1 - eps]`` before
            the entropy is evaluated.

    Returns:
        Tensor shaped like ``p`` holding ``-(p*log(p) + (1-p)*log(1-p))``.
        The result is always finite, including at ``p == 0`` and ``p == 1``.
    """
    p = torch.clamp(p, min=eps, max=1.0 - eps)
    q = 1.0 - p
    # With float32 inputs ``1 - 1e-9`` rounds to exactly 1.0, so the clamp
    # cannot keep p away from 1 and q may be exactly 0. ``x * log(x)`` would
    # then be ``0 * -inf = NaN``; xlogy defines that term as 0 instead.
    return -(torch.xlogy(p, p) + torch.xlogy(q, q))
23
+
24
+
25
def compute_uncertainty(probs_samples: torch.Tensor, mean_probs: torch.Tensor) -> dict:
    """Decompose predictive uncertainty into total, aleatoric and epistemic parts.

    Args:
        probs_samples: Per-sample probabilities; the caller in this file
            passes a ``(n_samples, num_labels)`` tensor.
        mean_probs: Probabilities averaged over the samples; the caller in
            this file passes a ``(num_labels,)`` tensor.

    Returns:
        Dict with the keys ``"total"``, ``"aleatoric"`` and ``"epistemic"``.
    """
    # Total uncertainty: entropy of the averaged prediction.
    predictive_entropy = compute_binary_entropy(mean_probs)

    # Aleatoric (expected entropy): per-sample entropies averaged over dim 0.
    expected_entropy = compute_binary_entropy(probs_samples).mean(dim=0)

    # Epistemic (mutual information): the gap between the two, floored at
    # zero so numerical noise cannot produce a negative value.
    mutual_information = (predictive_entropy - expected_entropy).clamp(min=0.0)

    return {
        "total": predictive_entropy,
        "aleatoric": expected_entropy,
        "epistemic": mutual_information,
    }
37
+
38
+
39
+
40
+
41
class PredictRequest(BaseModel):
    # Request body of the /predict endpoint. Documented with comments rather
    # than a docstring so the generated API schema stays unchanged.

    # Text to classify.
    text: str

    # When true, /predict runs the Monte Carlo path and reports uncertainty.
    monte_carlo: bool = False

    # Number of Monte Carlo samples; only read when monte_carlo is true.
    n_samples: int = 10
45
+
46
@app.post("/predict")
def predict_api(request: PredictRequest):
    """Classify ``request.text`` and return per-label probabilities.

    In standard mode a single forward pass is run and each label maps to
    ``{"probability": ...}``. In Monte Carlo mode ``model.mc_forward`` is
    called with ``request.n_samples`` and each label maps to its mean
    probability plus total / epistemic / aleatoric uncertainty.

    Raises:
        HTTPException: 422 when Monte Carlo mode is requested with
            ``n_samples < 1``.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    if request.monte_carlo and request.n_samples < 1:
        raise HTTPException(
            status_code=422,
            detail="n_samples must be >= 1 when monte_carlo is enabled",
        )

    # truncation=True caps over-long inputs at the tokenizer's configured
    # maximum length instead of letting the model fail on them.
    encoded = tokenizer(request.text, return_tensors="pt", truncation=True)
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    id2label = model.config.id2label
    num_labels = model.config.num_labels

    if request.monte_carlo:
        with torch.no_grad():
            outputs = model.mc_forward(
                input_ids=input_ids,
                attention_mask=attention_mask,
                n_samples=request.n_samples,
            )

        # Assumes mc_forward logits are (n_samples, 1, num_labels), so
        # squeeze(1) drops the single-item batch axis -- TODO confirm
        # against the model's remote code.
        probs_samples = torch.sigmoid(outputs.logits.squeeze(1))
        mean_probs = probs_samples.mean(dim=0)
        unc_dict = compute_uncertainty(probs_samples=probs_samples, mean_probs=mean_probs)

        predictions = {
            id2label[i]: {
                "mean_probability": float(mean_probs[i]),
                "uncertainty": {
                    "total_entropy": float(unc_dict["total"][i]),
                    "epistemic": float(unc_dict["epistemic"][i]),
                    "aleatoric": float(unc_dict["aleatoric"][i]),
                },
            }
            for i in range(num_labels)
        }
        return {
            "mode": "monte_carlo",
            "n_samples": request.n_samples,
            "predictions": predictions,
        }

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    # squeeze(0) drops the leading axis of the single-text batch.
    probs = torch.sigmoid(outputs.logits.squeeze(0))
    predictions = {
        id2label[i]: {"probability": float(probs[i])}
        for i in range(num_labels)
    }
    return {
        "mode": "standard",
        "predictions": predictions,
    }
109
+
110
+
111
+
112
@app.get("/health")
def health_check():
    # Liveness probe: always answers with a fixed payload. Documented with a
    # comment rather than a docstring so the generated API schema is unchanged.
    return dict(status="healthy")
115
+
116
def gradio_predict(text, top_n, monte_carlo, n_samples):
    """Run a prediction for the Gradio UI and format it as table rows.

    Calls ``predict_api`` directly, ranks labels by probability in descending
    order, keeps the first ``top_n`` and returns a pair of ``gr.update``
    objects: the first for the standard table, the second for the Monte
    Carlo table. Only the table matching the selected mode is made visible.
    """
    response = predict_api(
        PredictRequest(text=text, monte_carlo=bool(monte_carlo), n_samples=int(n_samples))
    )

    # The response uses a different probability key in each mode.
    prob_key = "mean_probability" if monte_carlo else "probability"
    ranked = sorted(
        response["predictions"].items(),
        key=lambda item: item[1][prob_key],
        reverse=True,
    )
    shown = ranked[:int(top_n)]

    if monte_carlo:
        mc_rows = [
            [
                label_name,
                f"{metrics['mean_probability'] * 100:.2f}%",
                f"{metrics['uncertainty']['total_entropy']:.4f}",
                f"{metrics['uncertainty']['epistemic']:.4f}",
                f"{metrics['uncertainty']['aleatoric']:.4f}",
            ]
            for label_name, metrics in shown
        ]
        return (
            gr.update(value=[], visible=False),
            gr.update(value=mc_rows, visible=True),
        )

    standard_rows = [
        [label_name, f"{metrics['probability'] * 100:.2f}%"]
        for label_name, metrics in shown
    ]
    return (
        gr.update(value=standard_rows, visible=True),
        gr.update(value=[], visible=False),
    )
158
+
159
+
160
# Gradio front end: inputs in the left column, result tables in the right.
with gr.Blocks(title="EmCoder - Probabilistic Emotion Recognition") as ui:
    gr.Markdown("# EmCoder - Probabilistic Emotion Recognition")
    gr.Markdown("Live API service and graphical interface demonstrating EmCoder's epistemic and aleatoric uncertainty decomposition via Monte Carlo Dropout.")

    with gr.Row():
        # Left column: text input and prediction options.
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Input text",
                placeholder="Input text for classification...",
                lines=3,
            )
            top_n_slider = gr.Slider(
                label="Top Emotions to display",
                minimum=1,
                maximum=max_labels,
                value=min(5, max_labels),
                step=1,
            )
            use_mc = gr.Checkbox(
                label="Use Monte Carlo Dropout (Uncertainty Estimation)",
                value=False,
            )
            mc_samples_slider = gr.Slider(
                label="MC samples",
                minimum=5,
                maximum=50,
                value=10,
                step=1,
            )
            submit_btn = gr.Button("Analyze Emotion", variant="primary")

        # Right column: one table per mode; gradio_predict toggles which
        # of the two is visible.
        with gr.Column(scale=2):
            output_table_standard = gr.DataFrame(
                headers=["Emotion", "Probability"],
                datatype=["str", "str"],
                label="Prediction Report",
                visible=True,
            )

            output_table_mc = gr.DataFrame(
                headers=[
                    "Emotion",
                    "Probability (Mean)",
                    "Total Uncertainty (Entropy)",
                    "Epistemic (Model Knowledge)",
                    "Aleatoric (Data Noise)",
                ],
                datatype=["str", "str", "str", "str", "str"],
                label="Prediction & Bayesian Uncertainty Report",
                visible=False,
            )

    # The output order must match the tuple returned by gradio_predict.
    submit_btn.click(
        fn=gradio_predict,
        inputs=[input_text, top_n_slider, use_mc, mc_samples_slider],
        outputs=[output_table_standard, output_table_mc],
    )

# Serve the Gradio UI from the root path of the FastAPI application.
app = gr.mount_gradio_app(app, ui, path="/")


if __name__ == "__main__":
    # Direct execution starts uvicorn with auto-reload on all interfaces.
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
221
+
pyproject.toml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "api"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = [
8
+ "fastapi>=0.136.1",
9
+ "gradio>=6.14.0",
10
+ "pydantic>=2.13.4",
11
+ "torch>=2.12.0",
12
+ "transformers>=5.9.0",
13
+ "uvicorn>=0.47.0",
14
+ ]
uv.lock ADDED
The diff for this file is too large to render. See raw diff