RustyLinux committed
Commit 2a21df8 · verified · 1 parent: 6a31658

Delete duplicate files

MiaMotion/.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
MiaMotion/README.md DELETED
@@ -1,3 +0,0 @@
- ---
- license: gpl-2.0
- ---
 
MiaMotion/best_model.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:80051b5bd4ac73173990e59b712e8a7300b3a23ae5d23f13ffbec2715651d2a0
- size 1315023871
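Only this LFS pointer lives in Git; the ~1.3 GB of weights are stored in LFS. A minimal sketch of restoring the checkpoint, assuming best_model.pt holds a plain state_dict matching the EmotionClassifier defined in emotion_classifier_model.py below:

```python
# Assumption: best_model.pt is a state_dict for EmotionClassifier's default
# architecture (see config.json and emotion_classifier_model.py).
import torch
from emotion_classifier_model import EmotionClassifier

model = EmotionClassifier(pretrained_encoder="beto")
state = torch.load("MiaMotion/best_model.pt", map_location="cpu")
model.load_state_dict(state)
model.eval()
```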
 
MiaMotion/config.json DELETED
@@ -1,9 +0,0 @@
- {
-   "base_model_id": "dccuchile/bert-base-spanish-wwm-cased",
-   "pretrained_encoder": "beto",
-   "max_length": 128,
-   "hidden1": 128,
-   "hidden2": 64,
-   "num_classes": 6,
-   "dropout": 0.3
- }
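These keys line up with the EmotionClassifier constructor in emotion_classifier_model.py; a sketch of wiring the two together (file path assumed):

```python
# Build the model from config.json so the architecture matches the checkpoint.
# Note: the config key "base_model_id" maps to the constructor's `model_name`.
import json
from emotion_classifier_model import EmotionClassifier

with open("MiaMotion/config.json") as f:
    cfg = json.load(f)

model = EmotionClassifier(
    model_name=cfg["base_model_id"],
    max_length=cfg["max_length"],
    hidden1=cfg["hidden1"],
    hidden2=cfg["hidden2"],
    num_classes=cfg["num_classes"],
    dropout=cfg["dropout"],
    pretrained_encoder=cfg["pretrained_encoder"],
)
```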
 
MiaMotion/emotion_classifier_model.py DELETED
@@ -1,207 +0,0 @@
- """
- === MIA · Emotion Classifier (Pretrained Encoder + MLP) ===
- - Maintains compatibility with your public API.
- - Lets you use your random TextEmbedder (emb_dim) or a pretrained encoder (BETO) with 768-D output.
- - Exposes freeze/unfreeze so the trainer can control fine-tuning.
- """
-
- import torch
- import torch.nn as nn
- from typing import List, Optional
- from transformers import AutoTokenizer, AutoModel
-
-
- # ==================== MODULE 1A: TextEmbedder (random embedding) ====================
- class TextEmbedder(nn.Module):
-     """
-     Simple embedding module:
-     - Uses the BETO tokenizer for sub-words (for convenience: vocab, pad_id, etc.)
-     - The representation is a random embedding + mean pooling (non-contextual).
-     """
-     def __init__(
-         self,
-         model_name: str = "dccuchile/bert-base-spanish-wwm-cased",
-         emb_dim: int = 300,
-         max_length: int = 128,
-         device: Optional[torch.device] = None
-     ):
-         super().__init__()
-         self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
-         self.vocab_size = self.tokenizer.vocab_size
-         self.pad_id = self.tokenizer.pad_token_id
-         self.cls_id = self.tokenizer.cls_token_id
-         self.sep_id = self.tokenizer.sep_token_id
-         self.max_length = max_length
-
-         # Embedding layer
-         self.embedding = nn.Embedding(self.vocab_size, emb_dim, padding_idx=self.pad_id)
-         nn.init.xavier_uniform_(self.embedding.weight)
-         with torch.no_grad():
-             if self.pad_id is not None:
-                 self.embedding.weight[self.pad_id].zero_()
-
-         # Optional regularization (helps against overfitting)
-         self.emb_dropout = nn.Dropout(p=0.1)
-
-         self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
-         self.to(self.device)
-
-     def embed_batch(self, texts: List[str]) -> torch.Tensor:
-         batch = self.tokenizer(
-             texts, padding=True, truncation=True, max_length=self.max_length, return_tensors="pt"
-         )
-         input_ids = batch["input_ids"].to(self.device)            # [B, T]
-         attention_mask = batch["attention_mask"].to(self.device)  # [B, T]
-
-         embeds = self.embedding(input_ids)  # [B, T, E]
-         if self.training:
-             embeds = self.emb_dropout(embeds)
-
-         mask = attention_mask.bool()  # [B, T]
-         if self.cls_id is not None:
-             mask = mask & (input_ids != self.cls_id)
-         if self.sep_id is not None:
-             mask = mask & (input_ids != self.sep_id)
-
-         mask_f = mask.unsqueeze(-1).float()        # [B, T, 1]
-         summed = (embeds * mask_f).sum(dim=1)      # [B, E]
-         counts = mask_f.sum(dim=1).clamp(min=1.0)  # [B, 1]
-         sentence_vecs = summed / counts            # [B, E]
-         return sentence_vecs
-
-     def embed_sentence(self, text: str) -> torch.Tensor:
-         return self.embed_batch([text])[0]
-
-
- # ==================== MODULE 1B: BETOEmbedder (pretrained encoder) ====================
- class BETOEmbedder(nn.Module):
-     """
-     Uses the BETO encoder (Spanish BERT) to obtain contextual embeddings.
-     Mean pooling over last_hidden_state.
-     Output: [B, 768]
-     """
-     def __init__(
-         self,
-         model_name: str = "dccuchile/bert-base-spanish-wwm-cased",
-         max_length: int = 128,
-         device: Optional[torch.device] = None
-     ):
-         super().__init__()
-         self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
-         self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
-         self.encoder = AutoModel.from_pretrained(model_name)
-         self.max_length = max_length
-         self.encoder.to(self.device)
-
-     def embed_batch(self, texts: List[str]) -> torch.Tensor:
-         inputs = self.tokenizer(
-             texts, padding=True, truncation=True, max_length=self.max_length, return_tensors="pt"
-         ).to(self.device)
-         outputs = self.encoder(**inputs)  # last_hidden_state [B, T, 768]
-         last_hidden = outputs.last_hidden_state
-         mask = inputs["attention_mask"].unsqueeze(-1).float()  # [B, T, 1]
-         pooled = (last_hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)  # [B, 768]
-         return pooled
-
-
- # ==================== MODULE 2: MLP Classifier ====================
- class MLPClassifier(nn.Module):
-     """
-     Feedforward network for emotion classification:
-     Input → 128 → 64 → 6 (logits)
-     """
-     def __init__(
-         self,
-         input_dim: int = 300,
-         hidden1: int = 128,
-         hidden2: int = 64,
-         num_classes: int = 6,
-         dropout: float = 0.3
-     ):
-         super().__init__()
-         self.fc1 = nn.Linear(input_dim, hidden1)
-         self.relu1 = nn.ReLU()
-         self.dropout1 = nn.Dropout(dropout)
-
-         self.fc2 = nn.Linear(hidden1, hidden2)
-         self.relu2 = nn.ReLU()
-         self.dropout2 = nn.Dropout(dropout)
-
-         self.fc3 = nn.Linear(hidden2, num_classes)
-
-     def forward(self, x: torch.Tensor) -> torch.Tensor:
-         x = self.fc1(x); x = self.relu1(x); x = self.dropout1(x)
-         x = self.fc2(x); x = self.relu2(x); x = self.dropout2(x)
-         x = self.fc3(x)
-         return x
-
-
- # ==================== MODULE 3: Full Model ====================
- class EmotionClassifier(nn.Module):
-     """
-     Integrates an embedder (random or BETO) + the MLP.
-     - `pretrained_encoder=None` → uses TextEmbedder (configurable emb_dim)
-     - `pretrained_encoder="beto"` → uses BETOEmbedder (768-D output)
-     """
-     def __init__(
-         self,
-         model_name: str = "dccuchile/bert-base-spanish-wwm-cased",
-         emb_dim: int = 300,
-         max_length: int = 128,
-         hidden1: int = 128,
-         hidden2: int = 64,
-         num_classes: int = 6,
-         dropout: float = 0.3,
-         device: Optional[torch.device] = None,
-         pretrained_encoder: Optional[str] = None
-     ):
-         super().__init__()
-         self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-         if pretrained_encoder == "beto":
-             self.embedder = BETOEmbedder(model_name=model_name, max_length=max_length, device=self.device)
-             embed_dim = 768
-         else:
-             self.embedder = TextEmbedder(model_name=model_name, emb_dim=emb_dim, max_length=max_length, device=self.device)
-             embed_dim = emb_dim
-
-         self.classifier = MLPClassifier(
-             input_dim=embed_dim, hidden1=hidden1, hidden2=hidden2, num_classes=num_classes, dropout=dropout
-         )
-
-         self.label_map = {0: "tristeza", 1: "alegría", 2: "amor", 3: "ira", 4: "miedo", 5: "sorpresa"}
-
-         self.to(self.device)
-
-     # ---------- Forward & Utils ----------
-     def forward(self, texts: List[str]) -> torch.Tensor:
-         embeddings = self.embedder.embed_batch(texts)  # [B, D]
-         logits = self.classifier(embeddings)           # [B, C]
-         return logits
-
-     def predict(self, texts: List[str], return_probs: bool = False):
-         self.eval()
-         with torch.no_grad():
-             logits = self.forward(texts)
-             probs = torch.softmax(logits, dim=-1)
-             predictions = torch.argmax(probs, dim=-1)
-             emotions = [self.label_map[p.item()] for p in predictions]
-             if return_probs:
-                 return emotions, probs.cpu().numpy()
-             return emotions
-
-     def predict_single(self, text: str, return_probs: bool = False):
-         out = self.predict([text], return_probs=return_probs)
-         if return_probs:
-             emotions, probs = out
-             return emotions[0], probs[0]
-         return out[0]
-
-     # ---------- Fine-tuning helpers ----------
-     def freeze_encoder(self):
-         for p in self.embedder.parameters():
-             p.requires_grad = False
-
-     def unfreeze_encoder(self):
-         for p in self.embedder.parameters():
-             p.requires_grad = True
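For orientation, a minimal usage sketch of the deleted module (hypothetical example, not part of the commit):

```python
# Instantiate the classifier with the pretrained BETO encoder, freeze it so
# only the MLP head would be trained, and run a single prediction.
from emotion_classifier_model import EmotionClassifier

clf = EmotionClassifier(pretrained_encoder="beto")
clf.freeze_encoder()  # fine-tuning helper exposed by the model

emotion, probs = clf.predict_single("Hoy me siento muy feliz", return_probs=True)
print(emotion, probs)  # emotion is one of: tristeza, alegría, amor, ira, miedo, sorpresa
```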
 
MiaMotion/label_map.json DELETED
@@ -1,8 +0,0 @@
- {
-   "0": "tristeza",
-   "1": "alegría",
-   "2": "amor",
-   "3": "ira",
-   "4": "miedo",
-   "5": "sorpresa"
- }
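This JSON duplicates the label_map hard-coded in EmotionClassifier; a small sketch of decoding class indices from the file instead (stand-in logits for illustration):

```python
# Decode argmax indices to emotion names using label_map.json.
import json
import torch

with open("MiaMotion/label_map.json") as f:
    label_map = {int(k): v for k, v in json.load(f).items()}

logits = torch.randn(2, 6)  # stand-in for model output [B, num_classes]
preds = logits.argmax(dim=-1)
print([label_map[i.item()] for i in preds])
```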
 
MiaMotion/requirements.txt DELETED
@@ -1,86 +0,0 @@
- aiohappyeyeballs==2.6.1
- aiohttp==3.12.15
- aiosignal==1.4.0
- anyio==4.10.0
- attrs==25.3.0
- beautifulsoup4==4.13.5
- certifi==2025.8.3
- charset-normalizer==3.4.3
- contourpy==1.3.3
- cycler==0.12.1
- datasets==4.1.1
- deep-translator==1.11.4
- dill==0.4.0
- filelock==3.19.1
- fonttools==4.60.1
- frozenlist==1.7.0
- fsspec==2025.9.0
- googletrans==4.0.2
- h11==0.16.0
- h2==4.3.0
- hf-xet==1.1.10
- hpack==4.1.0
- httpcore==1.0.9
- httpx==0.28.1
- huggingface-hub==0.35.0
- hyperframe==6.1.0
- idna==3.10
- ijson==3.4.0
- Jinja2==3.1.6
- joblib==1.5.2
- kiwisolver==1.4.9
- MarkupSafe==3.0.3
- matplotlib==3.10.7
- mpmath==1.3.0
- multidict==6.6.4
- multiprocess==0.70.16
- networkx==3.5
- numpy==2.3.3
- nvidia-cublas-cu12==12.8.4.1
- nvidia-cuda-cupti-cu12==12.8.90
- nvidia-cuda-nvrtc-cu12==12.8.93
- nvidia-cuda-runtime-cu12==12.8.90
- nvidia-cudnn-cu12==9.10.2.21
- nvidia-cufft-cu12==11.3.3.83
- nvidia-cufile-cu12==1.13.1.3
- nvidia-curand-cu12==10.3.9.90
- nvidia-cusolver-cu12==11.7.3.90
- nvidia-cusparse-cu12==12.5.8.93
- nvidia-cusparselt-cu12==0.7.1
- nvidia-nccl-cu12==2.27.5
- nvidia-nvjitlink-cu12==12.8.93
- nvidia-nvshmem-cu12==3.3.20
- nvidia-nvtx-cu12==12.8.90
- packaging==25.0
- pandas==2.3.2
- pillow==12.0.0
- propcache==0.3.2
- protobuf==6.33.0
- pyarrow==21.0.0
- pyparsing==3.2.5
- python-dateutil==2.9.0.post0
- pytz==2025.2
- PyYAML==6.0.2
- regex==2025.10.23
- requests==2.32.5
- safetensors==0.6.2
- scikit-learn==1.7.2
- scipy==1.16.2
- seaborn==0.13.2
- setuptools==80.9.0
- six==1.17.0
- sniffio==1.3.1
- soupsieve==2.8
- sympy==1.14.0
- threadpoolctl==3.6.0
- tiktoken==0.12.0
- tokenizers==0.22.1
- torch==2.9.0
- tqdm==4.67.1
- transformers==4.57.1
- triton==3.5.0
- typing_extensions==4.15.0
- tzdata==2025.2
- urllib3==2.5.0
- xxhash==3.5.0
- yarl==1.20.1