DavidHosp commited on
Commit
f64d280
verified
1 Parent(s): 6c2f9ff

Upload 8 files

Browse files
Files changed (8) hide show
  1. Dockerfile +22 -0
  2. app.py +428 -0
  3. config.json +45 -0
  4. model.safetensors +3 -0
  5. requirements.txt +15 -0
  6. special_tokens_map.json +7 -0
  7. tokenizer_config.json +58 -0
  8. vocab.txt +0 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install dependencies first so this layer is cached across code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code and model weights
COPY app.py .
COPY model/ ./model/

# HuggingFace Spaces routes traffic to port 7860
EXPOSE 7860

# PyTorch environment flag (harmless on CPU-only hosts)
ENV PYTORCH_ENABLE_MPS_FALLBACK=1

# Launch the FastAPI app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 馃殌 PERI BERT Classifier - FastAPI Backend para HuggingFace Space
3
+
4
+ API REST para clasificaci贸n de reflexiones 茅ticas sobre IA usando BERT fine-tuneado.
5
+ Soporta predicci贸n con MC Dropout para uncertainty quantification.
6
+
7
+ Endpoints:
8
+ - POST /predict - Clasificar una reflexi贸n
9
+ - POST /predict-batch - Clasificar m煤ltiples reflexiones
10
+ - GET /health - Health check
11
+ - GET /info - Informaci贸n del modelo
12
+ """
13
+
14
+ from fastapi import FastAPI, HTTPException
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from pydantic import BaseModel, Field
17
+ from typing import List, Optional, Dict, Any
18
+ import torch
19
+ from transformers import BertTokenizer, BertForSequenceClassification
20
+ import numpy as np
21
+ from pathlib import Path
22
+ import time
23
+ import logging
24
+
25
# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ============================================================================
# CONFIGURATION
# ============================================================================

# Class index -> machine-readable archetype id (must match training labels)
ARCHETYPE_LABELS = {
    0: "TECNOCRATA_OPTIMIZADOR",
    1: "HUMANISTA_CRITICO",
    2: "PRAGMATICO_EQUILIBRADO",
    3: "VISIONARIO_ADAPTATIVO",
    4: "ESCEPTICO_CONSERVADOR",
}

# Class index -> human-readable display name (mis-encoded accents repaired)
ARCHETYPE_NAMES = {
    0: "Tecnócrata Optimizador",
    1: "Humanista Crítico",
    2: "Pragmático Equilibrado",
    3: "Visionario Adaptativo",
    4: "Escéptico Conservador",
}

# Class index -> short user-facing description (Spanish, accents repaired)
ARCHETYPE_DESCRIPTIONS = {
    0: "Confía en la eficiencia y objetividad de los sistemas automatizados",
    1: "Prioriza el bienestar humano y cuestiona activamente los sesgos tecnológicos",
    2: "Busca balance entre innovación tecnológica y consideraciones humanas",
    3: "Abraza la transformación tecnológica con enfoque adaptativo y progresista",
    4: "Mantiene una postura cautelosa y crítica hacia la adopción de IA",
}

# Device configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_LENGTH = 512  # BERT maximum sequence length (matches config.json)
MC_SAMPLES = 10   # number of stochastic forward passes for MC Dropout

# ============================================================================
# PYDANTIC MODELS
# ============================================================================
66
+
67
class ReflectionInput(BaseModel):
    """Input for single-text classification."""
    # 100-char minimum mirrors the skip threshold used by /predict-batch
    text: str = Field(..., min_length=100, max_length=5000, description="Reflexión ética sobre IA")
    use_mc_dropout: bool = Field(default=True, description="Usar MC Dropout para uncertainty")
71
+
72
+
73
class BatchReflectionInput(BaseModel):
    """Input for batch classification (up to 50 texts)."""
    texts: List[str] = Field(..., max_items=50, description="Lista de reflexiones (máx 50)")
    use_mc_dropout: bool = Field(default=True, description="Usar MC Dropout para uncertainty")
77
+
78
+
79
class ArchetypeResult(BaseModel):
    """Archetype assigned to a reflection."""
    id: str           # machine id, e.g. "HUMANISTA_CRITICO"
    name: str         # human-readable display name
    description: str  # short description of the archetype
84
+
85
+
86
class PredictionResponse(BaseModel):
    """Response for a single prediction."""
    archetype: ArchetypeResult
    confidence: float = Field(..., ge=0.0, le=1.0, description="Confianza de la predicción")
    uncertainty: Optional[float] = Field(None, ge=0.0, description="Incertidumbre (MC Dropout)")
    top3_predictions: List[Dict[str, Any]] = Field(..., description="Top 3 predicciones")
    inference_time_ms: float = Field(..., description="Tiempo de inferencia en milisegundos")
    method: str = Field(default="bert", description="Método de clasificación")
94
+
95
+
96
class BatchPredictionResponse(BaseModel):
    """Response for a batch prediction request."""
    predictions: List[PredictionResponse]   # one entry per non-skipped text
    total_inference_time_ms: float          # wall-clock time for the whole batch
100
+
101
+
102
class HealthResponse(BaseModel):
    """Payload returned by the /health endpoint."""
    status: str         # always "healthy" when the server responds
    model_loaded: bool  # whether the global classifier is initialized
    device: str         # "cuda" or "cpu"
    timestamp: float    # server time (epoch seconds)
108
+
109
+
110
class InfoResponse(BaseModel):
    """Model metadata returned by the /info endpoint."""
    model_name: str
    num_classes: int
    max_length: int
    device: str
    mc_dropout_samples: int
    archetypes: List[Dict[str, str]]  # id/name/description per archetype
118
+
119
+
120
# ============================================================================
# MODEL LOADING
# ============================================================================

class BERTClassifier:
    """Wrapper around a fine-tuned BERT sequence classifier.

    Supports standard single-pass inference and MC Dropout (multiple
    stochastic forward passes) for uncertainty quantification.
    """

    def __init__(self, model_path: str):
        logger.info(f"Cargando modelo desde {model_path}...")
        self.tokenizer = BertTokenizer.from_pretrained(model_path)
        self.model = BertForSequenceClassification.from_pretrained(model_path)
        self.model.to(DEVICE)
        self.model.eval()
        logger.info(f"Modelo cargado exitosamente en {DEVICE}")

    def predict(
        self,
        text: str,
        use_mc_dropout: bool = True
    ) -> Dict[str, Any]:
        """Run inference on *text*, optionally with MC Dropout.

        Returns:
            dict with keys: predicted_class, confidence, uncertainty,
            top3, inference_time_ms, all_probabilities
        """
        start_time = time.time()

        # Tokenize (pad/truncate to the model's fixed length)
        encoding = self.tokenizer(
            text,
            max_length=MAX_LENGTH,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        input_ids = encoding["input_ids"].to(DEVICE)
        attention_mask = encoding["attention_mask"].to(DEVICE)

        if use_mc_dropout:
            # MC Dropout: several stochastic passes with dropout enabled.
            # train() is used only to activate dropout layers; BERT has no
            # batch-norm, so this does not otherwise change inference.
            self.model.train()
            all_probs = []

            try:
                with torch.no_grad():
                    for _ in range(MC_SAMPLES):
                        outputs = self.model(
                            input_ids=input_ids,
                            attention_mask=attention_mask
                        )
                        probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
                        all_probs.append(probs)
            finally:
                # BUGFIX: always restore eval mode, even if a forward pass
                # raises; otherwise dropout would stay enabled for every
                # subsequent request served by this process.
                self.model.eval()

            # Aggregate the stochastic samples
            all_probs = np.array(all_probs)  # (MC_SAMPLES, num_classes)
            mean_probs = np.mean(all_probs, axis=0)
            predicted_class = int(np.argmax(mean_probs))
            confidence = float(mean_probs[predicted_class])

            # Uncertainty = predictive entropy of the averaged distribution
            epsilon = 1e-10
            uncertainty = float(-np.sum(mean_probs * np.log(mean_probs + epsilon)))

        else:
            # Standard deterministic prediction (dropout disabled)
            with torch.no_grad():
                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask
                )

            probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
            mean_probs = probs
            predicted_class = int(np.argmax(probs))
            confidence = float(probs[predicted_class])
            uncertainty = None
            all_probs = probs.reshape(1, -1)

        # Top-3 predictions, highest probability first
        top3_indices = np.argsort(mean_probs)[-3:][::-1]
        top3 = [
            {
                "archetype_id": ARCHETYPE_LABELS[int(idx)],
                "archetype_name": ARCHETYPE_NAMES[int(idx)],
                "probability": float(mean_probs[idx])
            }
            for idx in top3_indices
        ]

        inference_time = (time.time() - start_time) * 1000  # ms

        return {
            "predicted_class": predicted_class,
            "confidence": confidence,
            "uncertainty": uncertainty,
            "top3": top3,
            "inference_time_ms": inference_time,
            "all_probabilities": mean_probs.tolist()
        }
224
+
225
+
226
# Global model instance, populated by load_model() at startup
classifier: Optional[BERTClassifier] = None


def load_model():
    """Load the BERT model into the global ``classifier``.

    Tries known locations in order (HuggingFace Space layout first,
    then the local development path).

    Raises:
        RuntimeError: if no candidate directory exists.
    """
    global classifier

    model_paths = [
        Path("./model"),                               # HF Space layout
        Path("../../../models/peri-bert/best_model"),  # local development
    ]

    model_path = None
    for path in model_paths:
        if path.exists():
            model_path = str(path)
            break

    if model_path is None:
        logger.error("No se encontró el modelo. Asegúrate de subirlo a HuggingFace Space.")
        raise RuntimeError("Model not found")

    classifier = BERTClassifier(model_path)
252
+
253
+
254
# ============================================================================
# FASTAPI APP
# ============================================================================

app = FastAPI(
    title="PERI BERT Classifier API",
    description="API REST para clasificación de arquetipos éticos en reflexiones sobre IA",
    version="1.0.0",
    docs_url="/",  # serve Swagger UI at the root
)

# CORS middleware
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# not honored by browsers per the CORS spec — restrict origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
273
+
274
+
275
@app.on_event("startup")
async def startup_event():
    """Load the model once when the server starts.

    NOTE(review): on_event is deprecated in recent FastAPI in favor of
    lifespan handlers — consider migrating when upgrading.
    """
    load_model()
279
+
280
+
281
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Liveness probe: reports whether the model is loaded and on what device."""
    return HealthResponse(
        status="healthy",
        model_loaded=classifier is not None,
        device=DEVICE,
        timestamp=time.time(),
    )
290
+
291
+
292
@app.get("/info", response_model=InfoResponse)
async def model_info():
    """Describe the loaded model and the five supported archetypes."""
    if classifier is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Build one entry per archetype from the three parallel lookup tables
    archetype_catalog = [
        {
            "id": ARCHETYPE_LABELS[idx],
            "name": ARCHETYPE_NAMES[idx],
            "description": ARCHETYPE_DESCRIPTIONS[idx],
        }
        for idx in range(5)
    ]

    return InfoResponse(
        model_name="bert-base-multilingual-cased (fine-tuned)",
        num_classes=5,
        max_length=MAX_LENGTH,
        device=DEVICE,
        mc_dropout_samples=MC_SAMPLES,
        archetypes=archetype_catalog,
    )
315
+
316
+
317
@app.post("/predict", response_model=PredictionResponse)
async def predict(input_data: ReflectionInput):
    """Classify a single reflection.

    Args:
        input_data: reflection text and MC-Dropout flag.

    Returns:
        Prediction with archetype, confidence and metrics.

    Raises:
        HTTPException: 503 if the model is not loaded, 500 on failure.
    """
    if classifier is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    try:
        result = classifier.predict(
            text=input_data.text,
            use_mc_dropout=input_data.use_mc_dropout
        )

        predicted = result["predicted_class"]
        archetype_result = ArchetypeResult(
            id=ARCHETYPE_LABELS[predicted],
            name=ARCHETYPE_NAMES[predicted],
            description=ARCHETYPE_DESCRIPTIONS[predicted]
        )

        return PredictionResponse(
            archetype=archetype_result,
            confidence=result["confidence"],
            uncertainty=result["uncertainty"],
            top3_predictions=result["top3"],
            inference_time_ms=result["inference_time_ms"],
            method="bert-mc-dropout" if input_data.use_mc_dropout else "bert"
        )

    except Exception as e:
        logger.error(f"Error en predicción: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Prediction error: {str(e)}")
355
+
356
+
357
@app.post("/predict-batch", response_model=BatchPredictionResponse)
async def predict_batch(input_data: BatchReflectionInput):
    """Classify multiple reflections sequentially.

    Texts shorter than 100 characters are silently skipped, so the
    response may contain fewer predictions than input texts (callers
    cannot correlate by index in that case).

    Raises:
        HTTPException: 400 on empty input, 503 if the model is not
        loaded, 500 on prediction failure.
    """
    if classifier is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    if not input_data.texts:
        raise HTTPException(status_code=400, detail="Empty texts list")

    start_time = time.time()
    predictions = []

    try:
        for text in input_data.texts:
            if len(text) < 100:
                continue  # below the model's minimum-length contract

            result = classifier.predict(
                text=text,
                use_mc_dropout=input_data.use_mc_dropout
            )

            predicted = result["predicted_class"]
            archetype_result = ArchetypeResult(
                id=ARCHETYPE_LABELS[predicted],
                name=ARCHETYPE_NAMES[predicted],
                description=ARCHETYPE_DESCRIPTIONS[predicted]
            )

            predictions.append(
                PredictionResponse(
                    archetype=archetype_result,
                    confidence=result["confidence"],
                    uncertainty=result["uncertainty"],
                    top3_predictions=result["top3"],
                    inference_time_ms=result["inference_time_ms"],
                    method="bert-mc-dropout" if input_data.use_mc_dropout else "bert"
                )
            )

        total_time = (time.time() - start_time) * 1000  # ms

        return BatchPredictionResponse(
            predictions=predictions,
            total_inference_time_ms=total_time
        )

    except Exception as e:
        logger.error(f"Error en batch prediction: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Batch prediction error: {str(e)}")
414
+
415
+
416
# ============================================================================
# MAIN (local testing entry point)
# ============================================================================

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=7860,    # standard HuggingFace Spaces port
        reload=True,  # auto-reload is for local development only
    )
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "directionality": "bidi",
8
+ "dtype": "float32",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "LABEL_0": 0,
23
+ "LABEL_1": 1,
24
+ "LABEL_2": 2,
25
+ "LABEL_3": 3,
26
+ "LABEL_4": 4
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "max_position_embeddings": 512,
30
+ "model_type": "bert",
31
+ "num_attention_heads": 12,
32
+ "num_hidden_layers": 12,
33
+ "pad_token_id": 0,
34
+ "pooler_fc_size": 768,
35
+ "pooler_num_attention_heads": 12,
36
+ "pooler_num_fc_layers": 3,
37
+ "pooler_size_per_head": 128,
38
+ "pooler_type": "first_token_transform",
39
+ "position_embedding_type": "absolute",
40
+ "problem_type": "single_label_classification",
41
+ "transformers_version": "4.57.0",
42
+ "type_vocab_size": 2,
43
+ "use_cache": true,
44
+ "vocab_size": 119547
45
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa322af7e353a942ef53d90c6ccd40c1d795777cf31bf9e4b41dd799c0b8382
3
+ size 711452684
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PERI BERT Classifier - HuggingFace Space Requirements
2
+ # Python 3.10+
3
+
4
+ # FastAPI y servidor
5
+ fastapi==0.110.0
6
+ uvicorn[standard]==0.27.0
7
+ pydantic==2.6.1
8
+
9
+ # Deep Learning
10
+ torch==2.2.0
11
+ transformers==4.38.0
12
+
13
+ # Utilidades
14
+ numpy==1.26.3
15
+ python-multipart==0.0.9
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff