Spaces:
Sleeping
Sleeping
Claude committed on
feat: rendre l'endpoint /api/normalization/profiles dynamique
Browse files
Extraire le dict des profils en constante NORMALIZATION_PROFILES au
niveau module dans normalization.py, puis l'utiliser dans get_builtin_profile.
L'endpoint lit maintenant NORMALIZATION_PROFILES directement, ce qui
expose automatiquement les 3 profils anglais du Sprint 11
(early_modern_english, medieval_english, secretary_hand) et tout futur
profil ajouté sans modifier app.py.
https://claude.ai/code/session_017gXea9mxBQqDTAsSQd7aAq
- picarones/core/normalization.py +70 -68
- picarones/web/app.py +11 -24
picarones/core/normalization.py
CHANGED
|
@@ -232,6 +232,73 @@ class NormalizationProfile:
|
|
| 232 |
# Profils préconfigurés
|
| 233 |
# ---------------------------------------------------------------------------
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
def get_builtin_profile(name: str) -> NormalizationProfile:
|
| 236 |
"""Retourne un profil préconfigurée par son identifiant.
|
| 237 |
|
|
@@ -252,77 +319,12 @@ def get_builtin_profile(name: str) -> NormalizationProfile:
|
|
| 252 |
KeyError
|
| 253 |
Si le nom n'est pas reconnu.
|
| 254 |
"""
|
| 255 |
-
|
| 256 |
-
"medieval_french": NormalizationProfile(
|
| 257 |
-
name="medieval_french",
|
| 258 |
-
nfc=True,
|
| 259 |
-
caseless=False,
|
| 260 |
-
diplomatic_table=DIPLOMATIC_FR_MEDIEVAL,
|
| 261 |
-
description="Français médiéval (XIIe–XVe) : ſ=s, u=v, i=j, æ=ae, œ=oe",
|
| 262 |
-
),
|
| 263 |
-
"early_modern_french": NormalizationProfile(
|
| 264 |
-
name="early_modern_french",
|
| 265 |
-
nfc=True,
|
| 266 |
-
caseless=False,
|
| 267 |
-
diplomatic_table=DIPLOMATIC_FR_EARLY_MODERN,
|
| 268 |
-
description="Imprimés anciens (XVIe–XVIIIe) : ſ=s, æ=ae, œ=oe",
|
| 269 |
-
),
|
| 270 |
-
"medieval_latin": NormalizationProfile(
|
| 271 |
-
name="medieval_latin",
|
| 272 |
-
nfc=True,
|
| 273 |
-
caseless=False,
|
| 274 |
-
diplomatic_table=DIPLOMATIC_LATIN_MEDIEVAL,
|
| 275 |
-
description="Latin médiéval : ſ=s, u=v, i=j, ꝑ=per, ꝓ=pro",
|
| 276 |
-
),
|
| 277 |
-
"minimal": NormalizationProfile(
|
| 278 |
-
name="minimal",
|
| 279 |
-
nfc=True,
|
| 280 |
-
caseless=False,
|
| 281 |
-
diplomatic_table=DIPLOMATIC_MINIMAL,
|
| 282 |
-
description="Minimal : NFC + s long seulement",
|
| 283 |
-
),
|
| 284 |
-
"nfc": NormalizationProfile(
|
| 285 |
-
name="nfc",
|
| 286 |
-
nfc=True,
|
| 287 |
-
caseless=False,
|
| 288 |
-
diplomatic_table={},
|
| 289 |
-
description="Normalisation NFC uniquement",
|
| 290 |
-
),
|
| 291 |
-
"caseless": NormalizationProfile(
|
| 292 |
-
name="caseless",
|
| 293 |
-
nfc=True,
|
| 294 |
-
caseless=True,
|
| 295 |
-
diplomatic_table={},
|
| 296 |
-
description="NFC + insensible à la casse",
|
| 297 |
-
),
|
| 298 |
-
"early_modern_english": NormalizationProfile(
|
| 299 |
-
name="early_modern_english",
|
| 300 |
-
nfc=True,
|
| 301 |
-
caseless=False,
|
| 302 |
-
diplomatic_table=DIPLOMATIC_EN_EARLY_MODERN,
|
| 303 |
-
description="Early Modern English (XVIth–XVIIIth c.): ſ=s, u=v, i=j, vv=w, þ=th, ð=th, ȝ=y",
|
| 304 |
-
),
|
| 305 |
-
"medieval_english": NormalizationProfile(
|
| 306 |
-
name="medieval_english",
|
| 307 |
-
nfc=True,
|
| 308 |
-
caseless=False,
|
| 309 |
-
diplomatic_table=DIPLOMATIC_EN_MEDIEVAL,
|
| 310 |
-
description="Medieval English (XIIth–XVth c.): ſ=s, u=v, i=j, þ=th, ȝ=y, ꝑ=per, ꝓ=pro",
|
| 311 |
-
),
|
| 312 |
-
"secretary_hand": NormalizationProfile(
|
| 313 |
-
name="secretary_hand",
|
| 314 |
-
nfc=True,
|
| 315 |
-
caseless=False,
|
| 316 |
-
diplomatic_table=DIPLOMATIC_EN_SECRETARY,
|
| 317 |
-
description="Secretary hand (XVIth–XVIIth c.): ſ=s, u=v, i=j, vv=w, þ=th, ð=th, ȝ=y",
|
| 318 |
-
),
|
| 319 |
-
}
|
| 320 |
-
if name not in profiles:
|
| 321 |
raise KeyError(
|
| 322 |
f"Profil de normalisation inconnu : '{name}'. "
|
| 323 |
-
f"Disponibles : {', '.join(
|
| 324 |
)
|
| 325 |
-
return
|
| 326 |
|
| 327 |
|
| 328 |
# ---------------------------------------------------------------------------
|
|
|
|
| 232 |
# Profils préconfigurés
|
| 233 |
# ---------------------------------------------------------------------------
|
| 234 |
|
| 235 |
+
NORMALIZATION_PROFILES: dict[str, NormalizationProfile] = {
|
| 236 |
+
"nfc": NormalizationProfile(
|
| 237 |
+
name="nfc",
|
| 238 |
+
nfc=True,
|
| 239 |
+
caseless=False,
|
| 240 |
+
diplomatic_table={},
|
| 241 |
+
description="Normalisation NFC uniquement",
|
| 242 |
+
),
|
| 243 |
+
"caseless": NormalizationProfile(
|
| 244 |
+
name="caseless",
|
| 245 |
+
nfc=True,
|
| 246 |
+
caseless=True,
|
| 247 |
+
diplomatic_table={},
|
| 248 |
+
description="NFC + insensible à la casse",
|
| 249 |
+
),
|
| 250 |
+
"minimal": NormalizationProfile(
|
| 251 |
+
name="minimal",
|
| 252 |
+
nfc=True,
|
| 253 |
+
caseless=False,
|
| 254 |
+
diplomatic_table=DIPLOMATIC_MINIMAL,
|
| 255 |
+
description="Minimal : NFC + s long seulement",
|
| 256 |
+
),
|
| 257 |
+
"medieval_french": NormalizationProfile(
|
| 258 |
+
name="medieval_french",
|
| 259 |
+
nfc=True,
|
| 260 |
+
caseless=False,
|
| 261 |
+
diplomatic_table=DIPLOMATIC_FR_MEDIEVAL,
|
| 262 |
+
description="Français médiéval (XIIe–XVe) : ſ=s, u=v, i=j, æ=ae, œ=oe",
|
| 263 |
+
),
|
| 264 |
+
"early_modern_french": NormalizationProfile(
|
| 265 |
+
name="early_modern_french",
|
| 266 |
+
nfc=True,
|
| 267 |
+
caseless=False,
|
| 268 |
+
diplomatic_table=DIPLOMATIC_FR_EARLY_MODERN,
|
| 269 |
+
description="Imprimés anciens (XVIe–XVIIIe) : ſ=s, æ=ae, œ=oe",
|
| 270 |
+
),
|
| 271 |
+
"medieval_latin": NormalizationProfile(
|
| 272 |
+
name="medieval_latin",
|
| 273 |
+
nfc=True,
|
| 274 |
+
caseless=False,
|
| 275 |
+
diplomatic_table=DIPLOMATIC_LATIN_MEDIEVAL,
|
| 276 |
+
description="Latin médiéval : ſ=s, u=v, i=j, ꝑ=per, ꝓ=pro",
|
| 277 |
+
),
|
| 278 |
+
"early_modern_english": NormalizationProfile(
|
| 279 |
+
name="early_modern_english",
|
| 280 |
+
nfc=True,
|
| 281 |
+
caseless=False,
|
| 282 |
+
diplomatic_table=DIPLOMATIC_EN_EARLY_MODERN,
|
| 283 |
+
description="Early Modern English (XVIth–XVIIIth c.): ſ=s, u=v, i=j, vv=w, þ=th, ð=th, ȝ=y",
|
| 284 |
+
),
|
| 285 |
+
"medieval_english": NormalizationProfile(
|
| 286 |
+
name="medieval_english",
|
| 287 |
+
nfc=True,
|
| 288 |
+
caseless=False,
|
| 289 |
+
diplomatic_table=DIPLOMATIC_EN_MEDIEVAL,
|
| 290 |
+
description="Medieval English (XIIth–XVth c.): ſ=s, u=v, i=j, þ=th, ȝ=y, ꝑ=per, ꝓ=pro",
|
| 291 |
+
),
|
| 292 |
+
"secretary_hand": NormalizationProfile(
|
| 293 |
+
name="secretary_hand",
|
| 294 |
+
nfc=True,
|
| 295 |
+
caseless=False,
|
| 296 |
+
diplomatic_table=DIPLOMATIC_EN_SECRETARY,
|
| 297 |
+
description="Secretary hand (XVIth–XVIIth c.): ſ=s, u=v, i=j, vv=w, þ=th, ð=th, ȝ=y",
|
| 298 |
+
),
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
|
| 302 |
def get_builtin_profile(name: str) -> NormalizationProfile:
|
| 303 |
"""Retourne un profil préconfigurée par son identifiant.
|
| 304 |
|
|
|
|
| 319 |
KeyError
|
| 320 |
Si le nom n'est pas reconnu.
|
| 321 |
"""
|
| 322 |
+
if name not in NORMALIZATION_PROFILES:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
raise KeyError(
|
| 324 |
f"Profil de normalisation inconnu : '{name}'. "
|
| 325 |
+
f"Disponibles : {', '.join(NORMALIZATION_PROFILES)}"
|
| 326 |
)
|
| 327 |
+
return NORMALIZATION_PROFILES[name]
|
| 328 |
|
| 329 |
|
| 330 |
# ---------------------------------------------------------------------------
|
picarones/web/app.py
CHANGED
|
@@ -352,31 +352,18 @@ async def api_corpus_browse(path: str = Query(default=".", description="Chemin
|
|
| 352 |
|
| 353 |
@app.get("/api/normalization/profiles")
|
| 354 |
async def api_normalization_profiles() -> dict:
|
| 355 |
-
from picarones.core.normalization import
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
|
|
|
|
|
|
| 364 |
]
|
| 365 |
-
|
| 366 |
-
profiles = []
|
| 367 |
-
for pid in profile_ids:
|
| 368 |
-
try:
|
| 369 |
-
p = get_builtin_profile(pid)
|
| 370 |
-
profiles.append({
|
| 371 |
-
"id": pid,
|
| 372 |
-
"name": p.name,
|
| 373 |
-
"description": p.description or p.name,
|
| 374 |
-
"caseless": p.caseless,
|
| 375 |
-
"diplomatic_rules": len(p.diplomatic_table),
|
| 376 |
-
})
|
| 377 |
-
except Exception:
|
| 378 |
-
pass
|
| 379 |
-
|
| 380 |
return {"profiles": profiles}
|
| 381 |
|
| 382 |
|
|
|
|
| 352 |
|
| 353 |
@app.get("/api/normalization/profiles")
|
| 354 |
async def api_normalization_profiles() -> dict:
|
| 355 |
+
from picarones.core.normalization import NORMALIZATION_PROFILES
|
| 356 |
+
|
| 357 |
+
profiles = [
|
| 358 |
+
{
|
| 359 |
+
"id": pid,
|
| 360 |
+
"name": p.name,
|
| 361 |
+
"description": p.description or p.name,
|
| 362 |
+
"caseless": p.caseless,
|
| 363 |
+
"diplomatic_rules": len(p.diplomatic_table),
|
| 364 |
+
}
|
| 365 |
+
for pid, p in NORMALIZATION_PROFILES.items()
|
| 366 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
return {"profiles": profiles}
|
| 368 |
|
| 369 |
|