Spaces:

Akshay30
/

decipherai-api

Sleeping

App Files Community

Akshay30 committed 3 days ago

Commit

36331c6

1 Parent(s): 7b9f40a

Fix Greek OCR and update Latin OCR model

Browse files

Files changed (9) hide show

__pycache__/app.cpython-312.pyc +0 -0
app.py +1 -1
config.py +0 -1
models/__pycache__/clip_classifier.cpython-312.pyc +0 -0
models/clip_classifier.py +8 -4
models/huggingface_models.py +4 -2
processors/cuneiform_processor.py +10 -4
processors/greek_processor.py +22 -52
processors/latin_processor.py +35 -15

__pycache__/app.cpython-312.pyc DELETED Viewed

Binary file (19 kB)

app.py CHANGED Viewed

@@ -291,7 +291,7 @@ def analyze():
                 response_data["validation"] = {
                     "quality_score": validation.get('quality_score', 0.0),
                     "latin_ratio": validation.get('latin_ratio', 0.0),
-                    "trocr_used": validation.get('tridis_used', False) or (validation.get('ocr_method') in ['trocr-base-latin', 'tridis_HTR']),
                     "char_analysis": processed_result.get('char_analysis', {}),
                     "ocr_method": validation.get('ocr_method', 'standard_latin_ocr'),
                     "writing_style": validation.get('writing_style', 'cursive')

                 response_data["validation"] = {
                     "quality_score": validation.get('quality_score', 0.0),
                     "latin_ratio": validation.get('latin_ratio', 0.0),
+                    "trocr_used": validation.get('tridis_used', False) or any(m in validation.get('ocr_method', '') for m in ['tridis', 'trocr-base-printed']),
                     "char_analysis": processed_result.get('char_analysis', {}),
                     "ocr_method": validation.get('ocr_method', 'standard_latin_ocr'),
                     "writing_style": validation.get('writing_style', 'cursive')

config.py CHANGED Viewed

@@ -10,7 +10,6 @@ class Config:
     TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX")
     REFERENCES_PATH = BASE_DIR / "references.json"
-    ANCIENT_GREEK_TESSDATA = BASE_DIR / "tessdata" / "ancient-greek"
     GROQ_API_KEY = os.getenv("GROQ_API_KEY")

     TESSDATA_PREFIX = os.getenv("TESSDATA_PREFIX")
     REFERENCES_PATH = BASE_DIR / "references.json"
     GROQ_API_KEY = os.getenv("GROQ_API_KEY")

models/__pycache__/clip_classifier.cpython-312.pyc DELETED Viewed

Binary file (9.5 kB)

models/clip_classifier.py CHANGED Viewed

@@ -25,11 +25,13 @@ class CLIPClassifier:
         try:
             _t0 = time.time()
             print(f"[CLIP LAZY] Step 1/4 — Loading CLIPModel: {model_name}...", flush=True)
-            self.model = CLIPModel.from_pretrained(model_name)
             print(f"[CLIP LAZY] Step 2/4 — CLIPModel loaded in {time.time()-_t0:.1f}s. Loading CLIPProcessor...", flush=True)
             _t1 = time.time()
-            self.processor = CLIPProcessor.from_pretrained(model_name)
             print(f"[CLIP LAZY] Step 3/4 — CLIPProcessor loaded in {time.time()-_t1:.1f}s. Moving to {self.device}...", flush=True)
             _t2 = time.time()
@@ -45,8 +47,10 @@ class CLIPClassifier:
             try:
                 _t0 = time.time()
                 print(f"[CLIP LAZY] Fallback 1/2 — Loading: {fallback_name}...", flush=True)
-                self.model = CLIPModel.from_pretrained(fallback_name)
-                self.processor = CLIPProcessor.from_pretrained(fallback_name)
                 print(f"[CLIP LAZY] Fallback 2/2 — Moving to {self.device}...", flush=True)
                 self.model.to(self.device)

         try:
             _t0 = time.time()
             print(f"[CLIP LAZY] Step 1/4 — Loading CLIPModel: {model_name}...", flush=True)
+            import os
+            HF_TOKEN = os.getenv("HF_TOKEN")
+            self.model = CLIPModel.from_pretrained(model_name, token=HF_TOKEN)
             print(f"[CLIP LAZY] Step 2/4 — CLIPModel loaded in {time.time()-_t0:.1f}s. Loading CLIPProcessor...", flush=True)
             _t1 = time.time()
+            self.processor = CLIPProcessor.from_pretrained(model_name, token=HF_TOKEN)
             print(f"[CLIP LAZY] Step 3/4 — CLIPProcessor loaded in {time.time()-_t1:.1f}s. Moving to {self.device}...", flush=True)
             _t2 = time.time()
             try:
                 _t0 = time.time()
                 print(f"[CLIP LAZY] Fallback 1/2 — Loading: {fallback_name}...", flush=True)
+                import os
+                HF_TOKEN = os.getenv("HF_TOKEN")
+                self.model = CLIPModel.from_pretrained(fallback_name, token=HF_TOKEN)
+                self.processor = CLIPProcessor.from_pretrained(fallback_name, token=HF_TOKEN)
                 print(f"[CLIP LAZY] Fallback 2/2 — Moving to {self.device}...", flush=True)
                 self.model.to(self.device)

models/huggingface_models.py CHANGED Viewed

@@ -17,8 +17,10 @@ class HuggingFaceModels:
         model_name = getattr(self.config, 'HF_TRANSLATOR_MODEL', 'AnushS/Hieroglyph-Translator-Using-Gardiner-Codes')
         try:
             print(f"[INFO] Lazily loading Hugging Face translation model on CPU: {model_name}...")
-            self._tokenizer = AutoTokenizer.from_pretrained(model_name)
-            self._model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
             self._model.to(self.device)
             self._model.eval()
             log_model_device("Egyptian T5 Translator", self.device)

         model_name = getattr(self.config, 'HF_TRANSLATOR_MODEL', 'AnushS/Hieroglyph-Translator-Using-Gardiner-Codes')
         try:
             print(f"[INFO] Lazily loading Hugging Face translation model on CPU: {model_name}...")
+            import os
+            HF_TOKEN = os.getenv("HF_TOKEN")
+            self._tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
+            self._model = AutoModelForSeq2SeqLM.from_pretrained(model_name, token=HF_TOKEN)
             self._model.to(self.device)
             self._model.eval()
             log_model_device("Egyptian T5 Translator", self.device)

processors/cuneiform_processor.py CHANGED Viewed

@@ -48,8 +48,10 @@ class CuneiformProcessor(BaseScriptProcessor):
             # Use a powerful CLIP model for better ancient script understanding
             model_name = "openai/clip-vit-large-patch14"
-            self.clip_processor = CLIPProcessor.from_pretrained(model_name)
-            self.clip_model = CLIPModel.from_pretrained(model_name)
             self.clip_model.to(self.device)
             self.clip_model.eval()  # Put in evaluation mode
@@ -102,13 +104,17 @@ class CuneiformProcessor(BaseScriptProcessor):
             print("[INFO] Lazily loading praeclarum cuneiform translation model...")
             self.cuneiform_tokenizer = AutoTokenizer.from_pretrained(
                 "praeclarum/cuneiform",
-                cache_dir=CUNEIFORM_MODEL_DIR
             )
             self.cuneiform_model = AutoModelForSeq2SeqLM.from_pretrained(
                 "praeclarum/cuneiform",
-                cache_dir=CUNEIFORM_MODEL_DIR
             )
             self.cuneiform_model.to(self.device)
             self.cuneiform_model.eval()  # Put in evaluation mode

             # Use a powerful CLIP model for better ancient script understanding
             model_name = "openai/clip-vit-large-patch14"
+            import os
+            HF_TOKEN = os.getenv("HF_TOKEN")
+            self.clip_processor = CLIPProcessor.from_pretrained(model_name, token=HF_TOKEN)
+            self.clip_model = CLIPModel.from_pretrained(model_name, token=HF_TOKEN)
             self.clip_model.to(self.device)
             self.clip_model.eval()  # Put in evaluation mode
             print("[INFO] Lazily loading praeclarum cuneiform translation model...")
+            import os
+            HF_TOKEN = os.getenv("HF_TOKEN")
             self.cuneiform_tokenizer = AutoTokenizer.from_pretrained(
                 "praeclarum/cuneiform",
+                cache_dir=CUNEIFORM_MODEL_DIR,
+                token=HF_TOKEN
             )
             self.cuneiform_model = AutoModelForSeq2SeqLM.from_pretrained(
                 "praeclarum/cuneiform",
+                cache_dir=CUNEIFORM_MODEL_DIR,
+                token=HF_TOKEN
             )
             self.cuneiform_model.to(self.device)
             self.cuneiform_model.eval()  # Put in evaluation mode

processors/greek_processor.py CHANGED Viewed

@@ -36,15 +36,19 @@ class GreekProcessor(BaseScriptProcessor):
             from transformers import TrOCRProcessor, VisionEncoderDecoderModel
             import torch
             self.trocr_processor = TrOCRProcessor.from_pretrained(
                 'rithwikn/trocr_greek_combined',
                 cache_dir=GREEK_TROCR_MODEL_DIR,
-                local_files_only=False
             )
             self.trocr_model = VisionEncoderDecoderModel.from_pretrained(
                 'rithwikn/trocr_greek_combined',
                 cache_dir=GREEK_TROCR_MODEL_DIR,
-                local_files_only=False
             )
             self.trocr_model.to(self.device)
@@ -61,26 +65,23 @@ class GreekProcessor(BaseScriptProcessor):
             self.trocr_available = False
     def setup_ancient_greek_ocr(self):
-        """Setup Ancient Greek OCR with specialized tessdata"""
-        # Path to Ancient Greek tessdata (download from ancientgreekocr.org)
-        self.ancient_greek_tessdata = os.path.join(
-            os.path.dirname(__file__),
-            "..", "tessdata", "ancient-greek"
-        )
-        # Verify tessdata exists
-        if os.path.exists(self.ancient_greek_tessdata):
-            print(f"[INFO] Ancient Greek tessdata found: {self.ancient_greek_tessdata}")
-        else:
-            print(f"[WARN] Ancient Greek tessdata not found at: {self.ancient_greek_tessdata}")
-            print("[INFO] Download from: https://ancientgreekocr.org")
     def detect_script(self, image_path):
         """Simplified detection - Groq Vision handles main classification"""
         try:
             if not getattr(self, 'trocr_available', False):
                 # Check if Ancient Greek OCR is available as fallback
-                grc_file = os.path.join(self.ancient_greek_tessdata, "grc.traineddata")
-                if not os.path.exists(grc_file):
                     print("[INFO] Greek processor not available (neither TrOCR nor Tesseract)")
                     return False, 0.5
@@ -138,8 +139,7 @@ class GreekProcessor(BaseScriptProcessor):
                 print("[WARN] TrOCR extraction returned poor quality result, trying Tesseract fallback...")
             # Method 2: Ancient Greek OCR (if available and safe)
-            grc_file = os.path.join(self.ancient_greek_tessdata, "grc.traineddata")
-            if os.path.exists(grc_file):
                 ancient_greek_text = self._extract_with_ancient_greek_ocr(image)
                 if ancient_greek_text and self._validate_greek_text(ancient_greek_text):
                     print("[INFO] Using Ancient Greek OCR result")
@@ -232,17 +232,7 @@ class GreekProcessor(BaseScriptProcessor):
     def _extract_with_ancient_greek_ocr(self, image):
         """Extract using specialized Ancient Greek OCR"""
         try:
-            # Save original tessdata path
-            original_tessdata = os.environ.get("TESSDATA_PREFIX", "")
-            # Set tessdata path properly (fix the path format)
-            if os.path.exists(self.ancient_greek_tessdata):
-                # Ensure proper path format without trailing quotes
-                clean_path = str(self.ancient_greek_tessdata).replace('"', '')
-                os.environ["TESSDATA_PREFIX"] = clean_path
-                print(f"[INFO] Set TESSDATA_PREFIX to: {clean_path}")
-            else:
-                print(f"[WARN] Ancient Greek tessdata not found at: {self.ancient_greek_tessdata}")
                 return ""
             # Use ancient Greek language code 'grc' with optimized settings
@@ -254,22 +244,10 @@ class GreekProcessor(BaseScriptProcessor):
                 lang="grc",  # Ancient Greek language code
                 config=config
             )
-            # Restore original tessdata path
-            if original_tessdata:
-                os.environ["TESSDATA_PREFIX"] = original_tessdata
-            else:
-                # Remove the environment variable if it wasn't set before
-                if "TESSDATA_PREFIX" in os.environ:
-                    del os.environ["TESSDATA_PREFIX"]
             return text.strip()
         except Exception as e:
             print(f"[WARN] Ancient Greek OCR failed: {e}")
-            # Make sure to restore tessdata path even on error
-            if 'original_tessdata' in locals() and original_tessdata:
-                os.environ["TESSDATA_PREFIX"] = original_tessdata
             return ""
     def _extract_layout_aware_ocr(self, image_path):
@@ -288,14 +266,7 @@ class GreekProcessor(BaseScriptProcessor):
             line_texts = []
             # Try to use Ancient Greek first
-            grc_file = os.path.join(self.ancient_greek_tessdata, "grc.traineddata")
-            use_grc = os.path.exists(grc_file)
-            # Save original TESSDATA_PREFIX
-            original_tessdata = os.environ.get("TESSDATA_PREFIX", "")
-            if use_grc:
-                clean_path = str(self.ancient_greek_tessdata).replace('"', '')
-                os.environ["TESSDATA_PREFIX"] = clean_path
             try:
                 for idx, crop in enumerate(crops):
@@ -326,8 +297,7 @@ class GreekProcessor(BaseScriptProcessor):
                     if text:
                         line_texts.append(text)
             finally:
-                if use_grc and original_tessdata:
-                    os.environ["TESSDATA_PREFIX"] = original_tessdata
             return "\n".join(line_texts)
         except Exception as e:

             from transformers import TrOCRProcessor, VisionEncoderDecoderModel
             import torch
+            import os
+            HF_TOKEN = os.getenv("HF_TOKEN")
             self.trocr_processor = TrOCRProcessor.from_pretrained(
                 'rithwikn/trocr_greek_combined',
                 cache_dir=GREEK_TROCR_MODEL_DIR,
+                local_files_only=False,
+                token=HF_TOKEN
             )
             self.trocr_model = VisionEncoderDecoderModel.from_pretrained(
                 'rithwikn/trocr_greek_combined',
                 cache_dir=GREEK_TROCR_MODEL_DIR,
+                local_files_only=False,
+                token=HF_TOKEN
             )
             self.trocr_model.to(self.device)
             self.trocr_available = False
     def setup_ancient_greek_ocr(self):
+        """Setup Ancient Greek OCR with Tesseract language check"""
+        try:
+            langs = pytesseract.get_languages(config='')
+            self.grc_available = "grc" in langs
+            if self.grc_available:
+                print("[INFO] Ancient Greek Tesseract language pack 'grc' is available")
+            else:
+                print("[WARN] Ancient Greek Tesseract language pack 'grc' is NOT available")
+        except Exception as e:
+            print(f"[ERROR] Failed to check Tesseract languages: {e}")
+            self.grc_available = False
     def detect_script(self, image_path):
         """Simplified detection - Groq Vision handles main classification"""
         try:
             if not getattr(self, 'trocr_available', False):
                 # Check if Ancient Greek OCR is available as fallback
+                if not getattr(self, 'grc_available', False):
                     print("[INFO] Greek processor not available (neither TrOCR nor Tesseract)")
                     return False, 0.5
                 print("[WARN] TrOCR extraction returned poor quality result, trying Tesseract fallback...")
             # Method 2: Ancient Greek OCR (if available and safe)
+            if getattr(self, 'grc_available', False):
                 ancient_greek_text = self._extract_with_ancient_greek_ocr(image)
                 if ancient_greek_text and self._validate_greek_text(ancient_greek_text):
                     print("[INFO] Using Ancient Greek OCR result")
     def _extract_with_ancient_greek_ocr(self, image):
         """Extract using specialized Ancient Greek OCR"""
         try:
+            if not getattr(self, 'grc_available', False):
                 return ""
             # Use ancient Greek language code 'grc' with optimized settings
                 lang="grc",  # Ancient Greek language code
                 config=config
             )
             return text.strip()
         except Exception as e:
             print(f"[WARN] Ancient Greek OCR failed: {e}")
             return ""
     def _extract_layout_aware_ocr(self, image_path):
             line_texts = []
             # Try to use Ancient Greek first
+            use_grc = getattr(self, 'grc_available', False)
             try:
                 for idx, crop in enumerate(crops):
                     if text:
                         line_texts.append(text)
             finally:
+                pass
             return "\n".join(line_texts)
         except Exception as e:

processors/latin_processor.py CHANGED Viewed

@@ -46,15 +46,19 @@ class LatinProcessor(BaseScriptProcessor):
             print("[INFO] This model specializes in 13th-16th century manuscripts with automatic abbreviation expansion")
             # TRIDIS model from Hugging Face - runs locally after download
             self.tridis_processor = TrOCRProcessor.from_pretrained(
                 'magistermilitum/tridis_HTR',
                 cache_dir=TRIDIS_MODEL_DIR,
-                local_files_only=False  # Download first time, then cache locally
             )
             self.tridis_model = VisionEncoderDecoderModel.from_pretrained(
                 'magistermilitum/tridis_HTR',
                 cache_dir=TRIDIS_MODEL_DIR,
-                local_files_only=False
             )
             self.tridis_model.to(self.device)
@@ -74,45 +78,56 @@ class LatinProcessor(BaseScriptProcessor):
             self.tridis_available = False
     def setup_trocr_base_latin(self):
-        """Setup trocr-base-latin model - BEST for printed or carved classical Latin"""
         try:
             from utils.gpu_diagnostics import reclaim_vram_for
             reclaim_vram_for("latin")
-            print("[INFO] Lazily loading trocr-base-latin model for printed/carved Latin...")
             self.trocr_latin_processor = TrOCRProcessor.from_pretrained(
-                'magistermilitum/trocr-base-latin',
                 cache_dir=TROCR_LATIN_MODEL_DIR,
-                local_files_only=False
             )
             self.trocr_latin_model = VisionEncoderDecoderModel.from_pretrained(
-                'magistermilitum/trocr-base-latin',
                 cache_dir=TROCR_LATIN_MODEL_DIR,
-                local_files_only=False
             )
             self.trocr_latin_model.to(self.device)
             self.trocr_latin_model.eval()  # Put in evaluation mode
             from utils.gpu_diagnostics import log_model_device
-            log_model_device("Latin TrOCR (Printed)", self.device)
             self.trocr_latin_available = True
-            print(f"[INFO] trocr-base-latin loaded successfully on {self.device}")
         except Exception as e:
-            print(f"[WARN] magistermilitum/trocr-base-latin model failed to load ({e}). Trying public fallback 'microsoft/trocr-base-printed'...")
             try:
                 # Free VRAM again in case partial allocation left residue
                 reclaim_vram_for("latin")
                 self.trocr_latin_processor = TrOCRProcessor.from_pretrained(
                     'microsoft/trocr-base-printed',
                     cache_dir=TROCR_LATIN_MODEL_DIR,
-                    local_files_only=False
                 )
                 self.trocr_latin_model = VisionEncoderDecoderModel.from_pretrained(
                     'microsoft/trocr-base-printed',
                     cache_dir=TROCR_LATIN_MODEL_DIR,
-                    local_files_only=False
                 )
                 self.trocr_latin_model.to(self.device)
                 self.trocr_latin_model.eval()  # Put in evaluation mode
@@ -121,7 +136,12 @@ class LatinProcessor(BaseScriptProcessor):
                 log_model_device("Latin TrOCR (Printed Fallback)", self.device)
                 self.trocr_latin_available = True
                 print(f"[INFO] Public fallback microsoft/trocr-base-printed loaded successfully on {self.device}")
             except Exception as ex:
                 print(f"[ERROR] All printed Latin models failed to load: {ex}")
                 self.trocr_latin_available = False
@@ -211,7 +231,7 @@ class LatinProcessor(BaseScriptProcessor):
                     processing_time = time.time() - start_time
                     print(f"[SUCCESS] Routed to trocr-base-latin and completed in {processing_time:.2f}s")
                     self.active_style = "printed"
-                    self.active_model = "trocr-base-latin"
                     return primary_text
                 else:
                     print("[WARN] trocr-base-latin returned poor quality result, trying TRIDIS HTR fallback...")
@@ -242,7 +262,7 @@ class LatinProcessor(BaseScriptProcessor):
                 processing_time = time.time() - start_time
                 print(f"[SUCCESS] Fallback model transcription successful in {processing_time:.2f}s")
                 self.active_style = "printed" if style == "cursive" else "cursive"
-                self.active_model = "trocr-base-latin" if style == "cursive" else "tridis_HTR"
                 return fallback_text
             # Step 3: Tesseract fallback

             print("[INFO] This model specializes in 13th-16th century manuscripts with automatic abbreviation expansion")
             # TRIDIS model from Hugging Face - runs locally after download
+            import os
+            HF_TOKEN = os.getenv("HF_TOKEN")
             self.tridis_processor = TrOCRProcessor.from_pretrained(
                 'magistermilitum/tridis_HTR',
                 cache_dir=TRIDIS_MODEL_DIR,
+                local_files_only=False,
+                token=HF_TOKEN
             )
             self.tridis_model = VisionEncoderDecoderModel.from_pretrained(
                 'magistermilitum/tridis_HTR',
                 cache_dir=TRIDIS_MODEL_DIR,
+                local_files_only=False,
+                token=HF_TOKEN
             )
             self.tridis_model.to(self.device)
             self.tridis_available = False
     def setup_trocr_base_latin(self):
+        """Setup TRIDIS v2 HTR model - Primary for printed or manuscript Latin, fallback to printed"""
+        import os
+        HF_TOKEN = os.getenv("HF_TOKEN")
         try:
             from utils.gpu_diagnostics import reclaim_vram_for
             reclaim_vram_for("latin")
+            print("[LATIN] Loading TRIDIS v2 model...")
             self.trocr_latin_processor = TrOCRProcessor.from_pretrained(
+                'magistermilitum/tridis_v2_HTR_historical_manuscripts',
                 cache_dir=TROCR_LATIN_MODEL_DIR,
+                local_files_only=False,
+                token=HF_TOKEN
             )
             self.trocr_latin_model = VisionEncoderDecoderModel.from_pretrained(
+                'magistermilitum/tridis_v2_HTR_historical_manuscripts',
                 cache_dir=TROCR_LATIN_MODEL_DIR,
+                local_files_only=False,
+                token=HF_TOKEN
             )
             self.trocr_latin_model.to(self.device)
             self.trocr_latin_model.eval()  # Put in evaluation mode
             from utils.gpu_diagnostics import log_model_device
+            log_model_device("Latin TRIDIS v2 HTR", self.device)
             self.trocr_latin_available = True
+            self.loaded_printed_model_name = "tridis_v2_HTR_historical_manuscripts"
+            print("[LATIN] TRIDIS v2 model loaded successfully")
+            print(f"processor class: {type(self.trocr_latin_processor).__name__}")
+            print(f"model class: {type(self.trocr_latin_model).__name__}")
+            print(f"device: {self.device}")
+            print(f"parameter count: {sum(p.numel() for p in self.trocr_latin_model.parameters())}")
         except Exception as e:
+            print(f"[LATIN] TRIDIS unavailable, using microsoft/trocr-base-printed")
             try:
                 # Free VRAM again in case partial allocation left residue
                 reclaim_vram_for("latin")
                 self.trocr_latin_processor = TrOCRProcessor.from_pretrained(
                     'microsoft/trocr-base-printed',
                     cache_dir=TROCR_LATIN_MODEL_DIR,
+                    local_files_only=False,
+                    token=HF_TOKEN
                 )
                 self.trocr_latin_model = VisionEncoderDecoderModel.from_pretrained(
                     'microsoft/trocr-base-printed',
                     cache_dir=TROCR_LATIN_MODEL_DIR,
+                    local_files_only=False,
+                    token=HF_TOKEN
                 )
                 self.trocr_latin_model.to(self.device)
                 self.trocr_latin_model.eval()  # Put in evaluation mode
                 log_model_device("Latin TrOCR (Printed Fallback)", self.device)
                 self.trocr_latin_available = True
+                self.loaded_printed_model_name = "trocr-base-printed"
                 print(f"[INFO] Public fallback microsoft/trocr-base-printed loaded successfully on {self.device}")
+                print(f"processor class: {type(self.trocr_latin_processor).__name__}")
+                print(f"model class: {type(self.trocr_latin_model).__name__}")
+                print(f"device: {self.device}")
+                print(f"parameter count: {sum(p.numel() for p in self.trocr_latin_model.parameters())}")
             except Exception as ex:
                 print(f"[ERROR] All printed Latin models failed to load: {ex}")
                 self.trocr_latin_available = False
                     processing_time = time.time() - start_time
                     print(f"[SUCCESS] Routed to trocr-base-latin and completed in {processing_time:.2f}s")
                     self.active_style = "printed"
+                    self.active_model = getattr(self, "loaded_printed_model_name", "tridis_v2_HTR_historical_manuscripts")
                     return primary_text
                 else:
                     print("[WARN] trocr-base-latin returned poor quality result, trying TRIDIS HTR fallback...")
                 processing_time = time.time() - start_time
                 print(f"[SUCCESS] Fallback model transcription successful in {processing_time:.2f}s")
                 self.active_style = "printed" if style == "cursive" else "cursive"
+                self.active_model = getattr(self, "loaded_printed_model_name", "tridis_v2_HTR_historical_manuscripts") if style == "cursive" else "tridis_HTR"
                 return fallback_text
             # Step 3: Tesseract fallback