Spaces:
Runtime error
Runtime error
Upload 17 files
Browse files- .gitattributes +2 -0
- app.py +46 -0
- app/__init__.py +0 -0
- app/app.py +98 -0
- app/src/__init__.py +0 -0
- app/src/constant.py +11 -0
- app/src/layout_loader.py +323 -0
- app/src/logger.py +91 -0
- app/src/model_loader.py +25 -0
- app/src/test_vit.py +102 -0
- app/src/vgg16_load.py +381 -0
- app/src/vit_load.py +281 -0
- artifacts/model/VIT_model/confusion_matrix.png +3 -0
- artifacts/model/VIT_model/mlb.joblib +3 -0
- artifacts/model/VIT_model/model.pth +3 -0
- artifacts/model/vgg_model/mlb.joblib +3 -0
- artifacts/model/vgg_model/model.keras +3 -0
- requirements.txt +29 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
artifacts/model/vgg_model/model.keras filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
artifacts/model/VIT_model/confusion_matrix.png filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from PIL import Image
|
| 4 |
+
|
| 5 |
+
# Import your model classes (adjust import paths as needed)
|
| 6 |
+
from app.src.vit_load import VITDocumentClassifier
|
| 7 |
+
from app.src.vgg16_load import VGGDocumentClassifier
|
| 8 |
+
from app.src.constant import vit_model_path, vit_mlb_path, vgg_model_path, vgg_mlb_path
|
| 9 |
+
|
| 10 |
+
# Load models once at startup
|
| 11 |
+
vit_model = VITDocumentClassifier(vit_model_path, vit_mlb_path)
|
| 12 |
+
vgg_model = VGGDocumentClassifier(vgg_model_path, vgg_mlb_path)
|
| 13 |
+
|
| 14 |
+
def predict_vit(image, cut_off):
    """Classify an uploaded image with the ViT model.

    Args:
        image: PIL image from the Gradio input, or None if nothing was uploaded.
        cut_off: Probability threshold forwarded to the ViT classifier.

    Returns:
        A human-readable prediction string, or a prompt to upload an image.
    """
    if image is None:
        return "Please upload an image."
    import os
    import tempfile

    # Fix: the original wrote to a fixed filename in the CWD, so concurrent
    # requests clobbered each other's upload and the file was never removed.
    fd, temp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        image.save(temp_path)
        result = vit_model.predict(Path(temp_path), cut_off)
    finally:
        os.remove(temp_path)  # always clean up the scratch file
    return f"ViT Prediction: {result}"
|
| 21 |
+
|
| 22 |
+
def predict_vgg(image):
    """Classify an uploaded image with the VGG16 model.

    Args:
        image: PIL image from the Gradio input, or None if nothing was uploaded.

    Returns:
        A human-readable prediction string, or a prompt to upload an image.
    """
    if image is None:
        return "Please upload an image."
    import os
    import tempfile

    # Fix: the original wrote to a fixed filename in the CWD, so concurrent
    # requests clobbered each other's upload and the file was never removed.
    fd, temp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        image.save(temp_path)
        result = vgg_model.predict(Path(temp_path))
    finally:
        os.remove(temp_path)  # always clean up the scratch file
    return f"VGG16 Prediction: {result}"
|
| 29 |
+
|
| 30 |
+
# Assemble the Gradio UI: image + threshold inputs on the left, the
# prediction textbox on the right, and one trigger button per model.
with gr.Blocks() as demo:
    gr.Markdown("# Document Classification Demo\nUpload an image and choose a model to classify it.")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            cut_off = gr.Slider(minimum=0, maximum=1, value=0.5, label="ViT Cutoff Threshold")
        with gr.Column():
            result_output = gr.Textbox(label="Prediction Result", interactive=False)
    with gr.Row():
        vit_btn = gr.Button("Predict with ViT Model")
        vgg_btn = gr.Button("Predict with VGG16 Model")

    # Wire each button to its model-specific prediction callback.
    vit_btn.click(fn=predict_vit, inputs=[image_input, cut_off], outputs=result_output)
    vgg_btn.click(fn=predict_vgg, inputs=image_input, outputs=result_output)

if __name__ == "__main__":
    demo.launch()
|
app/__init__.py
ADDED
|
File without changes
|
app/app.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, UploadFile
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from fastapi.responses import StreamingResponse,FileResponse , JSONResponse,HTMLResponse
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
import uvicorn
|
| 8 |
+
import cv2
|
| 9 |
+
import tempfile
|
| 10 |
+
import shutil
|
| 11 |
+
import os
|
| 12 |
+
import warnings
|
| 13 |
+
import base64
|
| 14 |
+
import numpy as np
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
|
| 17 |
+
from app.src.model_loader import vit_loader,vgg_loader
|
| 18 |
+
from app.src.logger import setup_logger
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
warnings.filterwarnings("ignore")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# FastAPI application instance for the document-classification service.
app = FastAPI(
    title="Document_Classifire",
    description="FastAPI",
    version="0.115.4",
)

# Allow all origins (replace * with specific origins if needed)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 36 |
+
|
| 37 |
+
@app.get("/")
async def root():
    """Health-check endpoint confirming the API is up.

    Returns:
        A small JSON dict indicating the service is alive.
    """
    # Fix: corrected the "woorking" typo in the user-visible status message.
    return {"Fast API": "API is working"}
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Suppress warnings
|
| 43 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0' # 0 = all logs, 1 = filter out info, 2 = filter out warnings, 3 = filter out errors
|
| 44 |
+
warnings.filterwarnings("ignore")
|
| 45 |
+
|
| 46 |
+
logger = setup_logger()
|
| 47 |
+
|
| 48 |
+
@app.post("/vit_model")
async def vit_clf(cut_off: float = 0.5, image_file: UploadFile = File(...)):
    """Classify an uploaded document image with the ViT model.

    Args:
        cut_off: Probability threshold used by the ViT classifier.
        image_file: The uploaded image file.

    Returns:
        JSONResponse with ``status`` 1 and the predicted class on success,
        ``status`` 0 with a null class when prediction returns nothing, or
        ``status`` 0 with an ``error_message`` on exception.
    """
    temp_dir = None
    try:
        # Persist the upload to a scratch directory so the model can read a path.
        temp_dir = tempfile.mkdtemp()
        temp_file_path = os.path.join(temp_dir, image_file.filename)
        with open(temp_file_path, "wb") as temp_file:
            shutil.copyfileobj(image_file.file, temp_file)
        # NOTE(review): the model is re-loaded on every request via
        # vit_loader(); consider loading once at startup if latency matters.
        result = vit_loader().predict(image_path=Path(temp_file_path), cut_off=cut_off)
        logger.info(result)

        if result is not None:
            return JSONResponse(content={"status": 1, "document_classe": result})
        else:
            return JSONResponse(content={"status": 0, "document_classe": None})

    except Exception as e:
        logger.error(str(e))
        return JSONResponse(content={"status": 0, "error_message": str(e)})
    finally:
        # Fix: the temporary directory used to leak on every request.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
@app.post("/vgg_model")
async def vgg_clf(image_file: UploadFile = File(...)):
    """Classify an uploaded document image with the VGG16 model.

    Args:
        image_file: The uploaded image file.

    Returns:
        JSONResponse with ``status`` 1 and the predicted class on success,
        ``status`` 0 with a null class when prediction returns nothing, or
        ``status`` 0 with an ``error_message`` on exception.
    """
    temp_dir = None
    try:
        # Persist the upload to a scratch directory so the model can read a path.
        temp_dir = tempfile.mkdtemp()
        temp_file_path = os.path.join(temp_dir, image_file.filename)
        with open(temp_file_path, "wb") as temp_file:
            shutil.copyfileobj(image_file.file, temp_file)
        # NOTE(review): the model is re-loaded on every request via
        # vgg_loader(); consider loading once at startup if latency matters.
        result = vgg_loader().predict(image_path=Path(temp_file_path))
        logger.info(result)

        if result is not None:
            return JSONResponse(content={"status": 1, "document_classe": result})
        else:
            return JSONResponse(content={"status": 0, "document_classe": None})

    except Exception as e:
        logger.error(str(e))
        # Fix: use "error_message" like the /vit_model endpoint instead of
        # stuffing the error text into "document_classe".
        return JSONResponse(content={"status": 0, "error_message": str(e)})
    finally:
        # Fix: the temporary directory used to leak on every request.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
|
app/src/__init__.py
ADDED
|
File without changes
|
app/src/constant.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path

# Model artifact locations, relative to the repository root.
# Fix: the original used raw Windows paths (r"artifacts\model\...") which do
# not resolve on POSIX hosts such as Hugging Face Spaces; forward slashes
# work on every platform with pathlib.
vit_model_path = Path("artifacts/model/VIT_model/model.pth")
vit_mlb_path = Path("artifacts/model/VIT_model/mlb.joblib")

vgg_model_path = Path("artifacts/model/vgg_model/model.keras")
vgg_mlb_path = Path("artifacts/model/vgg_model/mlb.joblib")

layout_model_path = Path("artifacts/model/layout_model/model.pth")
|
app/src/layout_loader.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PIL import Image
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
from typing import Optional, List, Dict, Any
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from transformers import LayoutLMv2ForSequenceClassification, LayoutLMv2Processor, LayoutLMv2FeatureExtractor, LayoutLMv2Tokenizer
|
| 7 |
+
import os
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
from app.src.logger import setup_logger
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
logger = setup_logger("layout_loader")
|
| 14 |
+
|
| 15 |
+
class LayoutLMDocumentClassifier:
    """Classify document images with a LayoutLMv2 model.

    Loads the LayoutLMv2 weights from the path supplied to the constructor
    and builds a processor from a default feature extractor plus the
    pretrained 'microsoft/layoutlmv2-base-uncased' tokenizer. Inference runs
    on GPU when available, otherwise CPU.
    """

    def __init__(self, model_path_str) -> None:
        """Initialize the classifier: pick a device and load the artifacts.

        Args:
            model_path_str: Filesystem path to the LayoutLMv2 model directory
                or file containing the weights.

        Raises:
            ValueError: If no model path is supplied.
            FileNotFoundError: If the supplied path does not exist or a
                required artifact file is missing during loading.
            Exception: For any other unexpected error during loading.
        """
        logger.info("Initializing LayoutLMDocumentClassifier.")
        self.model_path_str: Optional[str] = model_path_str
        self.model: Optional[LayoutLMv2ForSequenceClassification] = None
        self.processor: Optional[LayoutLMv2Processor] = None
        self.device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {self.device}")
        # Mapping from model output indices to human-readable class labels;
        # must stay aligned with the model's training label order.
        self.id2label: Dict[int, str] = {0: 'invoice', 1: 'form', 2: 'note', 3: 'advertisement', 4: 'email'}
        logger.info(f"Defined id2label mapping: {self.id2label}")

        # Fix: the original docstrings and log messages claimed the path came
        # from the LAYOUTLM_MODEL_PATH environment variable, but it is in
        # fact supplied by the caller through the constructor argument.
        if not self.model_path_str:
            logger.critical("Critical Error: no LayoutLMv2 model path was supplied.")
            raise ValueError("A LayoutLMv2 model path must be supplied.")

        model_path: Path = Path(self.model_path_str)
        logger.info(f"Retrieved model path: {model_path}")
        if not model_path.exists():
            logger.critical(f"Critical Error: Model path does not exist: {model_path}")
            raise FileNotFoundError(f"Model path not found: {model_path}")

        try:
            logger.info("Calling _load_artifacts to load model and processor.")
            self._load_artifacts(model_path)
            logger.info("LayoutLMDocumentClassifier initialized successfully.")
        except Exception as e:
            logger.critical(f"An unhandled exception occurred during LayoutLMDocumentClassifier initialization: {e}", exc_info=True)
            raise

    def _load_artifacts(self, model_path: Path) -> None:
        """Load the LayoutLMv2 model and processor.

        Args:
            model_path: Path to the LayoutLMv2 model weights. Only the model
                is read from this path; the processor is built from defaults.

        Raises:
            FileNotFoundError: If a required model file is not found.
            Exception: For any other unexpected loading error.
        """
        logger.info(f"Starting artifact loading from {model_path} for LayoutLMv2.")

        # Build the processor from a default feature extractor plus the
        # pretrained base tokenizer (downloaded from the hub on first use).
        try:
            feature_extractor = LayoutLMv2FeatureExtractor()
            tokenizer = LayoutLMv2Tokenizer.from_pretrained("microsoft/layoutlmv2-base-uncased")
            self.processor = LayoutLMv2Processor(feature_extractor, tokenizer)
            logger.info("LayoutLMv2 processor loaded successfully.")
        except Exception as e:
            logger.critical(f"Critical Error: failed to build the LayoutLMv2 processor: {e}", exc_info=True)
            raise

        try:
            self.model = LayoutLMv2ForSequenceClassification.from_pretrained(model_path)
            self.model.to(self.device)  # ensure the model lives on the inference device
            logger.info(f"LayoutLMv2 model loaded successfully and moved to {self.device}.")
        except FileNotFoundError:
            logger.critical(f"Critical Error: LayoutLMv2 model file not found at {model_path}", exc_info=True)
            raise
        except Exception as e:
            logger.critical(f"Critical Error: failed to load the LayoutLMv2 model from {model_path}: {e}", exc_info=True)
            raise

        # Fix: removed the unreachable partial-failure branches — every
        # failure above re-raises, so reaching this point means both
        # artifacts loaded.
        logger.info("All required LayoutLMv2 artifacts loaded successfully.")

    def _prepare_inputs(self, image_path: Path) -> Optional[Dict[str, torch.Tensor]]:
        """Load an image and encode it into model-ready tensors.

        Args:
            image_path: Path to the image file (e.g. PNG, JPG) to process.

        Returns:
            Dict of input tensors moved to ``self.device``, or None if any
            step fails (missing file, corrupt image, processor error).
        """
        logger.info(f"Preparing inputs for {image_path}.")

        try:
            image = Image.open(image_path)
        except FileNotFoundError:
            logger.error(f"Error: Image file not found at {image_path}", exc_info=True)
            return None
        except Exception as e:
            logger.error(f"An unexpected error occurred while loading image {image_path}: {e}", exc_info=True)
            return None

        # LayoutLMv2 expects 3-channel input, so normalize the color mode.
        try:
            if image.mode != "RGB":
                image = image.convert("RGB")
        except Exception as e:
            logger.error(f"An error occurred while converting image {image_path} to RGB: {e}", exc_info=True)
            return None

        if self.processor is None:
            logger.error("LayoutLMv2 processor is not loaded. Cannot prepare inputs.")
            return None

        try:
            encoded_inputs = self.processor(
                images=image,
                return_tensors="pt",
                truncation=True,
                padding="max_length",
                max_length=512,
            )
        except Exception as e:
            logger.error(f"An error occurred during input preparation for {image_path}: {e}", exc_info=True)
            return None

        # Move every tensor to the inference device.
        try:
            for key, value in encoded_inputs.items():
                if isinstance(value, torch.Tensor):
                    encoded_inputs[key] = value.to(self.device)
        except Exception as e:
            logger.error(f"An error occurred while moving inputs to device for {image_path}: {e}", exc_info=True)
            return None

        logger.info(f"Inputs prepared successfully for {image_path}.")
        return encoded_inputs

    def predict(self, image_path: Path) -> Optional[str]:
        """Predict the document class label for an image.

        Args:
            image_path: Path to the image file to classify.

        Returns:
            The predicted class label as a string, or None if any step fails
            (input preparation, inference) or the predicted index is not in
            the ``id2label`` mapping.
        """
        logger.info(f"Starting prediction process for image: {image_path}.")

        encoded_inputs: Optional[Dict[str, torch.Tensor]] = self._prepare_inputs(image_path)
        if encoded_inputs is None:
            logger.error(f"Input preparation failed for {image_path}. Cannot perform prediction.")
            return None

        if self.model is None:
            logger.error("LayoutLMv2 model is not loaded. Cannot perform prediction.")
            return None

        try:
            self.model.eval()  # disable dropout etc. for deterministic inference
            with torch.no_grad():
                outputs: Any = self.model(**encoded_inputs)
                logits = outputs.logits

            if not isinstance(logits, torch.Tensor):
                logger.error(f"Model output 'logits' is not a torch.Tensor for {image_path}.")
                return None

            predicted_class_idx: int = logits.argmax(-1).item()
            logger.info(f"Model inference completed for {image_path}. Predicted index: {predicted_class_idx}.")

            if predicted_class_idx not in self.id2label:
                logger.error(f"Predicted index {predicted_class_idx} not found in id2label mapping for {image_path}.")
                return None
            predicted_label = self.id2label[predicted_class_idx]
        except Exception as e:
            logger.error(f"An error occurred during model inference or label mapping for {image_path}: {e}", exc_info=True)
            return None

        logger.info(f"Prediction completed for {image_path}. Predicted label: {predicted_label}.")
        return predicted_label
|
| 322 |
+
|
| 323 |
+
|
app/src/logger.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
from logging.handlers import RotatingFileHandler
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
# Per-run log directory: logs/<timestamp> under the current working directory.
LOG_FILE=f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}"
logs_path=os.path.join(os.getcwd(),"logs",LOG_FILE)
os.makedirs(logs_path,exist_ok=True)


def setup_logger(file_name: str = None, api_app=None):
    """Configure and return a logger writing into the per-run log directory.

    Every logger gets a rotating "global.log" handler; when *file_name* is
    given, an additional rotating "<file_name>.log" handler is attached and
    the logger is registered under that name (otherwise the root logger is
    used). When *api_app* is not None, the same handlers are attached to the
    "uvicorn.access" logger instead, so HTTP request logs land in the same
    files.

    Args:
        file_name: Optional logger name and per-module log file stem.
        api_app: When not None, configure and return the uvicorn access logger.

    Returns:
        logging.Logger: the configured logger.
    """
    # Single formatter shared by all handlers (the original duplicated this
    # entire configuration block verbatim in both branches).
    log_formatter = logging.Formatter(
        "%(asctime)s - %(filename)s - %(funcName)s - Line %(lineno)d - %(levelname)s - %(message)s"
    )

    def _file_handler(path):
        # Rotating file handler: 5 MB per file with 3 backups.
        handler = RotatingFileHandler(filename=path, maxBytes=5 * 1024 * 1024, backupCount=3)
        handler.setFormatter(log_formatter)
        handler.setLevel(logging.INFO)
        return handler

    handlers = []
    if file_name is not None:
        handlers.append(_file_handler(os.path.join(logs_path, f"{file_name}.log")))
    handlers.append(_file_handler(os.path.join(logs_path, "global.log")))

    root_logger = logging.getLogger(file_name)
    root_logger.setLevel(logging.DEBUG)
    # Fix: guard against attaching duplicate handlers when setup_logger is
    # called more than once for the same logger name (this previously caused
    # every log line to be written repeatedly).
    if not root_logger.handlers:
        for handler in handlers:
            root_logger.addHandler(handler)

    if api_app is not None:
        # Route uvicorn's request logs into the same files.
        uvicorn_access_logger = logging.getLogger("uvicorn.access")
        uvicorn_access_logger.setLevel(logging.INFO)
        if not uvicorn_access_logger.handlers:
            for handler in handlers:
                uvicorn_access_logger.addHandler(handler)
        return uvicorn_access_logger
    return root_logger
|
app/src/model_loader.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
from app.src.vgg16_load import VGGDocumentClassifier
|
| 3 |
+
from app.src.vit_load import VITDocumentClassifier
|
| 4 |
+
from app.src.constant import *
|
| 5 |
+
from app.src.logger import setup_logger
|
| 6 |
+
|
| 7 |
+
logger = setup_logger("model_loader")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def vit_loader() -> VITDocumentClassifier:
    """Build and return the ViT document classifier.

    Loads the classifier from the artifact paths declared in
    ``app.src.constant`` (``vit_model_path`` / ``vit_mlb_path``).

    Returns:
        A ready-to-use ``VITDocumentClassifier`` instance.

    Raises:
        Exception: Any error raised while loading the model artifacts is
            logged (with traceback) and re-raised unchanged.
    """
    try:
        vit = VITDocumentClassifier(vit_model_path, vit_mlb_path)
        return vit
    except Exception as e:
        # Log with the full traceback, then re-raise with bare `raise`
        # so the original traceback is preserved for the caller.
        logger.error(str(e), exc_info=True)
        raise
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def vgg_loader() -> VGGDocumentClassifier:
    """Build and return the VGG16 document classifier.

    Loads the classifier from the artifact paths declared in
    ``app.src.constant`` (``vgg_model_path`` / ``vgg_mlb_path``).

    Returns:
        A ready-to-use ``VGGDocumentClassifier`` instance.

    Raises:
        Exception: Any error raised while loading the model artifacts is
            logged (with traceback) and re-raised unchanged.
    """
    try:
        vgg = VGGDocumentClassifier(vgg_model_path, vgg_mlb_path)
        return vgg
    except Exception as e:
        # Log with the full traceback, then re-raise with bare `raise`
        # so the original traceback is preserved for the caller.
        logger.error(str(e), exc_info=True)
        raise
|
app/src/test_vit.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import joblib
|
| 2 |
+
from sklearn.preprocessing import MultiLabelBinarizer
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import torch
|
| 5 |
+
import numpy as np
|
| 6 |
+
from PIL import Image
|
| 7 |
+
from transformers import AutoImageProcessor, AutoModelForImageClassification
|
| 8 |
+
from app.src.logger import setup_logger
|
| 9 |
+
|
| 10 |
+
logger = setup_logger("test_vit")
|
| 11 |
+
|
| 12 |
+
try:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Use forward slashes: pathlib normalizes them on every OS. The previous
    # backslash literals ("artifacts\model\VIT_model\mlb.joblib") are path
    # separators only on Windows — on a Linux runtime they are a single file
    # name containing backslashes, so loading fails at startup.
    mlb_file_path = Path("artifacts/model/VIT_model/mlb.joblib")
    model_file_path = Path("artifacts/model/VIT_model/model.pth")
    # Select model
    model_id = "google/vit-base-patch16-224-in21k"
    # Load processor for the base ViT checkpoint (resize/normalize config).
    processor = AutoImageProcessor.from_pretrained(model_id, use_fast=True)

    # Load the entire fine-tuned model object (not just a state_dict).
    # weights_only=False is required because the checkpoint pickles the full
    # nn.Module — only safe for trusted artifact files.
    model = torch.load(model_file_path, map_location=device, weights_only=False)
    # Move the model to the selected device before inference.
    model.to(device)

except Exception as e:
    logger.error(str(e))
    raise e
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def mlb_load(file_path: Path) -> MultiLabelBinarizer:
    """Load the fitted MultiLabelBinarizer from *file_path*.

    Falls back to a placeholder binarizer fitted on the known label set when
    the file is missing, so downstream prediction code can still run.

    Args:
        file_path: Path to the joblib-serialized MultiLabelBinarizer.

    Returns:
        The loaded (or placeholder) MultiLabelBinarizer.
    """
    try:
        mlb = joblib.load(file_path)
    except FileNotFoundError:
        # Report the path that was actually requested, not a hard-coded one,
        # so the log points at the real missing file.
        logger.error(f"Error: '{file_path}' not found.")
        logger.error("Please make sure the path is correct. Using a placeholder binarizer.")
        # As a placeholder, create a dummy mlb if the file is not found.
        mlb = MultiLabelBinarizer()
        # This should be the set of your actual labels.
        mlb.fit([['advertisement', 'email', 'form', 'invoice', 'note']])
    return mlb
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def VIT_model_prediction(image_path: Path, cut_off: float):
    """Classify one document image with the module-level ViT model.

    Args:
        image_path: Path of the image to classify. If the file is missing,
            a solid-red dummy image is used so the pipeline can be exercised.
        cut_off: Sigmoid probability threshold above which a label counts as
            predicted (multi-label classification).

    Returns:
        dict: ``{"status": 1, "classe": predicted_labels}`` where
        ``predicted_labels`` is the tuple list returned by
        ``MultiLabelBinarizer.inverse_transform``.

    Raises:
        Exception: Any unexpected error is logged and re-raised.
    """
    try:
        # Load and convert image to RGB (ViT processor expects 3 channels).
        try:
            image = Image.open(image_path)
            if image.mode != "RGB":
                image = image.convert("RGB")
        except FileNotFoundError:
            logger.error(f"Error: Image not found at {image_path}")
            logger.error("Using a dummy image for demonstration.")
            # Create a dummy image for demonstration if image not found.
            image = Image.new('RGB', (224, 224), color='red')

        # Preprocess image into a (1, 3, H, W) tensor on the model's device.
        pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)

        # Forward pass — no gradients needed at inference time.
        with torch.no_grad():
            outputs = model(pixel_values)
            logits = outputs.logits

        # Sigmoid (not softmax): labels are independent in a multi-label setup.
        probs = torch.sigmoid(logits.squeeze().cpu())

        # Build the binary indicator vector directly from the boolean
        # comparison, as an integer array — the indicator format that
        # MultiLabelBinarizer.inverse_transform expects.
        predictions = (probs.numpy() >= cut_off).astype(int)

        # Get label names using the loaded MultiLabelBinarizer.
        mlb = mlb_load(mlb_file_path)
        # inverse_transform needs a 2D (1, num_classes) array.
        predicted_labels = mlb.inverse_transform(predictions.reshape(1, -1))
        logger.info(f"Predicted labels: {predicted_labels}")
        return {"status": 1, "classe": predicted_labels}

    except Exception as e:
        logger.error(str(e))
        raise e


#VIT_model_prediction(Path(r"dataset\sample_text_ds\test\email\2078379610a.jpg"),0.5)
|
app/src/vgg16_load.py
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import joblib
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from sklearn.preprocessing import MultiLabelBinarizer
|
| 6 |
+
import cv2
|
| 7 |
+
import numpy as np
|
| 8 |
+
import logging
|
| 9 |
+
import cv2
|
| 10 |
+
import keras
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import tensorflow as tf
|
| 13 |
+
from typing import Optional, Tuple, List
|
| 14 |
+
from app.src.logger import setup_logger
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Configure logging
|
| 18 |
+
logger = setup_logger("vgg16_load")
|
| 19 |
+
|
| 20 |
+
def load_vgg_artifacts(model_path: Path, mlb_path: Path) -> tuple[tf.keras.Model, MultiLabelBinarizer]:
    """
    Loads the VGG model and the MultiLabelBinarizer from specified paths.

    Args:
        model_path: Path to the VGG model file (.keras).
        mlb_path: Path to the MultiLabelBinarizer file (.joblib).

    Returns:
        A tuple containing the loaded Keras model and MultiLabelBinarizer object.

    Raises:
        FileNotFoundError: If either the model file or the MLB file is not found.
        Exception: If any other error occurs during loading.
    """
    # No None pre-initialization needed: every failure path below re-raises,
    # so `model` / `mlb` are only ever read after a successful assignment.
    try:
        logger.info(f"Attempting to load VGG model from {model_path}")
        model = tf.keras.models.load_model(model_path)
        logger.info("VGG model loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: VGG model file not found at {model_path}")
        raise
    except Exception as e:
        logger.error(f"An error occurred while loading the VGG model: {e}")
        raise

    try:
        logger.info(f"Attempting to load MultiLabelBinarizer from {mlb_path}")
        mlb = joblib.load(mlb_path)
        logger.info("MultiLabelBinarizer loaded successfully.")
    except FileNotFoundError:
        logger.error(f"Error: MultiLabelBinarizer file not found at {mlb_path}")
        raise
    except Exception as e:
        logger.error(f"An error occurred while loading the MultiLabelBinarizer: {e}")
        raise

    logger.info("Both VGG model and MultiLabelBinarizer loaded successfully.")
    return model, mlb
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def preprocess_image(image_path: Path, target_size: tuple[int, int] = (224, 224)) -> np.ndarray | None:
    """
    Preprocesses an image for VGG model prediction.

    Loads an image from the specified path, converts it to RGB, resizes it,
    and normalizes pixel values. Includes robust error handling and logging
    at each step.

    Args:
        image_path: Path to the image file.
        target_size: A tuple (width, height) specifying the desired output size.

    Returns:
        A preprocessed NumPy array representing the image with pixel values
        scaled between 0 and 1, or None if an error occurred during processing.
    """
    try:
        logger.info(f"Attempting to load image from {image_path}")
        img = cv2.imread(str(image_path))  # cv2.imread expects a string path

        if img is None:
            logger.error(f"Error: Could not load image from {image_path}. cv2.imread returned None.")
            return None
        logger.info("Image loaded successfully.")

        logger.info("Attempting to convert image to RGB.")
        # cv2.imread loads color images in BGR (or BGRA for images with an
        # alpha channel). The model expects 3-channel RGB, so convert
        # explicitly based on the channel layout that was actually loaded.
        if len(img.shape) == 3 and img.shape[2] == 3:  # color image (BGR)
            try:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                logger.info("Image converted to RGB successfully.")
            except cv2.error as e:
                logger.error(f"Error during BGR to RGB conversion for image {image_path}: {e}")
                return None
        elif len(img.shape) == 3 and img.shape[2] == 4:  # BGRA (alpha channel)
            # Previously this fell through with 4 channels, which breaks any
            # 3-channel model downstream; drop alpha while converting.
            try:
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
                logger.info("BGRA image converted to RGB successfully.")
            except cv2.error as e:
                logger.error(f"Error during BGRA to RGB conversion for image {image_path}: {e}")
                return None
        elif len(img.shape) == 2:  # grayscale image
            try:
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                logger.info("Grayscale image converted to RGB successfully.")
            except cv2.error as e:
                logger.error(f"Error during Grayscale to RGB conversion for image {image_path}: {e}")
                return None
        else:
            logger.warning(f"Unexpected image format for {image_path}. Attempting to proceed.")
            # If it's not a recognized layout, proceed but log a warning.

        logger.info(f"Attempting to resize image to {target_size}.")
        try:
            img = cv2.resize(img, target_size)
            if img is None or img.size == 0:
                logger.error(f"Error: cv2.resize returned None or empty array for image {image_path}.")
                return None
            logger.info("Image resized successfully.")
        except cv2.error as e:
            logger.error(f"Error during image resizing for image {image_path} to size {target_size}: {e}")
            return None

        logger.info("Attempting to normalize pixel values.")
        try:
            # Ensure the image is float before division; scale to [0, 1].
            img = img.astype("float32") / 255.0
            if img is None or img.size == 0 or np.max(img) > 1.0 or np.min(img) < 0.0:
                logger.error(f"Error: Image normalization failed or resulted in unexpected values for image {image_path}.")
                return None
            logger.info("Pixel values normalized successfully.")
        except Exception as e:
            logger.error(f"Error during pixel normalization for image {image_path}: {e}")
            return None

        logger.info(f"Image preprocessing completed successfully for {image_path}.")
        return img

    except Exception as e:
        logger.error(f"An unexpected error occurred during image preprocessing for {image_path}: {e}")
        return None
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
class VGGDocumentClassifier:
    """
    A class for classifying documents using a VGG16 model.

    This class encapsulates the loading of the VGG16 model and its associated
    MultiLabelBinarizer, provides a method to preprocess input images, and
    performs document classification predictions.
    """

    def __init__(self, model_path: Path, mlb_path: Path, target_size: Tuple[int, int] = (224, 224)) -> None:
        """
        Initializes the VGGDocumentClassifier by loading the model and MLB.

        Args:
            model_path: Path to the VGG model file (.keras).
            mlb_path: Path to the MultiLabelBinarizer file (.joblib).
            target_size: The target size (width, height) for image preprocessing.
                Defaults to (224, 224).

        Raises:
            FileNotFoundError: If either the model file or the MLB file is not found.
            RuntimeError: If artifact loading left the classifier incomplete.
            Exception: If any other error occurs during loading.
        """
        logger.info("Initializing VGGDocumentClassifier.")
        self.model: Optional[tf.keras.Model] = None
        self.mlb: Optional[MultiLabelBinarizer] = None
        self.target_size: Tuple[int, int] = target_size

        try:
            self._load_artifacts(model_path, mlb_path)
            if self.model and self.mlb:
                logger.info("VGGDocumentClassifier initialized successfully.")
            else:
                logger.critical("VGGDocumentClassifier failed to fully initialize due to artifact loading errors.")
                raise RuntimeError("Failed to load all required artifacts for VGGDocumentClassifier.")
        except Exception as e:
            logger.critical(f"Failed to initialize VGGDocumentClassifier: {e}", exc_info=True)
            raise  # Re-raise the exception after logging

    def _load_artifacts(self, model_path: Path, mlb_path: Path) -> None:
        """
        Loads the VGG model and MultiLabelBinarizer with error handling and logging.

        Every failure path raises, so there is no partial-success state to
        track (the previous `model_loaded` / `mlb_loaded` flags were dead
        code: their failure branch was unreachable).

        Args:
            model_path: Path to the VGG model file (.keras).
            mlb_path: Path to the MultiLabelBinarizer file (.joblib).

        Raises:
            FileNotFoundError: If either the model file or the MLB file is not found.
            Exception: If any other unexpected error occurs during loading.
        """
        logger.info("Starting artifact loading.")

        # Load Model
        try:
            logger.info(f"Attempting to load VGG model from {model_path}")
            self.model = tf.keras.models.load_model(model_path)
            logger.info("VGG model loaded successfully.")
        except FileNotFoundError:
            logger.critical(f"Critical Error: VGG model file not found at {model_path}", exc_info=True)
            raise  # Critical initialization failure
        except Exception as e:
            logger.critical(f"Critical Error: An unexpected error occurred while loading the VGG model from {model_path}: {e}", exc_info=True)
            raise  # Critical initialization failure

        # Load MLB
        try:
            logger.info(f"Attempting to load MultiLabelBinarizer from {mlb_path}")
            self.mlb = joblib.load(mlb_path)
            logger.info("MultiLabelBinarizer loaded successfully.")
        except FileNotFoundError:
            logger.critical(f"Critical Error: MultiLabelBinarizer file not found at {mlb_path}", exc_info=True)
            raise  # Critical initialization failure
        except Exception as e:
            logger.critical(f"Critical Error: An unexpected error occurred while loading the MultiLabelBinarizer from {mlb_path}: {e}", exc_info=True)
            raise  # Critical initialization failure

        logger.info("All required VGG artifacts loaded successfully.")

    def preprocess_image(self, image_path: Path) -> Optional[np.ndarray]:
        """
        Preprocesses an image for VGG model prediction.

        Loads an image from the specified path, converts it to RGB, resizes it
        to ``self.target_size``, and normalizes pixel values to [0, 1].

        Args:
            image_path: Path to the image file.

        Returns:
            A preprocessed NumPy array representing the image with pixel values
            scaled between 0 and 1, or None if an error occurred during processing.
        """
        try:
            logger.info(f"Attempting to load image from {image_path}")
            img = cv2.imread(str(image_path))  # cv2.imread expects a string path

            if img is None:
                logger.error(f"Error: Could not load image from {image_path}. cv2.imread returned None.")
                return None
            logger.info("Image loaded successfully.")

            logger.info("Attempting to convert image to RGB.")
            # cv2 loads BGR (or BGRA with alpha); convert to the 3-channel
            # RGB layout the model was trained on.
            if len(img.shape) == 3 and img.shape[2] == 3:  # color image (BGR)
                try:
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    logger.info("Image converted to RGB successfully.")
                except cv2.error as e:
                    logger.error(f"Error during BGR to RGB conversion for image {image_path}: {e}")
                    return None
            elif len(img.shape) == 3 and img.shape[2] == 4:  # BGRA (alpha)
                # Drop the alpha channel; passing 4 channels through would
                # break the 3-channel model input downstream.
                try:
                    img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
                    logger.info("BGRA image converted to RGB successfully.")
                except cv2.error as e:
                    logger.error(f"Error during BGRA to RGB conversion for image {image_path}: {e}")
                    return None
            elif len(img.shape) == 2:  # grayscale image
                try:
                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                    logger.info("Grayscale image converted to RGB successfully.")
                except cv2.error as e:
                    logger.error(f"Error during Grayscale to RGB conversion for image {image_path}: {e}")
                    return None
            else:
                logger.warning(f"Unexpected image format for {image_path}. Attempting to proceed.")

            logger.info(f"Attempting to resize image to {self.target_size}.")
            try:
                img = cv2.resize(img, self.target_size)
                if img is None or img.size == 0:
                    logger.error(f"Error: cv2.resize returned None or empty array for image {image_path}.")
                    return None
                logger.info("Image resized successfully.")
            except cv2.error as e:
                logger.error(f"Error during image resizing for image {image_path} to size {self.target_size}: {e}")
                return None

            logger.info("Attempting to normalize pixel values.")
            try:
                img = img.astype("float32") / 255.0
                if img is None or img.size == 0 or np.max(img) > 1.0 or np.min(img) < 0.0:
                    logger.error(f"Error: Image normalization failed or resulted in unexpected values for image {image_path}.")
                    return None
                logger.info("Pixel values normalized successfully.")
            except Exception as e:
                logger.error(f"Error during pixel normalization for image {image_path}: {e}")
                return None

            logger.info(f"Image preprocessing completed successfully for {image_path}.")
            return img

        except Exception as e:
            logger.error(f"An unexpected error occurred during image preprocessing for {image_path}: {e}")
            return None

    def predict(self, image_path: Path) -> Optional[List[str]]:
        """
        Predicts the class labels for a given image using the loaded VGG model.

        The process involves loading and preprocessing the image, performing
        inference with the model, and converting the prediction to class labels
        using the MultiLabelBinarizer.

        Args:
            image_path: Path to the image file to classify.

        Returns:
            A list of predicted class labels (strings) if the prediction process
            is successful. Returns None if any critical step (image loading,
            preprocessing, model inference, or inverse transform) fails.
            Returns an empty list if the prediction process is successful but
            no labels are predicted.
        """
        logger.info(f"Starting prediction process for image: {image_path}.")

        if self.model is None or self.mlb is None:
            logger.error("Model or MultiLabelBinarizer not loaded. Cannot perform prediction.")
            return None

        # Preprocess image
        image = self.preprocess_image(image_path)
        if image is None:
            logger.error(f"Image preprocessing failed for {image_path}. Cannot perform prediction.")
            return None

        try:
            logger.info(f"Performing model inference for {image_path}.")
            # Add batch dimension to the image: (H, W, 3) -> (1, H, W, 3)
            image = np.expand_dims(image, axis=0)
            prd = self.model.predict(image)
            logger.info(f"Model inference completed for {image_path}. Prediction shape: {prd.shape}")
        except Exception as e:
            logger.error(f"An error occurred during model inference for {image_path}: {e}", exc_info=True)
            return None

        # Convert the prediction to a binary indicator format and get labels
        try:
            logger.info(f"Converting prediction to labels for {image_path}.")
            # Assuming multi-class classification for now, taking the argmax.
            # For true multi-label output, apply sigmoid + thresholding here.
            pred_id = np.argmax(prd, axis=1)

            # One-hot indicator of shape (1, num_classes) for inverse_transform.
            binary_prediction = np.zeros((1, len(self.mlb.classes_)))
            binary_prediction[0, pred_id] = 1

            predicted_labels_tuple_list: List[Tuple[str, ...]] = self.mlb.inverse_transform(binary_prediction)
            logger.info(f"Prediction processed for {image_path}. Predicted labels (raw tuple list): {predicted_labels_tuple_list}")

            if predicted_labels_tuple_list and len(predicted_labels_tuple_list) > 0:
                final_labels: List[str] = list(predicted_labels_tuple_list[0])
                logger.info(f"Final predicted labels for {image_path}: {final_labels}")
                return final_labels
            else:
                logger.warning(f"MLB inverse_transform returned an empty list for {image_path}. No labels predicted.")
                return []

        except Exception as e:
            logger.error(f"An error occurred during inverse transform or label processing for {image_path}: {e}", exc_info=True)
            return None
|
| 380 |
+
|
| 381 |
+
|
app/src/vit_load.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import torch
|
| 3 |
+
from PIL import Image
|
| 4 |
+
from transformers import AutoImageProcessor, AutoModelForImageClassification
|
| 5 |
+
from sklearn.preprocessing import MultiLabelBinarizer
|
| 6 |
+
import joblib
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import List, Optional, Tuple, Any
|
| 9 |
+
from app.src.logger import setup_logger
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
logger = setup_logger("vit_load")
|
| 14 |
+
|
| 15 |
+
class VITDocumentClassifier:
|
| 16 |
+
"""
|
| 17 |
+
A class for classifying documents using a Vision Transformer (ViT) model.
|
| 18 |
+
|
| 19 |
+
This class encapsulates the loading of the ViT model, its associated processor,
|
| 20 |
+
and a MultiLabelBinarizer for converting model outputs to meaningful labels.
|
| 21 |
+
It provides a method to preprocess input images and perform multi-label
|
| 22 |
+
classification predictions with a specified confidence cutoff threshold.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
def __init__(self, model_path: Path, mlb_path: Path, model_id: str = "google/vit-base-patch16-224-in21k") -> None:
|
| 26 |
+
"""
|
| 27 |
+
Initializes the VITDocumentClassifier by loading the model, processor, and MLB.
|
| 28 |
+
|
| 29 |
+
Args:
|
| 30 |
+
model_path: Path to the ViT model file (.pth). This is expected to be
|
| 31 |
+
a pre-trained or fine-tuned PyTorch model file.
|
| 32 |
+
mlb_path: Path to the MultiLabelBinarizer file (.joblib). This file
|
| 33 |
+
should contain the fitted binarizer object corresponding
|
| 34 |
+
to the model's output classes.
|
| 35 |
+
model_id: The Hugging Face model ID for the processor. This is used
|
| 36 |
+
to load the appropriate image processor for the ViT model.
|
| 37 |
+
Defaults to "google/vit-base-patch16-224-in21k".
|
| 38 |
+
|
| 39 |
+
Raises:
|
| 40 |
+
FileNotFoundError: If either the model file or the MLB file is not found
|
| 41 |
+
at the specified paths during artifact loading.
|
| 42 |
+
Exception: If any other unexpected error occurs during the loading
|
| 43 |
+
of the model, processor, or MultiLabelBinarizer.
|
| 44 |
+
RuntimeError: If artifact loading fails for critical components
|
| 45 |
+
(model or MLB).
|
| 46 |
+
"""
|
| 47 |
+
logger.info("Initializing VITDocumentClassifier.")
|
| 48 |
+
self.model: Optional[torch.nn.Module] = None
|
| 49 |
+
self.processor: Optional[AutoImageProcessor] = None
|
| 50 |
+
self.mlb: Optional[MultiLabelBinarizer] = None
|
| 51 |
+
self.device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 52 |
+
logger.info(f"Using device: {self.device}")
|
| 53 |
+
self.model_id: str = model_id
|
| 54 |
+
|
| 55 |
+
try:
|
| 56 |
+
self._load_artifacts(model_path, mlb_path)
|
| 57 |
+
if self.model and self.processor and self.mlb:
|
| 58 |
+
logger.info("VITDocumentClassifier initialized successfully.")
|
| 59 |
+
else:
|
| 60 |
+
# This case should ideally be caught and re-raised in _load_artifacts
|
| 61 |
+
# but adding a check here for robustness.
|
| 62 |
+
logger.critical("VITDocumentClassifier failed to fully initialize due to artifact loading errors.")
|
| 63 |
+
raise RuntimeError("Failed to load all required artifacts for VITDocumentClassifier.")
|
| 64 |
+
|
| 65 |
+
except Exception as e:
|
| 66 |
+
logger.critical(f"Failed to initialize VITDocumentClassifier: {e}", exc_info=True)
|
| 67 |
+
# Re-raise the exception after logging
|
| 68 |
+
raise
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _load_artifacts(self, model_path: Path, mlb_path: Path) -> None:
    """
    Load the ViT image processor, the model weights, and the fitted
    MultiLabelBinarizer, populating ``self.processor``, ``self.model``
    and ``self.mlb``.

    Internal helper invoked from ``__init__``.

    Args:
        model_path: Path to the serialized ViT model file (.pth).
        mlb_path: Path to the fitted MultiLabelBinarizer file (.joblib).

    Raises:
        FileNotFoundError: If the model or MLB file is missing.
        Exception: For any other unexpected failure while loading the
            model or the MLB. A processor failure is only logged here;
            ``__init__`` detects it (``self.processor`` stays None) and
            raises RuntimeError itself.
    """
    logger.info("Starting artifact loading.")

    ok_processor: bool = False
    ok_model: bool = False
    ok_mlb: bool = False

    # --- Processor ---------------------------------------------------
    # Non-fatal on failure: we keep going so the remaining artifacts are
    # attempted, and let the caller decide whether initialization failed.
    try:
        logger.info(f"Attempting to load ViT processor for model ID: {self.model_id}")
        self.processor = AutoImageProcessor.from_pretrained(self.model_id, use_fast=True)
        logger.info("ViT processor loaded successfully.")
        ok_processor = True
    except Exception as e:
        logger.error(f"An error occurred while loading the ViT processor for model ID {self.model_id}: {e}", exc_info=True)

    # --- Model ---------------------------------------------------------
    try:
        logger.info(f"Attempting to load ViT model from {model_path}")
        # NOTE(review): weights_only=False unpickles arbitrary Python
        # objects — only load model files from trusted sources.
        # map_location keeps loading working when the save-time device
        # differs from the current one.
        self.model = torch.load(model_path, map_location=self.device, weights_only=False)
        self.model.to(self.device)  # make sure the model sits on the target device
        logger.info(f"ViT model loaded successfully and moved to {self.device}.")
        ok_model = True
    except FileNotFoundError:
        # Fatal: without the model nothing else can work.
        logger.critical(f"Critical Error: ViT model file not found at {model_path}", exc_info=True)
        raise
    except Exception as e:
        logger.critical(f"Critical Error: An unexpected error occurred while loading the ViT model from {model_path}: {e}", exc_info=True)
        raise

    # --- MultiLabelBinarizer --------------------------------------------
    try:
        logger.info(f"Attempting to load MultiLabelBinarizer from {mlb_path}")
        self.mlb = joblib.load(mlb_path)
        logger.info("MultiLabelBinarizer loaded successfully.")
        ok_mlb = True
    except FileNotFoundError:
        # Fatal: predictions cannot be mapped back to label names without it.
        logger.critical(f"Critical Error: MultiLabelBinarizer file not found at {mlb_path}", exc_info=True)
        raise
    except Exception as e:
        logger.critical(f"Critical Error: An unexpected error occurred while loading the MultiLabelBinarizer from {mlb_path}: {e}", exc_info=True)
        raise

    # Summarize the outcome for the log; __init__ performs the hard check.
    if ok_processor and ok_model and ok_mlb:
        logger.info("All required ViT artifacts loaded successfully.")
    else:
        logger.error("One or more required ViT artifacts failed to load during _load_artifacts.")
def predict(self, image_path: Path, cut_off: float = 0.5) -> Optional[List[str]]:
    """
    Predict multi-label class names for a single image with the loaded ViT model.

    Pipeline: load the image with PIL, convert to RGB if needed, preprocess
    with the Hugging Face image processor, run a no-grad forward pass, apply
    a sigmoid to the logits, threshold the probabilities at ``cut_off``, and
    map the resulting binary vector back to label names with the
    MultiLabelBinarizer.

    Args:
        image_path: Path to the image file to classify. Must be readable
            by PIL (Pillow).
        cut_off: Probability threshold; classes whose sigmoid probability
            is >= cut_off are predicted. Defaults to 0.5.

    Returns:
        A list of predicted label strings on success; an empty list when
        no class reaches the threshold; None when any step (artifact
        availability, image loading, preprocessing, inference, or inverse
        transform) fails.
    """
    logger.info(f"Starting prediction process for image: {image_path} with cutoff {cut_off}.")

    # All three artifacts must have been loaded during __init__.
    if self.model is None or self.processor is None or self.mlb is None:
        logger.error("Model, processor, or MultiLabelBinarizer not loaded. Cannot perform prediction.")
        return None

    # --- Load image --------------------------------------------------
    image: Optional[Image.Image] = None
    try:
        logger.info(f"Attempting to load image from {image_path}")
        image = Image.open(image_path)
        logger.info(f"Image loaded successfully from {image_path}.")
    except FileNotFoundError:
        logger.error(f"Error: Image file not found at {image_path}", exc_info=True)
        return None
    except Exception as e:
        logger.error(f"An unexpected error occurred while loading image {image_path}: {e}", exc_info=True)
        return None

    # --- Ensure 3-channel RGB (ViT processor expects RGB input) ------
    try:
        logger.info(f"Attempting to convert image to RGB for {image_path}.")
        if image.mode != "RGB":
            image = image.convert("RGB")
            logger.info(f"Image converted to RGB successfully for {image_path}.")
        else:
            logger.info(f"Image is already in RGB format for {image_path}.")
    except Exception as e:
        logger.error(f"An error occurred while converting image {image_path} to RGB: {e}", exc_info=True)
        return None

    # --- Preprocess with the HF processor -----------------------------
    try:
        logger.info(f"Attempting to preprocess image using processor for {image_path}.")
        if image is None:
            logger.error(f"Image is None after loading/conversion for {image_path}. Cannot preprocess.")
            return None
        # The processor accepts a PIL Image; move the tensor to the model device.
        pixel_values: torch.Tensor = self.processor(images=image, return_tensors="pt").pixel_values.to(self.device)
        logger.info(f"Image preprocessed and moved to device ({self.device}).")
    except Exception as e:
        logger.error(f"An error occurred during image preprocessing for {image_path}: {e}", exc_info=True)
        return None

    # --- Forward pass --------------------------------------------------
    try:
        logger.info(f"Starting model forward pass for {image_path}.")
        self.model.eval()  # disable dropout/batchnorm training behavior
        with torch.no_grad():
            outputs: Any = self.model(pixel_values)  # output type varies by model class
            logits: torch.Tensor = outputs.logits
        logger.info(f"Model forward pass completed for {image_path}.")
    except Exception as e:
        logger.error(f"An error occurred during model forward pass for {image_path}: {e}", exc_info=True)
        return None

    # --- Sigmoid + threshold -> binary multi-label vector ---------------
    try:
        logger.info(f"Applying sigmoid and thresholding for {image_path}.")
        sigmoid: torch.nn.Sigmoid = torch.nn.Sigmoid()
        probs: torch.Tensor = sigmoid(logits.squeeze().cpu())

        predictions: np.ndarray = np.zeros(probs.shape, dtype=int)
        # FIX: removed leftover debug print(predictions) that polluted stdout.
        predictions[np.where(probs >= cut_off)] = 1
        logger.info(f"Applied sigmoid and thresholding with cutoff {cut_off} for {image_path}. Binary predictions shape: {predictions.shape}")
    except Exception as e:
        logger.error(f"An error occurred during probability processing for {image_path}: {e}", exc_info=True)
        return None

    # --- Map binary vector back to label names --------------------------
    try:
        logger.info(f"Performing inverse transform using MultiLabelBinarizer for {image_path}.")
        # Defensive re-check (already verified at the top of predict).
        if self.mlb is None:
            logger.error(f"MultiLabelBinarizer is None. Cannot perform inverse transform for {image_path}.")
            return None

        binary_prediction: np.ndarray

        # inverse_transform requires a 2D array (n_samples, n_classes);
        # we classify one image at a time, so the expected shape is (1, n_classes).
        expected_shape: Tuple[int, int] = (1, len(self.mlb.classes_))

        if predictions.ndim == 1 and predictions.shape[0] == len(self.mlb.classes_):
            binary_prediction = predictions.reshape(expected_shape)
            logger.info(f"Reshaped 1D prediction to 2D ({expected_shape}) for inverse transform.")
        elif predictions.ndim == 2 and predictions.shape == expected_shape:
            binary_prediction = predictions
            logger.info(f"Prediction already in correct 2D shape ({expected_shape}) for inverse transform.")
        else:
            logger.error(f"Cannot inverse transform prediction shape {predictions.shape} with MLB classes {len(self.mlb.classes_)} for {image_path}. Expected shape: {expected_shape}")
            return None

        predicted_labels_tuple_list: List[Tuple[str, ...]] = self.mlb.inverse_transform(binary_prediction)
        logger.info(f"Prediction processed for {image_path}. Predicted labels (raw tuple list): {predicted_labels_tuple_list}")

        # inverse_transform returns a list of tuples, one per sample;
        # we take the first (and only) tuple.
        if predicted_labels_tuple_list:
            final_labels: List[str] = list(predicted_labels_tuple_list[0])
            logger.info(f"Final predicted labels for {image_path}: {final_labels}")
            return final_labels
        else:
            logger.warning(f"MLB inverse_transform returned an empty list for {image_path}. No labels predicted.")
            return []

    except Exception as e:
        logger.error(f"An error occurred during inverse transform for {image_path}: {e}", exc_info=True)
        return None
artifacts/model/VIT_model/confusion_matrix.png
ADDED
|
Git LFS Details
|
artifacts/model/VIT_model/mlb.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4754cb9555905cbeb8a008ac90b2bb81ab076fbc272510a17c40abea32aa5d16
|
| 3 |
+
size 571
|
artifacts/model/VIT_model/model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:223b9f3ccbe55b37f66ed7dd4c832116c17bec3229693a679da41351e9361a82
|
| 3 |
+
size 343310666
|
artifacts/model/vgg_model/mlb.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4754cb9555905cbeb8a008ac90b2bb81ab076fbc272510a17c40abea32aa5d16
|
| 3 |
+
size 571
|
artifacts/model/vgg_model/model.keras
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad1f9fbf700dfac83efd97f5cc4f944ea5a628de9c0ba26d440abdd4b4426ef2
|
| 3 |
+
size 183090331
|
requirements.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers==4.53.0
|
| 2 |
+
efficientnet==1.1.1
|
| 3 |
+
seaborn==0.13.2
|
| 4 |
+
libfinder==0.1.7
|
| 5 |
+
pathlib==1.0.1
|
| 6 |
+
requests==2.32.3
|
| 7 |
+
tensorflow==2.18.0
|
| 8 |
+
dagshub==0.5.10
|
| 9 |
+
google==2.0.3
|
| 10 |
+
torch==2.7.1
|
| 11 |
+
numpy==2.0.2
|
| 12 |
+
pandas==2.2.2
|
| 13 |
+
opencv-python
|
| 14 |
+
mlflow==3.1.1
|
| 15 |
+
keras==3.8.0
|
| 16 |
+
scikit-learn==1.6.1
|
| 17 |
+
ensure==1.0.4
|
| 18 |
+
joblib==1.5.1
|
| 19 |
+
matplotlib==3.10.0
|
| 20 |
+
ensure==1.0.4
|
| 21 |
+
python-box
|
| 22 |
+
pydot
|
| 23 |
+
graphviz
|
| 24 |
+
#'git+https://github.com/facebookresearch/detectron2.git'
|
| 25 |
+
gradio
|
| 26 |
+
fastapi==0.115.4
|
| 27 |
+
uvicorn==0.34.0
|
| 28 |
+
python-multipart==0.0.19
|
| 29 |
+
-e .
|