Mohansai2004 committed on
Commit
23c9421
·
1 Parent(s): 9a224ef

added the model

Browse files
app/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (196 Bytes). View file
 
app/__pycache__/app.cpython-313.pyc ADDED
Binary file (3.36 kB). View file
 
app/__pycache__/caption_model.cpython-313.pyc ADDED
Binary file (6.83 kB). View file
 
app/__pycache__/model.cpython-313.pyc ADDED
Binary file (6.43 kB). View file
 
app/__pycache__/utils.cpython-313.pyc ADDED
Binary file (1.87 kB). View file
 
app/app.py CHANGED
@@ -1,28 +1,57 @@
1
  from fastapi import FastAPI, UploadFile, File, HTTPException
2
- from fastapi.middleware.cors import CORSMiddleware
3
  from starlette.responses import JSONResponse
 
4
  from app.model import analyze_image
5
  from app.utils import read_image
 
6
 
7
  app = FastAPI(title="Image Analyzer API", version="1.0.0")
8
 
9
- # CORS config
10
- app.add_middleware(
11
- CORSMiddleware,
12
- allow_origins=["*"],
13
- allow_credentials=True,
14
- allow_methods=["*"],
15
- allow_headers=["*"],
16
- )
17
 
18
  @app.post("/analyze")
19
  async def analyze(file: UploadFile = File(...)):
 
 
20
  try:
21
  image = read_image(file)
 
 
 
 
 
 
 
22
  result = analyze_image(image)
23
  return JSONResponse(content=result)
 
 
 
 
24
  except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  raise HTTPException(status_code=500, detail=str(e))
 
 
26
 
27
  @app.get("/")
28
  def read_root():
 
1
  from fastapi import FastAPI, UploadFile, File, HTTPException
 
2
  from starlette.responses import JSONResponse
3
+ from starlette.requests import Request
4
  from app.model import analyze_image
5
  from app.utils import read_image
6
+ from app.caption_model import captioner
7
 
8
  app = FastAPI(title="Image Analyzer API", version="1.0.0")
9
 
10
+
 
 
 
 
 
 
 
11
 
12
@app.post("/analyze")
async def analyze(file: UploadFile = File(...)):
    """Classify the uploaded image and return the model's prediction as JSON.

    Raises:
        HTTPException 400: no file, non-image content type, or unreadable image.
        HTTPException 500: model/runtime failure during analysis.
    """
    if not file or not file.filename:
        raise HTTPException(status_code=400, detail="No file uploaded.")
    # Check the declared content type BEFORE decoding the body: content_type
    # may be None (the original called .startswith on it unconditionally,
    # turning a bad upload into an AttributeError/500), and rejecting early
    # avoids decoding a file we will refuse anyway.
    if not (file.content_type or "").startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image")
    try:
        image = read_image(file)
    except HTTPException:
        # read_image raises HTTPException with a meaningful status code;
        # re-raise instead of collapsing it into a generic 400.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to read image: {str(e)}")

    try:
        result = analyze_image(image)
        return JSONResponse(content=result)
    except ValueError as e:
        # Invalid input reported by the model layer.
        raise HTTPException(status_code=400, detail=str(e))
    except RuntimeError as e:
        # Model execution failure.
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
33
+
34
@app.post("/caption")
async def generate_caption(file: UploadFile = File(...)):
    """Generate a caption for the uploaded image and return it as JSON.

    Raises:
        HTTPException 400: no file, non-image content type, or unreadable image.
        HTTPException 500: model/runtime failure during captioning.
    """
    if not file or not file.filename:
        raise HTTPException(status_code=400, detail="No file uploaded.")
    # Validate the declared content type BEFORE decoding: content_type may be
    # None (the original called .startswith on it unconditionally, so a bad
    # upload crashed with AttributeError/500 instead of a clean 400).
    if not (file.content_type or "").startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image")
    try:
        image = read_image(file)
    except HTTPException:
        # read_image raises HTTPException with a meaningful status code;
        # re-raise instead of collapsing it into a generic 400.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to read image: {str(e)}")

    try:
        result = captioner.generate_caption(image)
        return JSONResponse(content=result)
    except ValueError as e:
        # Invalid input reported by the caption model layer.
        raise HTTPException(status_code=400, detail=str(e))
    except RuntimeError as e:
        # Model execution failure.
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
55
 
56
  @app.get("/")
57
  def read_root():
app/caption_model.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from PIL import Image
import logging
import time
from typing import Dict, Any, Optional
import gc

MODEL_NAME = "Salesforce/blip-image-captioning-base"
MAX_RETRIES = 3
RETRY_DELAY = 1  # seconds
MAX_LENGTH = 50  # Maximum length for generated captions


class ImageCaptioner:
    """Wraps the BLIP captioning model: retried loading plus caption generation."""

    def __init__(self):
        self.processor = None
        self.model = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logging.info(f"Using device: {self.device} for caption model")
        self._initialize_model()

    def _initialize_model(self):
        """Load processor and model, retrying up to MAX_RETRIES times.

        Raises RuntimeError if every attempt fails.
        """
        for attempt in range(MAX_RETRIES):
            try:
                # Clear CUDA cache if using GPU before a (re)load attempt.
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    gc.collect()

                self.processor = BlipProcessor.from_pretrained(MODEL_NAME)
                self.model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME).to(self.device)

                # Verify model loaded correctly.
                if self.model is None or self.processor is None:
                    raise RuntimeError("Caption model or processor initialization failed")

                # Inference only: disable dropout etc.
                self.model.eval()

                logging.info(f"Caption model loaded successfully on {self.device} (attempt {attempt + 1})")
                return

            except Exception as e:
                logging.error(f"Attempt {attempt + 1} failed to load caption model: {str(e)}")
                if attempt < MAX_RETRIES - 1:
                    time.sleep(RETRY_DELAY)
                    continue
                raise RuntimeError(f"Failed to initialize the image captioning model after {MAX_RETRIES} attempts")

    def validate_image(self, image: Image.Image) -> Optional[str]:
        """Return an error message if *image* is unsuitable, else None."""
        if not isinstance(image, Image.Image):
            return "Input must be a PIL Image"
        # Only RGB and grayscale are accepted; read_image converts other modes.
        if image.mode not in ('RGB', 'L'):
            return "Image must be in RGB or grayscale format"
        return None

    def generate_caption(self, image: Image.Image) -> Dict[str, Any]:
        """Generate a caption for *image*.

        Returns a dict with keys "caption", "status", and "model_info".
        Raises ValueError for invalid input, RuntimeError on generation failure.
        """
        error = self.validate_image(image)
        if error:
            raise ValueError(error)

        # Try to reinitialize if the model was never loaded (or load failed).
        if self.model is None or self.processor is None:
            self._initialize_model()

        try:
            # Clear CUDA cache if using GPU.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                gc.collect()

            inputs = self.processor(image, return_tensors="pt")
            inputs = {k: v.to(self.device) if hasattr(v, 'to') else v for k, v in inputs.items()}

            try:
                with torch.no_grad():
                    # Beam-search decoding. NOTE: the original also passed
                    # temperature/top_k/top_p, which are sampling-only options
                    # and are ignored (with warnings) when do_sample is False;
                    # they are dropped here.
                    out = self.model.generate(
                        **inputs,
                        max_length=MAX_LENGTH,
                        num_beams=5,  # Beam search for better quality
                        repetition_penalty=1.2,
                        length_penalty=1.0,
                        no_repeat_ngram_size=2,
                    )
                caption = self.processor.decode(out[0], skip_special_tokens=True).strip()

                # Normalize: capitalize and ensure terminal punctuation.
                # Guard against an empty string — the original indexed
                # caption[0] unconditionally and raised IndexError when the
                # model emitted only special tokens.
                if caption:
                    caption = caption[0].upper() + caption[1:]
                    if not caption.endswith(('.', '!', '?')):
                        caption += '.'

                return {
                    "caption": caption,
                    "status": "success",
                    "model_info": {
                        "device": self.device,
                        "model_name": MODEL_NAME
                    }
                }
            finally:
                # Clean up tensors after every call.
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    gc.collect()

        except Exception as e:
            logging.error(f"Error during caption generation: {str(e)}")
            raise RuntimeError(f"Failed to generate caption: {str(e)}")


# Single shared instance used by the API.
captioner = ImageCaptioner()
app/model.py CHANGED
@@ -1,24 +1,108 @@
1
- from transformers import ViTImageProcessor, ViTForImageClassification
2
  import torch
3
  from PIL import Image
 
 
 
 
4
 
5
- MODEL_NAME = "google/vit-base-patch16-224"
 
 
 
6
 
7
- # Load once at startup
8
- processor = ViTImageProcessor.from_pretrained(MODEL_NAME)
9
- model = ViTForImageClassification.from_pretrained(MODEL_NAME)
 
 
 
 
10
 
 
 
 
 
 
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def analyze_image(image: Image.Image):
13
- inputs = processor(images=image, return_tensors="pt")
14
- with torch.no_grad():
15
- outputs = model(**inputs)
16
- logits = outputs.logits
17
- predicted_class_idx = logits.argmax(-1).item()
18
- label = model.config.id2label[predicted_class_idx]
19
- confidence = torch.nn.functional.softmax(logits, dim=-1)[0, predicted_class_idx].item()
20
-
21
- return {
22
- "label": label,
23
- "confidence": round(confidence, 4)
24
- }
 
1
from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image
import logging
import time
from typing import Dict, Any
import gc

MODEL_NAME = "openai/clip-vit-base-patch16"
CATEGORIES = ["food", "fitness", "healthcare"]
MAX_RETRIES = 3
RETRY_DELAY = 1  # seconds


class ImageAnalyzer:
    """Zero-shot image classifier over CATEGORIES using CLIP."""

    def __init__(self):
        self.processor = None
        self.model = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logging.info(f"Using device: {self.device}")
        self._initialize_model()

    def _initialize_model(self):
        """Load processor and model, retrying up to MAX_RETRIES times.

        Raises RuntimeError if every attempt fails.
        """
        for attempt in range(MAX_RETRIES):
            try:
                # Clear CUDA cache if using GPU before a (re)load attempt.
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    gc.collect()

                self.processor = CLIPProcessor.from_pretrained(MODEL_NAME)
                self.model = CLIPModel.from_pretrained(MODEL_NAME).to(self.device)

                # Verify model loaded correctly.
                if self.model is None or self.processor is None:
                    raise RuntimeError("Model or processor initialization failed")

                # Inference only — consistent with ImageCaptioner, which the
                # original omitted here.
                self.model.eval()

                logging.info(f"Model loaded successfully on {self.device} (attempt {attempt + 1})")
                return

            except Exception as e:
                logging.error(f"Attempt {attempt + 1} failed to load model: {str(e)}")
                if attempt < MAX_RETRIES - 1:
                    time.sleep(RETRY_DELAY)
                    continue
                raise RuntimeError(f"Failed to initialize the image analysis model after {MAX_RETRIES} attempts")

    def analyze_image(self, image: Image.Image) -> Dict[str, Any]:
        """Classify *image* against CATEGORIES.

        Returns {"primary_prediction", "alternative_prediction", "status"}.
        Raises ValueError for non-PIL input, RuntimeError on model failure.
        """
        if not isinstance(image, Image.Image):
            raise ValueError("Input must be a PIL Image")

        # Try to reinitialize if the model was never loaded (or load failed).
        if self.model is None or self.processor is None:
            self._initialize_model()

        try:
            # Clear CUDA cache if using GPU.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                gc.collect()

            # Prepare joint text/image inputs for CLIP.
            inputs = self.processor(
                text=CATEGORIES,
                images=image,
                return_tensors="pt",
                padding=True
            )
            # Move tensors to the same device as the model.
            inputs = {k: v.to(self.device) if hasattr(v, 'to') else v
                      for k, v in inputs.items()}

            try:
                with torch.no_grad():
                    outputs = self.model(**inputs)
                    logits_per_image = outputs.logits_per_image
                    probs = logits_per_image.softmax(dim=1).cpu().numpy()[0]

                # Top-2 predictions for more informative results. Defensive
                # against CATEGORIES shrinking: the original indexed
                # predictions[1] unconditionally.
                top_indices = probs.argsort()[-2:][::-1]
                predictions = [
                    {
                        "category": CATEGORIES[idx],
                        "confidence": round(float(probs[idx]), 4)
                    }
                    for idx in top_indices
                ]

                return {
                    "primary_prediction": predictions[0],
                    "alternative_prediction": predictions[1] if len(predictions) > 1 else None,
                    "status": "success"
                }
            finally:
                # Clean up tensors after every call.
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    gc.collect()
        except Exception as e:
            logging.error(f"Error during image analysis: {str(e)}")
            raise RuntimeError(f"Failed to analyze image: {str(e)}")


# Create a single instance to be used by the API.
analyzer = ImageAnalyzer()


# Function to be used by the API (keeps the original module-level interface).
def analyze_image(image: Image.Image):
    return analyzer.analyze_image(image)
 
 
 
 
 
 
 
 
 
 
 
app/utils.py CHANGED
@@ -1,7 +1,34 @@
1
- from fastapi import UploadFile
2
  from PIL import Image
3
  import io
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  def read_image(upload_file: UploadFile) -> Image.Image:
6
- image_bytes = upload_file.file.read()
7
- return Image.open(io.BytesIO(image_bytes)).convert("RGB")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import UploadFile, HTTPException
2
  from PIL import Image
3
  import io
4
+ import logging
5
+ from typing import Tuple
6
+
7
def validate_image_size(image: Image.Image) -> Tuple[bool, str]:
    """Basic image validation.

    Returns (True, "") when the image exposes a size, otherwise
    (False, "Invalid image format").
    """
    try:
        # Simply verify that the size attribute is accessible.
        _ = image.size
    except Exception:
        return False, "Invalid image format"
    return True, ""
15
 
16
def read_image(upload_file: UploadFile) -> Image.Image:
    """Read and validate an image from an uploaded file.

    Returns a PIL image, converted to RGB unless it is already RGB or
    grayscale ('L').

    Raises:
        HTTPException 400: data is not a decodable image.
        HTTPException 500: unexpected failure while processing.
    """
    try:
        image_bytes = upload_file.file.read()
        image = Image.open(io.BytesIO(image_bytes))
        # Image.open is lazy — force decoding now so corrupt or truncated
        # uploads fail here with a 400 instead of erroring later inside the
        # model call (the original deferred the failure).
        image.load()

        # Convert to RGB if needed (palette, CMYK, RGBA, ...).
        if image.mode not in ('RGB', 'L'):
            image = image.convert('RGB')

        return image

    except IOError as e:
        # Covers PIL's UnidentifiedImageError (an OSError subclass) too.
        logging.error(f"Failed to read image: {str(e)}")
        raise HTTPException(status_code=400, detail="Invalid image format")
    except Exception as e:
        logging.error(f"Unexpected error reading image: {str(e)}")
        raise HTTPException(status_code=500, detail="Failed to process image")
requirements.txt CHANGED
@@ -3,3 +3,4 @@ uvicorn
3
  transformers
4
  torch
5
  Pillow
 
 
3
  transformers
4
  torch
5
  Pillow
6
+ python-multipart