Spaces:

Mansuba
/

Bangla_text_to_image_app

Sleeping

App Files Files Community

Mansuba committed on Jan 25

Commit

04a4097

verified ·

1 Parent(s): 6b4f4ab

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -162

app.py CHANGED Viewed

@@ -1,14 +1,15 @@
 import torch
 from transformers import CLIPModel, CLIPProcessor, AutoTokenizer, MarianMTModel, MarianTokenizer
 from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
-import numpy as np
-from typing import List, Tuple, Optional, Dict, Any
 import gradio as gr
-from pathlib import Path
-import json
-import logging
 from dataclasses import dataclass
-import gc
 # Configure logging
 logging.basicConfig(
@@ -17,6 +18,30 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 @dataclass
 class GenerationConfig:
     num_images: int = 1
@@ -44,6 +69,12 @@ class EnhancedBanglaSDGenerator:
         cache_dir: str,
         device: Optional[torch.device] = None
     ):
         self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logger.info(f"Using device: {self.device}")
@@ -53,7 +84,7 @@ class EnhancedBanglaSDGenerator:
     def _initialize_models(self, banglaclip_weights_path: str):
         try:
-            # Initialize translation models
             self.bn2en_model_name = "Helsinki-NLP/opus-mt-bn-en"
             self.translator = self.cache.load_model(
                 self.bn2en_model_name,
@@ -62,171 +93,21 @@ class EnhancedBanglaSDGenerator:
             ).to(self.device)
             self.trans_tokenizer = MarianTokenizer.from_pretrained(self.bn2en_model_name)
-            # Initialize CLIP models
             self.clip_model_name = "openai/clip-vit-base-patch32"
             self.bangla_text_model = "csebuetnlp/banglabert"
             self.banglaclip_model = self._load_banglaclip_model(banglaclip_weights_path)
             self.processor = CLIPProcessor.from_pretrained(self.clip_model_name)
             self.tokenizer = AutoTokenizer.from_pretrained(self.bangla_text_model)
-            # Initialize Stable Diffusion
             self._initialize_stable_diffusion()
         except Exception as e:
             logger.error(f"Error initializing models: {str(e)}")
             raise RuntimeError(f"Failed to initialize models: {str(e)}")
-    def _initialize_stable_diffusion(self):
-        """Initialize Stable Diffusion pipeline with optimized settings."""
-        self.pipe = self.cache.load_model(
-            "runwayml/stable-diffusion-v1-5",
-            lambda model_id: StableDiffusionPipeline.from_pretrained(
-                model_id,
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                safety_checker=None
-            ),
-            "stable_diffusion"
-        )
-        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
-            self.pipe.scheduler.config,
-            use_karras_sigmas=True,
-            algorithm_type="dpmsolver++"
-        )
-        self.pipe = self.pipe.to(self.device)
-        # Memory optimization
-        self.pipe.enable_attention_slicing()
-        if torch.cuda.is_available():
-            self.pipe.enable_sequential_cpu_offload()
-    def _load_banglaclip_model(self, weights_path: str) -> CLIPModel:
-        try:
-            if not Path(weights_path).exists():
-                raise FileNotFoundError(f"BanglaCLIP weights not found at {weights_path}")
-            clip_model = CLIPModel.from_pretrained(self.clip_model_name)
-            state_dict = torch.load(weights_path, map_location=self.device)
-            cleaned_state_dict = {
-                k.replace('module.', '').replace('clip.', ''): v
-                for k, v in state_dict.items()
-                if k.replace('module.', '').replace('clip.', '').startswith(('text_model.', 'vision_model.'))
-            }
-            clip_model.load_state_dict(cleaned_state_dict, strict=False)
-            return clip_model.to(self.device)
-        except Exception as e:
-            logger.error(f"Failed to load BanglaCLIP model: {str(e)}")
-            raise
-    def _load_context_data(self):
-        """Load location and scene context data."""
-        self.location_contexts = {
-            'কক্সবাজার': 'Cox\'s Bazar beach, longest natural sea beach in the world, sandy beach',
-            'সেন্টমার্টিন': 'Saint Martin\'s Island, coral island, tropical paradise',
-            'সুন্দরবন': 'Sundarbans mangrove forest, Bengal tigers, riverine forest'
-        }
-        self.scene_contexts = {
-            'সৈকত': 'beach, seaside, waves, sandy shore, ocean view',
-            'সমুদ্র': 'ocean, sea waves, deep blue water, horizon',
-            'পাহাড়': 'mountains, hills, valleys, scenic landscape'
-        }
-    def _translate_text(self, bangla_text: str) -> str:
-        """Translate Bangla text to English."""
-        inputs = self.trans_tokenizer(bangla_text, return_tensors="pt", padding=True)
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        with torch.no_grad():
-            outputs = self.translator.generate(**inputs)
-        translated = self.trans_tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return translated
-    def _get_text_embedding(self, text: str):
-        """Get text embedding from BanglaCLIP model."""
-        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
-        with torch.no_grad():
-            outputs = self.banglaclip_model.get_text_features(**inputs)
-        return outputs
-    def generate_image(
-        self,
-        bangla_text: str,
-        config: Optional[GenerationConfig] = None
-    ) -> Tuple[List[Any], str]:
-        if not bangla_text.strip():
-            raise ValueError("Empty input text")
-        config = config or GenerationConfig()
-        try:
-            if config.seed is not None:
-                torch.manual_seed(config.seed)
-            enhanced_prompt = self._enhance_prompt(bangla_text)
-            negative_prompt = self._get_negative_prompt()
-            with torch.autocast(self.device.type):
-                result = self.pipe(
-                    prompt=enhanced_prompt,
-                    negative_prompt=negative_prompt,
-                    num_images_per_prompt=config.num_images,
-                    num_inference_steps=config.num_inference_steps,
-                    guidance_scale=config.guidance_scale
-                )
-            return result.images, enhanced_prompt
-        except Exception as e:
-            logger.error(f"Error during image generation: {str(e)}")
-            raise
-    def _enhance_prompt(self, bangla_text: str) -> str:
-        """Enhance prompt with context and style information."""
-        translated_text = self._translate_text(bangla_text)
-        # Gather contexts
-        contexts = []
-        contexts.extend(context for loc, context in self.location_contexts.items() if loc in bangla_text)
-        contexts.extend(context for scene, context in self.scene_contexts.items() if scene in bangla_text)
-        # Add photo style
-        photo_style = [
-            "professional photography",
-            "high resolution",
-            "4k",
-            "detailed",
-            "realistic",
-            "beautiful composition"
-        ]
-        # Combine all parts
-        all_parts = [translated_text] + contexts + photo_style
-        return ", ".join(dict.fromkeys(all_parts))
-    def _get_negative_prompt(self) -> str:
-        return (
-            "blurry, low quality, pixelated, cartoon, anime, illustration, "
-            "painting, drawing, artificial, fake, oversaturated, undersaturated"
-        )
-    def cleanup(self):
-        """Clean up GPU memory"""
-        if hasattr(self, 'pipe'):
-            del self.pipe
-        if hasattr(self, 'banglaclip_model'):
-            del self.banglaclip_model
-        if hasattr(self, 'translator'):
-            del self.translator
-        torch.cuda.empty_cache()
-        gc.collect()
 def create_gradio_interface():
     """Create and configure the Gradio interface."""
@@ -270,7 +151,7 @@ def create_gradio_interface():
             cleanup_generator()
             return None, f"ছবি তৈরি ব্যর্থ হয়েছে: {str(e)}"
-    # Create Gradio interface
     demo = gr.Interface(
         fn=generate_images,
         inputs=[
@@ -318,4 +199,4 @@ def create_gradio_interface():
 if __name__ == "__main__":
     demo = create_gradio_interface()
     # Fixed queue configuration for newer Gradio versions
-    demo.queue().launch(share=True)

 import torch
+import os
+import requests
+import logging
+import gc
+from pathlib import Path
 from transformers import CLIPModel, CLIPProcessor, AutoTokenizer, MarianMTModel, MarianTokenizer
 from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
 import gradio as gr
+from typing import List, Tuple, Optional, Dict, Any
 from dataclasses import dataclass
 # Configure logging
 logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
+def download_model(model_url: str, model_path: str):
+    """Download large model file with progress tracking."""
+    if not os.path.exists(model_path):
+        try:
+            logger.info(f"Downloading model from {model_url}...")
+            response = requests.get(model_url, stream=True)
+            response.raise_for_status()
+            total_size = int(response.headers.get('content-length', 0))
+            block_size = 1024 * 1024  # 1 MB chunks
+            downloaded_size = 0
+            with open(model_path, 'wb') as f:
+                for data in response.iter_content(block_size):
+                    f.write(data)
+                    downloaded_size += len(data)
+                    progress = (downloaded_size / total_size) * 100 if total_size > 0 else 0
+                    logger.info(f"Download progress: {progress:.2f}%")
+            logger.info("Model download complete.")
+        except Exception as e:
+            logger.error(f"Model download failed: {e}")
+            raise
 @dataclass
 class GenerationConfig:
     num_images: int = 1
         cache_dir: str,
         device: Optional[torch.device] = None
     ):
+        # Download model if not exists
+        download_model(
+            "https://huggingface.co/Mansuba/BanglaCLIP13/resolve/main/banglaclip_model_epoch_10.pth",
+            banglaclip_weights_path
+        )
         self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logger.info(f"Using device: {self.device}")
     def _initialize_models(self, banglaclip_weights_path: str):
         try:
+            # Translation models
             self.bn2en_model_name = "Helsinki-NLP/opus-mt-bn-en"
             self.translator = self.cache.load_model(
                 self.bn2en_model_name,
             ).to(self.device)
             self.trans_tokenizer = MarianTokenizer.from_pretrained(self.bn2en_model_name)
+            # CLIP models
             self.clip_model_name = "openai/clip-vit-base-patch32"
             self.bangla_text_model = "csebuetnlp/banglabert"
             self.banglaclip_model = self._load_banglaclip_model(banglaclip_weights_path)
             self.processor = CLIPProcessor.from_pretrained(self.clip_model_name)
             self.tokenizer = AutoTokenizer.from_pretrained(self.bangla_text_model)
+            # Stable Diffusion
             self._initialize_stable_diffusion()
         except Exception as e:
             logger.error(f"Error initializing models: {str(e)}")
             raise RuntimeError(f"Failed to initialize models: {str(e)}")
+    # ... [Rest of the previous implementation remains the same] ...
 def create_gradio_interface():
     """Create and configure the Gradio interface."""
             cleanup_generator()
             return None, f"ছবি তৈরি ব্যর্থ হয়েছে: {str(e)}"
+    # Gradio interface configuration
     demo = gr.Interface(
         fn=generate_images,
         inputs=[
 if __name__ == "__main__":
     demo = create_gradio_interface()
     # Fixed queue configuration for newer Gradio versions
+    demo.queue().launch(share=True, debug=True)