Update app.py

app.py CHANGED
@@ -28,6 +28,7 @@ class LLaVAPhiModel:
             logging.error(f"Failed to load CLIP processor: {str(e)}")
             self.processor = None
 
+        # Increase history length to retain more context
         self.history = []
         self.model = None
         self.clip = None
@@ -36,13 +37,12 @@ class LLaVAPhiModel:
     def ensure_models_loaded(self):
         """Ensure models are loaded in GPU context"""
         if self.model is None:
-            #
+            # Improved quantization config for better quality
            from transformers import BitsAndBytesConfig
            quantization_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_compute_dtype=torch.float16,
-                bnb_4bit_use_double_quant=False,
-                bnb_4bit_quant_type="nf4"
+                load_in_8bit=True,  # Changed from 4-bit to 8-bit for better quality
+                bnb_8bit_compute_dtype=torch.float16,
+                bnb_8bit_use_double_quant=False
            )
 
            try:
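For reference, a minimal sketch of how this quantization config would be consumed when loading the language model; the checkpoint id below is a placeholder, since the diff does not show the load call. One caveat: as of recent transformers releases, BitsAndBytesConfig only defines bnb_4bit_* tuning fields, so the bnb_8bit_* kwargs above are accepted but ignored with an "Unused kwargs" warning, and load_in_8bit=True is the setting that actually takes effect.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 8-bit weight quantization; matmuls still run in fp16 internally.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",                        # placeholder id, not taken from the diff
    quantization_config=quantization_config,
    device_map="auto",                        # bitsandbytes needs the weights on GPU
)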
@@ -116,17 +116,21 @@ class LLaVAPhiModel:
                 message = "Note: Image processing is not available - continuing with text only.\n" + message
 
             prompt = f"human: {'<image>' if has_image else ''}\n{message}\ngpt:"
+
+            # Include more history for better context (previous 5 turns instead of 3)
             context = ""
-            for turn in self.history[-3:]:
+            for turn in self.history[-5:]:
                 context += f"human: {turn[0]}\ngpt: {turn[1]}\n"
 
             full_prompt = context + prompt
+
+            # Increased context window
             inputs = self.tokenizer(
                 full_prompt,
                 return_tensors="pt",
                 padding=True,
                 truncation=True,
-                max_length=512
+                max_length=1024  # Increased from 512
             )
             inputs = {k: v.to(self.device) for k, v in inputs.items()}
 
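The two changes above interact: more turns are packed into the prompt, and the token budget that clips them is doubled. A self-contained sketch of the same assembly logic (the tokenizer id and sample turns are invented for illustration). Worth noting: with the tokenizer's default truncation_side="right", overflow is cut from the end of the string, i.e. the newest text including the final "gpt:" cue, so very long histories can still degrade responses.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")  # placeholder id

# Hypothetical chat history: (user, assistant) pairs.
history = [(f"question {i}", f"answer {i}") for i in range(8)]
prompt = "human: describe the image\ngpt:"

context = ""
for turn in history[-5:]:  # five most recent turns, as in the diff
    context += f"human: {turn[0]}\ngpt: {turn[1]}\n"

inputs = tokenizer(
    context + prompt,
    return_tensors="pt",
    truncation=True,
    max_length=1024,  # doubled token budget
)
print(inputs["input_ids"].shape)  # at most (1, 1024)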
@@ -134,15 +138,16 @@ class LLaVAPhiModel:
                     inputs["image_features"] = image_features
 
                 with torch.no_grad():
+                    # More conservative generation settings to reduce hallucinations
                     outputs = self.model.generate(
                         **inputs,
                         max_new_tokens=256,
                         min_length=20,
-                        temperature=0.7,
+                        temperature=0.3,  # Reduced from 0.7 for more deterministic output
                         do_sample=True,
-                        top_p=0.
-                        top_k=
-                        repetition_penalty=1.
+                        top_p=0.92,
+                        top_k=50,
+                        repetition_penalty=1.2,  # Adjusted for more natural responses
                         no_repeat_ngram_size=3,
                         use_cache=True,
                         pad_token_id=self.tokenizer.pad_token_id,
@@ -150,30 +155,34 @@ class LLaVAPhiModel:
                     )
             else:
                 prompt = f"human: {message}\ngpt:"
+                # Include more history
                 context = ""
-                for turn in self.history[-3:]:
+                for turn in self.history[-5:]:
                     context += f"human: {turn[0]}\ngpt: {turn[1]}\n"
 
                 full_prompt = context + prompt
+
+                # Increased context window
                 inputs = self.tokenizer(
                     full_prompt,
                     return_tensors="pt",
                     padding=True,
                     truncation=True,
-                    max_length=512
+                    max_length=1024  # Increased from 512
                 )
                 inputs = {k: v.to(self.device) for k, v in inputs.items()}
 
                 with torch.no_grad():
+                    # More conservative generation settings
                     outputs = self.model.generate(
                         **inputs,
-                        max_new_tokens=150,
+                        max_new_tokens=200,  # Slightly increased from 150
                         min_length=20,
-                        temperature=0.6,
+                        temperature=0.3,  # Reduced from 0.6
                         do_sample=True,
-                        top_p=0.
-                        top_k=
-                        repetition_penalty=1.
+                        top_p=0.92,
+                        top_k=50,
+                        repetition_penalty=1.2,
                         no_repeat_ngram_size=4,
                         use_cache=True,
                         pad_token_id=self.tokenizer.pad_token_id,
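Both branches move the sampler in the same direction: a lower temperature sharpens the next-token distribution, top_p and top_k bound the candidate pool, and the repetition penalty discounts tokens already emitted. The temperature effect in isolation, as a quick standalone illustration with toy logits:

import torch

logits = torch.tensor([2.0, 1.0, 0.5, -1.0])  # toy next-token scores

for t in (0.7, 0.3):  # old vs. new temperature from the diff
    probs = torch.softmax(logits / t, dim=-1)
    print(f"T={t}: {[round(p, 3) for p in probs.tolist()]}")
# At T=0.3 the top token takes ~0.96 of the mass, so sampling is close to greedy decoding.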
@@ -202,6 +211,15 @@ class LLaVAPhiModel:
         self.history = []
         return None
 
+    # Add new function to control generation parameters
+    def update_generation_params(self, temperature=0.3, top_p=0.92, top_k=50, repetition_penalty=1.2):
+        """Update generation parameters to control hallucination tendency"""
+        self.temperature = temperature
+        self.top_p = top_p
+        self.top_k = top_k
+        self.repetition_penalty = repetition_penalty
+        return f"Generation parameters updated: temp={temperature}, top_p={top_p}, top_k={top_k}, rep_penalty={repetition_penalty}"
+
 def create_demo():
     try:
         model = LLaVAPhiModel()
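One caveat worth flagging: update_generation_params stores the new values on self, but both generate() calls above still pass hard-coded literals, so updates would have no effect on decoding as written. A sketch of the change that would make the stored attributes live (the __init__ defaults are assumed, mirroring the diff's literals):

# In __init__, seed the attributes with the diff's defaults (assumed):
#     self.temperature = 0.3
#     self.top_p = 0.92
#     self.top_k = 50
#     self.repetition_penalty = 1.2

# Each generate() call then reads the attributes instead of literals:
outputs = self.model.generate(
    **inputs,
    max_new_tokens=256,
    min_length=20,
    temperature=self.temperature,
    do_sample=True,
    top_p=self.top_p,
    top_k=self.top_k,
    repetition_penalty=self.repetition_penalty,
    no_repeat_ngram_size=3,
    use_cache=True,
    pad_token_id=self.tokenizer.pad_token_id,
)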
@@ -209,7 +227,7 @@ def create_demo():
         with gr.Blocks(css="footer {visibility: hidden}") as demo:
             gr.Markdown(
                 """
-            # LLaVA-Phi Demo (
+            # LLaVA-Phi Demo (Optimized for Accuracy)
             Chat with a vision-language model that can understand both text and images.
             """
             )
@@ -229,6 +247,15 @@ def create_demo():
 
                 image = gr.Image(type="pil", label="Upload Image (Optional)")
 
+                # Add generation parameter controls
+                with gr.Accordion("Advanced Settings", open=False):
+                    gr.Markdown("Adjust these parameters to control hallucination tendency")
+                    temp_slider = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="Temperature (lower = more factual)")
+                    top_p_slider = gr.Slider(0.5, 1.0, value=0.92, step=0.01, label="Top-p (nucleus sampling)")
+                    top_k_slider = gr.Slider(10, 100, value=50, step=5, label="Top-k")
+                    rep_penalty_slider = gr.Slider(1.0, 2.0, value=1.2, step=0.1, label="Repetition Penalty")
+                    update_params = gr.Button("Update Parameters")
+
             def respond(message, chat_history, image):
                 if not message and image is None:
                     return chat_history
@@ -241,6 +268,9 @@ def create_demo():
                 model.clear_history()
                 return None, None
 
+            def update_params_fn(temp, top_p, top_k, rep_penalty):
+                return model.update_generation_params(temp, top_p, top_k, rep_penalty)
+
             submit.click(
                 respond,
                 [msg, chatbot, image],
@@ -259,6 +289,12 @@ def create_demo():
                 [msg, chatbot],
             )
 
+            update_params.click(
+                update_params_fn,
+                [temp_slider, top_p_slider, top_k_slider, rep_penalty_slider],
+                None
+            )
+
             return demo
     except Exception as e:
         logging.error(f"Error creating demo: {str(e)}")
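The new click handler follows the standard Gradio events pattern: the four sliders are the inputs, and because the outputs argument is None, the confirmation string returned by update_generation_params is discarded; routing it to a visible component would surface it. A minimal self-contained version of the same wiring (component names mirror the diff; the stub function stands in for the model method):

import gradio as gr

def update_params_fn(temp, top_p, top_k, rep_penalty):
    # Stand-in for model.update_generation_params(...)
    return f"Updated: temp={temp}, top_p={top_p}, top_k={top_k}, rep_penalty={rep_penalty}"

with gr.Blocks() as demo:
    with gr.Accordion("Advanced Settings", open=False):
        temp_slider = gr.Slider(0.1, 1.0, value=0.3, step=0.1, label="Temperature")
        top_p_slider = gr.Slider(0.5, 1.0, value=0.92, step=0.01, label="Top-p")
        top_k_slider = gr.Slider(10, 100, value=50, step=5, label="Top-k")
        rep_penalty_slider = gr.Slider(1.0, 2.0, value=1.2, step=0.1, label="Repetition Penalty")
        status = gr.Markdown()  # shows the confirmation message instead of dropping it
        update_params = gr.Button("Update Parameters")
        update_params.click(
            update_params_fn,
            [temp_slider, top_p_slider, top_k_slider, rep_penalty_slider],
            status,
        )

demo.launch()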