Spaces:

zahemen9900
/

finsight-chat

Sleeping

App Files Files Community

zahemen9900 committed on Mar 25, 2025

Commit

cf4153c

1 Parent(s): a938c73

Refactor model loading in FinanceAdvisorBot for improved quantization handling and add asset setup function

Browse files

Files changed (2) hide show

.gitignore +1 -0
app.py +66 -31

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .qodo

app.py CHANGED Viewed

@@ -37,32 +37,57 @@ class FinanceAdvisorBot:
         self.precision = torch.bfloat16 if torch.cuda.is_available() else torch.float32
         logger.info(f"Precision set to {self.precision}")
-        # Load tokenizer and model with precision
         self.tokenizer = AutoTokenizer.from_pretrained(
             base_model,
             padding_side="left",
             trust_remote_code=True
         )
-        # Configure 4-bit quantization
-        bnb_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_compute_dtype=torch.bfloat16
-        )
-        # Load base model with quantization
-        base = AutoModelForCausalLM.from_pretrained(
-            base_model,
-            quantization_config=bnb_config,
-            device_map="auto",
-            trust_remote_code=True
-        )
-        # Load adapter weights from hub
-        self.model = PeftModel.from_pretrained(base, adapter_path)
-        logger.info("Model loaded successfully from HuggingFace Hub")
         self.model.eval()
         self.should_analyze_question = should_analyze_question
@@ -75,19 +100,8 @@ class FinanceAdvisorBot:
         self.system_prompt = {
             "role": "system",
             "content": (
-                "You are FinSight, a professional financial advisor chatbot. "
-                "Follow these guidelines strictly:\n"
-                "1. Provide clear, concise, and accurate financial guidance\n"
-                "2. Focus on factual, practical advice without speculation\n"
-                "3. Use professional but accessible language\n"
-                "4. Break down complex concepts into understandable terms\n"
-                # "5. Maintain objectivity and avoid personal opinions\n"
-                "6. Always consider risk management in advice\n"
-                "7. Be transparent about limitations of AI advice\n"
-                "8. Cite reliable sources when appropriate\n"
-                "9. Encourage due diligence and research\n"
-                "10. Give bullet points and numbered lists when necessary\n"
-                "Remember: You are an AI assistant focused on financial education and guidance."
             )
         }
         self.conversation_history = []
@@ -464,6 +478,27 @@ def create_demo():
     return demo
 if __name__ == "__main__":
     demo = create_demo()
     demo.queue().launch()

         self.precision = torch.bfloat16 if torch.cuda.is_available() else torch.float32
         logger.info(f"Precision set to {self.precision}")
+        # Load tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(
             base_model,
             padding_side="left",
             trust_remote_code=True
         )
+        # Configure model loading based on device
+        model_kwargs = {
+            "trust_remote_code": True,
+            "device_map": "auto"
+        }
+        if self.device == "cuda":
+            # Use 4-bit quantization for GPU
+            bnb_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=torch.bfloat16
+            )
+            model_kwargs["quantization_config"] = bnb_config
+            model_kwargs["torch_dtype"] = self.precision
+        else:
+            # Use 8-bit quantization for CPU
+            model_kwargs["load_in_8bit"] = True
+            model_kwargs["torch_dtype"] = torch.float32
+        try:
+            # Load base model with appropriate configuration
+            base = AutoModelForCausalLM.from_pretrained(
+                base_model,
+                **model_kwargs
+            )
+            # Load adapter weights
+            self.model = PeftModel.from_pretrained(base, adapter_path)
+            logger.info("Model loaded successfully with adapter")
+        except Exception as e:
+            logger.warning(f"Failed to load model with quantization: {e}")
+            logger.info("Falling back to standard model loading")
+            # Fallback to basic loading without quantization
+            base = AutoModelForCausalLM.from_pretrained(
+                base_model,
+                torch_dtype=torch.float32,
+                trust_remote_code=True,
+                device_map="auto"
+            )
+            self.model = PeftModel.from_pretrained(base, adapter_path)
         self.model.eval()
         self.should_analyze_question = should_analyze_question
         self.system_prompt = {
             "role": "system",
             "content": (
+                "You are FinSight, a professional financial advisor chatbot.\n"
+                "Provide clear, concise, and accurate financial guidance to the user."
             )
         }
         self.conversation_history = []
     return demo
+def setup_assets():
+    """Set up assets directory and files"""
+    current_dir = Path(__file__).parent
+    assets_dir = current_dir / "static" / "assets"
+    favicon_path = assets_dir / "favicon.ico"
+    # Create static/assets next to this file, including any missing parent directories;
+    # exist_ok=True means no error is raised when the directory is already present
+    assets_dir.mkdir(parents=True, exist_ok=True)
+    # If the favicon is missing, copy the default from assets/ two levels above this file's directory
+    if not favicon_path.exists():
+        default_favicon = current_dir.parent.parent / "assets" / "favicon.ico"
+        if default_favicon.exists():
+            shutil.copy(default_favicon, favicon_path)
+        else:
+            # No default to copy: create an empty (zero-byte) placeholder file instead
+            favicon_path.touch()
+    return favicon_path
 if __name__ == "__main__":
     demo = create_demo()
     demo.queue().launch()