Spaces:

bajajhackrx
/

model

Sleeping

App Files Files Community

sohamchitimali commited on Aug 5, 2025

Commit

9a1d5cb

1 Parent(s): 88c3900

Trying Qwen Model

Browse files

Files changed (1) hide show

app.py +20 -25

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
 import torch
 import faiss
 import numpy as np
@@ -265,7 +265,7 @@ class OptimizedChunker:
         return min(score, 3.0)
 class PowerfulQASystem:
-    """High-performance QA system using Mistral 7B with domain enhancements"""
     def __init__(self):
         self.qa_pipeline = None
@@ -274,41 +274,36 @@ class PowerfulQASystem:
         self.initialize_powerful_models()
     def initialize_powerful_models(self):
-        """Initialize Mistral 7B with optimizations"""
-        model_name = "mistralai/Mistral-7B-Instruct-v0.3"
-        logger.info(f"Loading high-performance model: {model_name}")
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                 device_map="auto" if torch.cuda.is_available() else None,
-                quantization_config=BitsAndBytesConfig(
-                    load_in_4bit=True,
-                    bnb_4bit_compute_dtype=torch.float16,
-                    bnb_4bit_use_double_quant=True,
-                    bnb_4bit_quant_type="nf4"
-                ) if torch.cuda.is_available() else None
             )
             self.qa_pipeline = pipeline(
                 "text-generation",
                 model=self.model,
                 tokenizer=self.tokenizer,
                 device=0 if torch.cuda.is_available() else -1,
-                max_new_tokens=400,
-                max_length=512,
                 return_full_text=False
             )
-            logger.info("Mistral 7B loaded successfully")
         except Exception as e:
-            logger.error(f"Failed to load Mistral 7B: {e}")
-            self.qa_pipeline = pipeline(
-                "text-generation",
-                model=model_name,
-                device=-1,
-                max_new_tokens=400
-            )
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
     def _enhance_question(self, question: str) -> str:
         """Enhance question for better model understanding"""
@@ -757,14 +752,14 @@ with gr.Blocks(
     gr.HTML("""
     <div class="performance-highlight">
         <h1>🚀 High-Performance Document QA System</h1>
-        <p><strong>Powered by Mistral 7B + MPNet Embeddings + RAG Pipeline</strong></p>
         <p>Optimized for insurance, legal, HR, and compliance documents with 90-95% accuracy</p>
     </div>
     """)
     with gr.Tab("🎯 Hackathon Submission"):
         gr.Markdown("### Production-Ready Processing with State-of-the-Art Models")
-        gr.Markdown("**Current Models**: Mistral-7B-Instruct-v0.3 (QA, 4-bit quantized) + all-mpnet-base-v2 (Embeddings)")
         with gr.Row():
             with gr.Column():
@@ -828,6 +823,6 @@ app = gr.mount_gradio_app(api_app, demo, path="/")
 if __name__ == "__main__":
     logger.info("Starting High-Performance Document QA System...")
-    logger.info("Models: Mistral-7B-Instruct-v0.3 (QA, 4-bit quantized) + all-mpnet-base-v2 (Embeddings)")
     logger.info("Optimized for insurance, legal, HR, and compliance documents")
     uvicorn.run(app, host="0.0.0.0", port=7860)

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 import faiss
 import numpy as np
         return min(score, 3.0)
 class PowerfulQASystem:
+    """High-performance QA system using Qwen2.5-3B-Instruct with domain enhancements"""
     def __init__(self):
         self.qa_pipeline = None
         self.initialize_powerful_models()
     def initialize_powerful_models(self):
+        """Initialize Qwen2.5-3B-Instruct with 4-bit quantization"""
+        model_name = "Qwen/Qwen2.5-3B-Instruct"
+        logger.info(f"Loading high-performance model: {model_name} (4-bit quantized)")
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.float16,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4"
+            ) if torch.cuda.is_available() else None
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                 device_map="auto" if torch.cuda.is_available() else None,
+                quantization_config=quantization_config
             )
             self.qa_pipeline = pipeline(
                 "text-generation",
                 model=self.model,
                 tokenizer=self.tokenizer,
                 device=0 if torch.cuda.is_available() else -1,
+                max_new_tokens=150,
+                max_length=2048,
                 return_full_text=False
             )
+            logger.info(f"Qwen2.5-3B-Instruct loaded successfully {'with 4-bit quantization' if quantization_config else 'on CPU'}")
         except Exception as e:
+            logger.error(f"Failed to load Qwen2.5-3B-Instruct: {e}")
+            raise RuntimeError(f"Model loading failed: {str(e)}")
     def _enhance_question(self, question: str) -> str:
         """Enhance question for better model understanding"""
     gr.HTML("""
     <div class="performance-highlight">
         <h1>🚀 High-Performance Document QA System</h1>
+        <p><strong>Powered by Qwen2.5-3B-Instruct + MPNet Embeddings + RAG Pipeline</strong></p>
         <p>Optimized for insurance, legal, HR, and compliance documents with 90-95% accuracy</p>
     </div>
     """)
     with gr.Tab("🎯 Hackathon Submission"):
         gr.Markdown("### Production-Ready Processing with State-of-the-Art Models")
+        gr.Markdown("**Current Models**: Qwen2.5-3B-Instruct (QA, unquantized) + all-mpnet-base-v2 (Embeddings)")
         with gr.Row():
             with gr.Column():
 if __name__ == "__main__":
     logger.info("Starting High-Performance Document QA System...")
+    logger.info("Models: Qwen2.5-3B-Instruct (QA, unquantized) + all-mpnet-base-v2 (Embeddings)")
     logger.info("Optimized for insurance, legal, HR, and compliance documents")
     uvicorn.run(app, host="0.0.0.0", port=7860)