sohamchitimali committed on
Commit
9a1d5cb
·
1 Parent(s): 88c3900

Trying Qwen Model

Browse files
Files changed (1) hide show
  1. app.py +20 -25
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
3
  import torch
4
  import faiss
5
  import numpy as np
@@ -265,7 +265,7 @@ class OptimizedChunker:
265
  return min(score, 3.0)
266
 
267
  class PowerfulQASystem:
268
- """High-performance QA system using Mistral 7B with domain enhancements"""
269
 
270
  def __init__(self):
271
  self.qa_pipeline = None
@@ -274,41 +274,36 @@ class PowerfulQASystem:
274
  self.initialize_powerful_models()
275
 
276
  def initialize_powerful_models(self):
277
- """Initialize Mistral 7B with optimizations"""
278
- model_name = "mistralai/Mistral-7B-Instruct-v0.3"
279
- logger.info(f"Loading high-performance model: {model_name}")
280
  try:
281
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
 
 
 
 
282
  self.model = AutoModelForCausalLM.from_pretrained(
283
  model_name,
284
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
285
  device_map="auto" if torch.cuda.is_available() else None,
286
- quantization_config=BitsAndBytesConfig(
287
- load_in_4bit=True,
288
- bnb_4bit_compute_dtype=torch.float16,
289
- bnb_4bit_use_double_quant=True,
290
- bnb_4bit_quant_type="nf4"
291
- ) if torch.cuda.is_available() else None
292
  )
293
  self.qa_pipeline = pipeline(
294
  "text-generation",
295
  model=self.model,
296
  tokenizer=self.tokenizer,
297
  device=0 if torch.cuda.is_available() else -1,
298
- max_new_tokens=400,
299
- max_length=512,
300
  return_full_text=False
301
  )
302
- logger.info("Mistral 7B loaded successfully")
303
  except Exception as e:
304
- logger.error(f"Failed to load Mistral 7B: {e}")
305
- self.qa_pipeline = pipeline(
306
- "text-generation",
307
- model=model_name,
308
- device=-1,
309
- max_new_tokens=400
310
- )
311
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
312
 
313
  def _enhance_question(self, question: str) -> str:
314
  """Enhance question for better model understanding"""
@@ -757,14 +752,14 @@ with gr.Blocks(
757
  gr.HTML("""
758
  <div class="performance-highlight">
759
  <h1>🚀 High-Performance Document QA System</h1>
760
- <p><strong>Powered by Mistral 7B + MPNet Embeddings + RAG Pipeline</strong></p>
761
  <p>Optimized for insurance, legal, HR, and compliance documents with 90-95% accuracy</p>
762
  </div>
763
  """)
764
 
765
  with gr.Tab("🎯 Hackathon Submission"):
766
  gr.Markdown("### Production-Ready Processing with State-of-the-Art Models")
767
- gr.Markdown("**Current Models**: Mistral-7B-Instruct-v0.3 (QA, 4-bit quantized) + all-mpnet-base-v2 (Embeddings)")
768
 
769
  with gr.Row():
770
  with gr.Column():
@@ -828,6 +823,6 @@ app = gr.mount_gradio_app(api_app, demo, path="/")
828
 
829
  if __name__ == "__main__":
830
  logger.info("Starting High-Performance Document QA System...")
831
- logger.info("Models: Mistral-7B-Instruct-v0.3 (QA, 4-bit quantized) + all-mpnet-base-v2 (Embeddings)")
832
  logger.info("Optimized for insurance, legal, HR, and compliance documents")
833
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  import torch
4
  import faiss
5
  import numpy as np
 
265
  return min(score, 3.0)
266
 
267
  class PowerfulQASystem:
268
+ """High-performance QA system using Qwen2.5-3B-Instruct with domain enhancements"""
269
 
270
  def __init__(self):
271
  self.qa_pipeline = None
 
274
  self.initialize_powerful_models()
275
 
276
  def initialize_powerful_models(self):
277
+ """Initialize Qwen2.5-3B-Instruct with 4-bit quantization"""
278
+ model_name = "Qwen/Qwen2.5-3B-Instruct"
279
+ logger.info(f"Loading high-performance model: {model_name} (4-bit quantized)")
280
  try:
281
  self.tokenizer = AutoTokenizer.from_pretrained(model_name)
282
+ quantization_config = BitsAndBytesConfig(
283
+ load_in_4bit=True,
284
+ bnb_4bit_compute_dtype=torch.float16,
285
+ bnb_4bit_use_double_quant=True,
286
+ bnb_4bit_quant_type="nf4"
287
+ ) if torch.cuda.is_available() else None
288
  self.model = AutoModelForCausalLM.from_pretrained(
289
  model_name,
290
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
291
  device_map="auto" if torch.cuda.is_available() else None,
292
+ quantization_config=quantization_config
 
 
 
 
 
293
  )
294
  self.qa_pipeline = pipeline(
295
  "text-generation",
296
  model=self.model,
297
  tokenizer=self.tokenizer,
298
  device=0 if torch.cuda.is_available() else -1,
299
+ max_new_tokens=150,
300
+ max_length=2048,
301
  return_full_text=False
302
  )
303
+ logger.info(f"Qwen2.5-3B-Instruct loaded successfully {'with 4-bit quantization' if quantization_config else 'on CPU'}")
304
  except Exception as e:
305
+ logger.error(f"Failed to load Qwen2.5-3B-Instruct: {e}")
306
+ raise RuntimeError(f"Model loading failed: {str(e)}")
 
 
 
 
 
 
307
 
308
  def _enhance_question(self, question: str) -> str:
309
  """Enhance question for better model understanding"""
 
752
  gr.HTML("""
753
  <div class="performance-highlight">
754
  <h1>🚀 High-Performance Document QA System</h1>
755
+ <p><strong>Powered by Qwen2.5-3B-Instruct + MPNet Embeddings + RAG Pipeline</strong></p>
756
  <p>Optimized for insurance, legal, HR, and compliance documents with 90-95% accuracy</p>
757
  </div>
758
  """)
759
 
760
  with gr.Tab("🎯 Hackathon Submission"):
761
  gr.Markdown("### Production-Ready Processing with State-of-the-Art Models")
762
+ gr.Markdown("**Current Models**: Qwen2.5-3B-Instruct (QA, unquantized) + all-mpnet-base-v2 (Embeddings)")
763
 
764
  with gr.Row():
765
  with gr.Column():
 
823
 
824
if __name__ == "__main__":
    logger.info("Starting High-Performance Document QA System...")
    # Fixed banner: the previous text claimed "unquantized", but
    # initialize_powerful_models() applies 4-bit NF4 quantization whenever
    # CUDA is available — keep the startup log consistent with the code.
    logger.info("Models: Qwen2.5-3B-Instruct (QA, 4-bit quantized on GPU) + all-mpnet-base-v2 (Embeddings)")
    logger.info("Optimized for insurance, legal, HR, and compliance documents")
    # Bind to all interfaces on 7860 (the standard Hugging Face Spaces port).
    uvicorn.run(app, host="0.0.0.0", port=7860)