Spaces:
Sleeping
Sleeping
Commit ·
9a1d5cb
1
Parent(s): 88c3900
Trying Qwen Model
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 3 |
import torch
|
| 4 |
import faiss
|
| 5 |
import numpy as np
|
|
@@ -265,7 +265,7 @@ class OptimizedChunker:
|
|
| 265 |
return min(score, 3.0)
|
| 266 |
|
| 267 |
class PowerfulQASystem:
|
| 268 |
-
"""High-performance QA system using
|
| 269 |
|
| 270 |
def __init__(self):
|
| 271 |
self.qa_pipeline = None
|
|
@@ -274,41 +274,36 @@ class PowerfulQASystem:
|
|
| 274 |
self.initialize_powerful_models()
|
| 275 |
|
| 276 |
def initialize_powerful_models(self):
|
| 277 |
-
"""Initialize
|
| 278 |
-
model_name = "
|
| 279 |
-
logger.info(f"Loading high-performance model: {model_name}")
|
| 280 |
try:
|
| 281 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 283 |
model_name,
|
| 284 |
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 285 |
device_map="auto" if torch.cuda.is_available() else None,
|
| 286 |
-
quantization_config=
|
| 287 |
-
load_in_4bit=True,
|
| 288 |
-
bnb_4bit_compute_dtype=torch.float16,
|
| 289 |
-
bnb_4bit_use_double_quant=True,
|
| 290 |
-
bnb_4bit_quant_type="nf4"
|
| 291 |
-
) if torch.cuda.is_available() else None
|
| 292 |
)
|
| 293 |
self.qa_pipeline = pipeline(
|
| 294 |
"text-generation",
|
| 295 |
model=self.model,
|
| 296 |
tokenizer=self.tokenizer,
|
| 297 |
device=0 if torch.cuda.is_available() else -1,
|
| 298 |
-
max_new_tokens=
|
| 299 |
-
max_length=
|
| 300 |
return_full_text=False
|
| 301 |
)
|
| 302 |
-
logger.info("
|
| 303 |
except Exception as e:
|
| 304 |
-
logger.error(f"Failed to load
|
| 305 |
-
|
| 306 |
-
"text-generation",
|
| 307 |
-
model=model_name,
|
| 308 |
-
device=-1,
|
| 309 |
-
max_new_tokens=400
|
| 310 |
-
)
|
| 311 |
-
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 312 |
|
| 313 |
def _enhance_question(self, question: str) -> str:
|
| 314 |
"""Enhance question for better model understanding"""
|
|
@@ -757,14 +752,14 @@ with gr.Blocks(
|
|
| 757 |
gr.HTML("""
|
| 758 |
<div class="performance-highlight">
|
| 759 |
<h1>🚀 High-Performance Document QA System</h1>
|
| 760 |
-
<p><strong>Powered by
|
| 761 |
<p>Optimized for insurance, legal, HR, and compliance documents with 90-95% accuracy</p>
|
| 762 |
</div>
|
| 763 |
""")
|
| 764 |
|
| 765 |
with gr.Tab("🎯 Hackathon Submission"):
|
| 766 |
gr.Markdown("### Production-Ready Processing with State-of-the-Art Models")
|
| 767 |
-
gr.Markdown("**Current Models**:
|
| 768 |
|
| 769 |
with gr.Row():
|
| 770 |
with gr.Column():
|
|
@@ -828,6 +823,6 @@ app = gr.mount_gradio_app(api_app, demo, path="/")
|
|
| 828 |
|
| 829 |
if __name__ == "__main__":
|
| 830 |
logger.info("Starting High-Performance Document QA System...")
|
| 831 |
-
logger.info("Models:
|
| 832 |
logger.info("Optimized for insurance, legal, HR, and compliance documents")
|
| 833 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 3 |
import torch
|
| 4 |
import faiss
|
| 5 |
import numpy as np
|
|
|
|
| 265 |
return min(score, 3.0)
|
| 266 |
|
| 267 |
class PowerfulQASystem:
|
| 268 |
+
"""High-performance QA system using Qwen2.5-3B-Instruct with domain enhancements"""
|
| 269 |
|
| 270 |
def __init__(self):
|
| 271 |
self.qa_pipeline = None
|
|
|
|
| 274 |
self.initialize_powerful_models()
|
| 275 |
|
| 276 |
def initialize_powerful_models(self):
|
| 277 |
+
"""Initialize Qwen2.5-3B-Instruct with 4-bit quantization"""
|
| 278 |
+
model_name = "Qwen/Qwen2.5-3B-Instruct"
|
| 279 |
+
logger.info(f"Loading high-performance model: {model_name} (4-bit quantized)")
|
| 280 |
try:
|
| 281 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 282 |
+
quantization_config = BitsAndBytesConfig(
|
| 283 |
+
load_in_4bit=True,
|
| 284 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 285 |
+
bnb_4bit_use_double_quant=True,
|
| 286 |
+
bnb_4bit_quant_type="nf4"
|
| 287 |
+
) if torch.cuda.is_available() else None
|
| 288 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 289 |
model_name,
|
| 290 |
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
| 291 |
device_map="auto" if torch.cuda.is_available() else None,
|
| 292 |
+
quantization_config=quantization_config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
)
|
| 294 |
self.qa_pipeline = pipeline(
|
| 295 |
"text-generation",
|
| 296 |
model=self.model,
|
| 297 |
tokenizer=self.tokenizer,
|
| 298 |
device=0 if torch.cuda.is_available() else -1,
|
| 299 |
+
max_new_tokens=150,
|
| 300 |
+
max_length=2048,
|
| 301 |
return_full_text=False
|
| 302 |
)
|
| 303 |
+
logger.info(f"Qwen2.5-3B-Instruct loaded successfully {'with 4-bit quantization' if quantization_config else 'on CPU'}")
|
| 304 |
except Exception as e:
|
| 305 |
+
logger.error(f"Failed to load Qwen2.5-3B-Instruct: {e}")
|
| 306 |
+
raise RuntimeError(f"Model loading failed: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
def _enhance_question(self, question: str) -> str:
|
| 309 |
"""Enhance question for better model understanding"""
|
|
|
|
| 752 |
gr.HTML("""
|
| 753 |
<div class="performance-highlight">
|
| 754 |
<h1>🚀 High-Performance Document QA System</h1>
|
| 755 |
+
<p><strong>Powered by Qwen2.5-3B-Instruct + MPNet Embeddings + RAG Pipeline</strong></p>
|
| 756 |
<p>Optimized for insurance, legal, HR, and compliance documents with 90-95% accuracy</p>
|
| 757 |
</div>
|
| 758 |
""")
|
| 759 |
|
| 760 |
with gr.Tab("🎯 Hackathon Submission"):
|
| 761 |
gr.Markdown("### Production-Ready Processing with State-of-the-Art Models")
|
| 762 |
+
gr.Markdown("**Current Models**: Qwen2.5-3B-Instruct (QA, unquantized) + all-mpnet-base-v2 (Embeddings)")
|
| 763 |
|
| 764 |
with gr.Row():
|
| 765 |
with gr.Column():
|
|
|
|
| 823 |
|
| 824 |
if __name__ == "__main__":
|
| 825 |
logger.info("Starting High-Performance Document QA System...")
|
| 826 |
+
logger.info("Models: Qwen2.5-3B-Instruct (QA, unquantized) + all-mpnet-base-v2 (Embeddings)")
|
| 827 |
logger.info("Optimized for insurance, legal, HR, and compliance documents")
|
| 828 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|