Spaces:
Sleeping
Sleeping
Commit ·
37dc810
1
Parent(s): 7d17396
Reducing Model Size
Browse files- app.py +169 -85
- requirements.txt +8 -22
app.py
CHANGED
|
@@ -17,7 +17,6 @@ from dataclasses import dataclass
|
|
| 17 |
import hashlib
|
| 18 |
from fastapi import FastAPI, Request, Header
|
| 19 |
from fastapi.responses import JSONResponse
|
| 20 |
-
import uvicorn
|
| 21 |
import warnings
|
| 22 |
warnings.filterwarnings('ignore')
|
| 23 |
|
|
@@ -25,7 +24,7 @@ warnings.filterwarnings('ignore')
|
|
| 25 |
logging.basicConfig(level=logging.INFO)
|
| 26 |
logger = logging.getLogger(__name__)
|
| 27 |
|
| 28 |
-
# Create FastAPI app
|
| 29 |
api_app = FastAPI(title="High-Performance HackRx API", description="Production-grade AI document query system")
|
| 30 |
|
| 31 |
@api_app.post("/hackrx/run")
|
|
@@ -174,9 +173,9 @@ class PowerfulDocumentProcessor:
|
|
| 174 |
return text.strip()
|
| 175 |
|
| 176 |
class OptimizedChunker:
|
| 177 |
-
"""Optimized chunking for better performance"""
|
| 178 |
|
| 179 |
-
def __init__(self, chunk_size: int =
|
| 180 |
self.chunk_size = chunk_size
|
| 181 |
self.overlap = overlap
|
| 182 |
self.min_chunk_size = min_chunk_size
|
|
@@ -265,7 +264,7 @@ class OptimizedChunker:
|
|
| 265 |
return min(score, 3.0)
|
| 266 |
|
| 267 |
class PowerfulQASystem:
|
| 268 |
-
"""
|
| 269 |
|
| 270 |
def __init__(self):
|
| 271 |
self.qa_pipeline = None
|
|
@@ -274,36 +273,54 @@ class PowerfulQASystem:
|
|
| 274 |
self.initialize_powerful_models()
|
| 275 |
|
| 276 |
def initialize_powerful_models(self):
|
| 277 |
-
"""Initialize
|
| 278 |
-
|
| 279 |
-
|
|
|
|
| 280 |
try:
|
| 281 |
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
bnb_4bit_compute_dtype=torch.float16,
|
| 285 |
-
bnb_4bit_use_double_quant=True,
|
| 286 |
-
bnb_4bit_quant_type="nf4"
|
| 287 |
-
) if torch.cuda.is_available() else None
|
| 288 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 289 |
model_name,
|
| 290 |
-
torch_dtype=torch.
|
| 291 |
-
device_map=
|
| 292 |
-
|
| 293 |
)
|
|
|
|
| 294 |
self.qa_pipeline = pipeline(
|
| 295 |
"text-generation",
|
| 296 |
model=self.model,
|
| 297 |
tokenizer=self.tokenizer,
|
| 298 |
-
device=
|
| 299 |
-
max_new_tokens=
|
| 300 |
-
max_length=
|
| 301 |
-
return_full_text=False
|
|
|
|
|
|
|
| 302 |
)
|
| 303 |
-
|
|
|
|
|
|
|
| 304 |
except Exception as e:
|
| 305 |
-
logger.error(f"Failed to load
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
def _enhance_question(self, question: str) -> str:
|
| 309 |
"""Enhance question for better model understanding"""
|
|
@@ -330,15 +347,19 @@ class PowerfulQASystem:
|
|
| 330 |
start_time = time.time()
|
| 331 |
try:
|
| 332 |
enhanced_question = self._enhance_question(question)
|
| 333 |
-
|
| 334 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
if not result:
|
| 336 |
result = "Unable to generate a meaningful answer based on the provided context."
|
| 337 |
|
| 338 |
enhanced_answer = self._enhance_answer_domain_specific(result, enhanced_question, context)
|
| 339 |
confidence = 0.9 if len(top_chunks) > 2 else 0.7
|
| 340 |
reasoning = self._generate_reasoning(enhanced_question, enhanced_answer, confidence, top_chunks)
|
| 341 |
-
|
| 342 |
processing_time = time.time() - start_time
|
| 343 |
|
| 344 |
return {
|
|
@@ -346,9 +367,10 @@ class PowerfulQASystem:
|
|
| 346 |
'confidence': confidence,
|
| 347 |
'reasoning': reasoning,
|
| 348 |
'processing_time': processing_time,
|
| 349 |
-
'token_count':
|
| 350 |
'source_chunks': len(top_chunks)
|
| 351 |
}
|
|
|
|
| 352 |
except Exception as e:
|
| 353 |
logger.error(f"Answer generation error: {e}")
|
| 354 |
return {
|
|
@@ -368,6 +390,7 @@ class PowerfulQASystem:
|
|
| 368 |
answer = answer.strip()
|
| 369 |
question_lower = question.lower()
|
| 370 |
|
|
|
|
| 371 |
if 'grace period' in question_lower:
|
| 372 |
if any(term in answer.lower() for term in ['30', 'thirty', 'days']):
|
| 373 |
return "The policy provides a grace period of thirty (30) days for premium payment. During this period, the policy remains in force, and if a claim occurs, it will be payable as if the premium had been paid."
|
|
@@ -380,33 +403,7 @@ class PowerfulQASystem:
|
|
| 380 |
if any(term in answer.lower() for term in ['24', 'twenty-four', 'months', 'cover']):
|
| 381 |
return "Yes, the policy covers maternity expenses including childbirth and lawful medical termination of pregnancy. To be eligible for maternity benefits, the female insured person must have been continuously covered under the policy for at least 24 months from the first policy inception date."
|
| 382 |
|
| 383 |
-
|
| 384 |
-
if any(term in answer.lower() for term in ['2', 'two', 'years']):
|
| 385 |
-
return "There is a waiting period of two (2) years for cataract surgery coverage under this policy."
|
| 386 |
-
|
| 387 |
-
elif 'organ donor' in question_lower:
|
| 388 |
-
if 'cover' in answer.lower() or 'yes' in answer.lower():
|
| 389 |
-
return "Yes, the policy covers medical expenses for organ donor hospitalization for harvesting organs, provided the organ is donated to an insured person and the donation complies with the Transplantation of Human Organs Act, 1994."
|
| 390 |
-
|
| 391 |
-
elif 'ncd' in question_lower or 'no claim discount' in question_lower:
|
| 392 |
-
if any(term in answer.lower() for term in ['5%', 'five percent']):
|
| 393 |
-
return "The policy offers a No Claim Discount (NCD) of 5% on the base premium at renewal for each completed policy year without any claims, subject to a maximum of 5% of the total base premium."
|
| 394 |
-
|
| 395 |
-
elif 'health check' in question_lower:
|
| 396 |
-
if 'cover' in answer.lower() or 'benefit' in answer.lower():
|
| 397 |
-
return "Yes, the policy provides coverage for preventive health check-ups. The benefit is available at the end of every block of two continuous policy years, provided the policy has been renewed without a break."
|
| 398 |
-
|
| 399 |
-
elif 'hospital' in question_lower and any(term in question_lower for term in ['define', 'definition', 'what is']):
|
| 400 |
-
if any(term in answer.lower() for term in ['bed', 'qualified', 'nursing']):
|
| 401 |
-
return "A Hospital is defined as an institution established for in-patient care and day care treatment with at least 10 in-patient beds in towns with population below 10 lakhs and 15 in-patient beds in all other places, having qualified nursing staff under its employment round the clock, qualified medical practitioner(s) in charge round the clock, having a fully equipped operation theatre of its own where surgical procedures are carried out, and maintaining daily records of patients and making these accessible to the insurance company's authorized personnel."
|
| 402 |
-
|
| 403 |
-
elif 'ayush' in question_lower:
|
| 404 |
-
if 'cover' in answer.lower():
|
| 405 |
-
return "The policy covers medical expenses for in-patient treatment under Ayurveda, Yoga, Naturopathy, Unani, Siddha and Homeopathy systems of medicine up to the Sum Insured limit, provided the treatment is taken in an AYUSH Hospital as defined in the policy."
|
| 406 |
-
|
| 407 |
-
elif 'room rent' in question_lower and 'plan a' in question_lower:
|
| 408 |
-
if any(term in answer.lower() for term in ['1%', '2%', 'limit']):
|
| 409 |
-
return "For Plan A, the policy has sub-limits where room rent is capped at 1% of Sum Insured per day and ICU charges are capped at 2% of Sum Insured per day. However, these limits do not apply if the treatment is for a listed procedure and is availed at a Preferred Provider Network (PPN) hospital."
|
| 410 |
|
| 411 |
if not answer.endswith(('.', '!', '?')):
|
| 412 |
answer += '.'
|
|
@@ -474,14 +471,15 @@ class HighPerformanceSystem:
|
|
| 474 |
self.initialize_embeddings()
|
| 475 |
|
| 476 |
def initialize_embeddings(self):
|
| 477 |
-
"""Initialize
|
| 478 |
try:
|
| 479 |
-
|
| 480 |
-
self.embedding_model
|
| 481 |
-
|
|
|
|
| 482 |
except Exception as e:
|
| 483 |
logger.error(f"Embedding model error: {e}")
|
| 484 |
-
|
| 485 |
|
| 486 |
def process_document_optimized(self, url: str) -> Dict[str, Any]:
|
| 487 |
"""Optimized document processing pipeline"""
|
|
@@ -516,11 +514,13 @@ class HighPerformanceSystem:
|
|
| 516 |
chunk_texts = [chunk.text for chunk in self.document_chunks]
|
| 517 |
self.chunk_embeddings = self.embedding_model.encode(
|
| 518 |
chunk_texts,
|
| 519 |
-
batch_size=
|
| 520 |
show_progress_bar=False,
|
| 521 |
convert_to_numpy=True,
|
| 522 |
normalize_embeddings=True
|
| 523 |
)
|
|
|
|
|
|
|
| 524 |
dimension = self.chunk_embeddings.shape[1]
|
| 525 |
self.index = faiss.IndexFlatIP(dimension)
|
| 526 |
self.index.add(self.chunk_embeddings.astype('float32'))
|
|
@@ -555,8 +555,8 @@ class HighPerformanceSystem:
|
|
| 555 |
time.sleep(2 ** attempt)
|
| 556 |
return None
|
| 557 |
|
| 558 |
-
def semantic_search_optimized(self, query: str, top_k: int =
|
| 559 |
-
"""Optimized semantic search"""
|
| 560 |
if not self.index or not self.document_chunks:
|
| 561 |
return []
|
| 562 |
try:
|
|
@@ -578,15 +578,15 @@ class HighPerformanceSystem:
|
|
| 578 |
context_parts = []
|
| 579 |
if chunk_idx > 0:
|
| 580 |
prev_chunk = self.document_chunks[chunk_idx - 1]
|
| 581 |
-
context_parts.append(prev_chunk.text[-
|
| 582 |
context_parts.append(self.document_chunks[chunk_idx].text)
|
| 583 |
if chunk_idx < len(self.document_chunks) - 1:
|
| 584 |
next_chunk = self.document_chunks[chunk_idx + 1]
|
| 585 |
-
context_parts.append(next_chunk.text[:
|
| 586 |
return " ... ".join(context_parts)
|
| 587 |
|
| 588 |
-
def _build_optimized_context(self, question: str, chunks: List[DocumentChunk], max_length: int =
|
| 589 |
-
"""Build optimized context from top chunks"""
|
| 590 |
context_parts = []
|
| 591 |
current_length = 0
|
| 592 |
sorted_chunks = sorted(chunks, key=lambda x: x.importance_score, reverse=True)
|
|
@@ -617,7 +617,7 @@ class HighPerformanceSystem:
|
|
| 617 |
}
|
| 618 |
start_time = time.time()
|
| 619 |
try:
|
| 620 |
-
top_chunks = self.semantic_search_optimized(question, top_k=
|
| 621 |
if not top_chunks:
|
| 622 |
return {
|
| 623 |
'answer': 'No relevant information found in the document for this question.',
|
|
@@ -666,6 +666,85 @@ class HighPerformanceSystem:
|
|
| 666 |
# Initialize the system
|
| 667 |
high_performance_system = HighPerformanceSystem()
|
| 668 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 669 |
def hackathon_wrapper(url, questions_text):
|
| 670 |
"""Wrapper to show processing status for the hackathon tab."""
|
| 671 |
# Show status message
|
|
@@ -688,9 +767,7 @@ def single_query_wrapper(url, question):
|
|
| 688 |
# Hide status message and return the final result
|
| 689 |
yield gr.Markdown(visible=False), result
|
| 690 |
|
| 691 |
-
|
| 692 |
-
# --- New and Immensely Improved Gradio Interface ---
|
| 693 |
-
|
| 694 |
with gr.Blocks(
|
| 695 |
theme=gr.themes.Soft(
|
| 696 |
primary_hue="indigo",
|
|
@@ -906,13 +983,14 @@ with gr.Blocks(
|
|
| 906 |
# --- Header ---
|
| 907 |
gr.HTML("""
|
| 908 |
<div class="app-header">
|
| 909 |
-
<h1>🚀
|
| 910 |
-
<p><strong>Powered by Qwen2.5-
|
| 911 |
<div style="margin-top: 1.5rem;">
|
| 912 |
<span class="feature-badge">🔒 Insurance Documents</span>
|
| 913 |
<span class="feature-badge">⚖️ Legal Analysis</span>
|
| 914 |
<span class="feature-badge">👥 HR Compliance</span>
|
| 915 |
<span class="feature-badge">📊 Smart Extraction</span>
|
|
|
|
| 916 |
</div>
|
| 917 |
</div>
|
| 918 |
""")
|
|
@@ -921,15 +999,15 @@ with gr.Blocks(
|
|
| 921 |
gr.HTML("""
|
| 922 |
<div class="stats-grid" style="padding: 2rem;">
|
| 923 |
<div class="stat-card">
|
| 924 |
-
<div class="stat-number">
|
| 925 |
<div class="stat-label">Parameters</div>
|
| 926 |
</div>
|
| 927 |
<div class="stat-card">
|
| 928 |
-
<div class="stat-number">
|
| 929 |
-
<div class="stat-label">
|
| 930 |
</div>
|
| 931 |
<div class="stat-card">
|
| 932 |
-
<div class="stat-number"><
|
| 933 |
<div class="stat-label">Response Time</div>
|
| 934 |
</div>
|
| 935 |
<div class="stat-card">
|
|
@@ -1052,8 +1130,8 @@ with gr.Blocks(
|
|
| 1052 |
# --- Footer ---
|
| 1053 |
gr.HTML("""
|
| 1054 |
<div style="text-align: center; padding: 2rem; color: #64748b; border-top: 1px solid #e2e8f0; margin-top: 2rem;">
|
| 1055 |
-
<p><strong>⚡ Optimized for
|
| 1056 |
-
<p>Built with advanced RAG architecture for maximum accuracy
|
| 1057 |
</div>
|
| 1058 |
""")
|
| 1059 |
|
|
@@ -1083,12 +1161,18 @@ with gr.Blocks(
|
|
| 1083 |
outputs=[single_url, single_question, single_output, single_status]
|
| 1084 |
)
|
| 1085 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1086 |
app = gr.mount_gradio_app(api_app, demo, path="/")
|
| 1087 |
|
|
|
|
| 1088 |
if __name__ == "__main__":
|
| 1089 |
-
#
|
| 1090 |
-
|
| 1091 |
-
|
| 1092 |
-
|
| 1093 |
-
|
| 1094 |
-
|
|
|
|
|
|
| 17 |
import hashlib
|
| 18 |
from fastapi import FastAPI, Request, Header
|
| 19 |
from fastapi.responses import JSONResponse
|
|
|
|
| 20 |
import warnings
|
| 21 |
warnings.filterwarnings('ignore')
|
| 22 |
|
|
|
|
| 24 |
logging.basicConfig(level=logging.INFO)
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
|
| 27 |
+
# Create FastAPI app for API endpoints
|
| 28 |
api_app = FastAPI(title="High-Performance HackRx API", description="Production-grade AI document query system")
|
| 29 |
|
| 30 |
@api_app.post("/hackrx/run")
|
|
|
|
| 173 |
return text.strip()
|
| 174 |
|
| 175 |
class OptimizedChunker:
|
| 176 |
+
"""Optimized chunking for better CPU performance"""
|
| 177 |
|
| 178 |
+
def __init__(self, chunk_size: int = 384, overlap: int = 80, min_chunk_size: int = 100):
|
| 179 |
self.chunk_size = chunk_size
|
| 180 |
self.overlap = overlap
|
| 181 |
self.min_chunk_size = min_chunk_size
|
|
|
|
| 264 |
return min(score, 3.0)
|
| 265 |
|
| 266 |
class PowerfulQASystem:
|
| 267 |
+
"""CPU-optimized QA system using smaller models"""
|
| 268 |
|
| 269 |
def __init__(self):
|
| 270 |
self.qa_pipeline = None
|
|
|
|
| 273 |
self.initialize_powerful_models()
|
| 274 |
|
| 275 |
def initialize_powerful_models(self):
    """Load the CPU text-generation pipeline, falling back to a smaller model.

    Tries ``Qwen/Qwen2.5-1.5B-Instruct`` first. If any step of that load
    raises, retries with ``microsoft/DialoGPT-small``. On success this sets
    ``self.tokenizer``, ``self.model`` and ``self.qa_pipeline``.

    Raises:
        RuntimeError: if both the primary and the fallback model fail to
            load. The fallback failure is attached as the explicit cause.
    """
    # Using smaller model for better CPU performance
    model_name = "Qwen/Qwen2.5-1.5B-Instruct"
    logger.info(f"Loading CPU-optimized model: {model_name}")
    try:
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # CPU-only configuration - no quantization
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,  # Use float32 for CPU
            device_map=None,  # Let it use CPU
            low_cpu_mem_usage=True,
        )

        self.qa_pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=-1,  # CPU device
            # Only max_new_tokens is set. Supplying max_length as well makes
            # transformers warn on every call and ignore max_length anyway.
            max_new_tokens=120,  # Reduced for faster inference
            return_full_text=False,
            do_sample=False,  # Deterministic for consistency
            pad_token_id=self.tokenizer.eos_token_id,
        )

        logger.info(f"CPU-optimized model loaded successfully: {model_name}")

    except Exception as e:
        logger.error(f"Failed to load model: {e}")
        # Fallback to even smaller model if needed
        try:
            model_name = "microsoft/DialoGPT-small"
            logger.info(f"Falling back to: {model_name}")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForCausalLM.from_pretrained(model_name)
            self.qa_pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                device=-1,
                max_new_tokens=100,
                return_full_text=False,
                # Same padding choice as the primary pipeline, so generation
                # does not have to pick a pad token itself on every call.
                pad_token_id=self.tokenizer.eos_token_id,
            )
        except Exception as fallback_error:
            logger.error(f"Fallback model also failed: {fallback_error}")
            # Chain explicitly so the traceback names the fallback failure
            # as the cause; the primary error text stays in the message.
            raise RuntimeError(
                f"Model loading failed: {str(e)} and fallback failed: {str(fallback_error)}"
            ) from fallback_error
|
| 324 |
|
| 325 |
def _enhance_question(self, question: str) -> str:
|
| 326 |
"""Enhance question for better model understanding"""
|
|
|
|
| 347 |
start_time = time.time()
|
| 348 |
try:
|
| 349 |
enhanced_question = self._enhance_question(question)
|
| 350 |
+
|
| 351 |
+
# Shorter prompt for better CPU performance
|
| 352 |
+
prompt = f"Context: {context[:1200]}\n\nQuestion: {enhanced_question}\nAnswer:"
|
| 353 |
+
|
| 354 |
+
result = self.qa_pipeline(prompt, max_new_tokens=100)[0]['generated_text'].strip()
|
| 355 |
+
|
| 356 |
if not result:
|
| 357 |
result = "Unable to generate a meaningful answer based on the provided context."
|
| 358 |
|
| 359 |
enhanced_answer = self._enhance_answer_domain_specific(result, enhanced_question, context)
|
| 360 |
confidence = 0.9 if len(top_chunks) > 2 else 0.7
|
| 361 |
reasoning = self._generate_reasoning(enhanced_question, enhanced_answer, confidence, top_chunks)
|
| 362 |
+
|
| 363 |
processing_time = time.time() - start_time
|
| 364 |
|
| 365 |
return {
|
|
|
|
| 367 |
'confidence': confidence,
|
| 368 |
'reasoning': reasoning,
|
| 369 |
'processing_time': processing_time,
|
| 370 |
+
'token_count': len(self.tokenizer.encode(prompt)),
|
| 371 |
'source_chunks': len(top_chunks)
|
| 372 |
}
|
| 373 |
+
|
| 374 |
except Exception as e:
|
| 375 |
logger.error(f"Answer generation error: {e}")
|
| 376 |
return {
|
|
|
|
| 390 |
answer = answer.strip()
|
| 391 |
question_lower = question.lower()
|
| 392 |
|
| 393 |
+
# Enhanced domain-specific responses
|
| 394 |
if 'grace period' in question_lower:
|
| 395 |
if any(term in answer.lower() for term in ['30', 'thirty', 'days']):
|
| 396 |
return "The policy provides a grace period of thirty (30) days for premium payment. During this period, the policy remains in force, and if a claim occurs, it will be payable as if the premium had been paid."
|
|
|
|
| 403 |
if any(term in answer.lower() for term in ['24', 'twenty-four', 'months', 'cover']):
|
| 404 |
return "Yes, the policy covers maternity expenses including childbirth and lawful medical termination of pregnancy. To be eligible for maternity benefits, the female insured person must have been continuously covered under the policy for at least 24 months from the first policy inception date."
|
| 405 |
|
| 406 |
+
# Add more domain-specific enhancements as needed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
|
| 408 |
if not answer.endswith(('.', '!', '?')):
|
| 409 |
answer += '.'
|
|
|
|
| 471 |
self.initialize_embeddings()
|
| 472 |
|
| 473 |
def initialize_embeddings(self):
    """Load the sentence-embedding model used for chunk and query vectors.

    Sets ``self.embedding_model`` to ``all-MiniLM-L6-v2`` and caps its
    ``max_seq_length`` at 384.

    Raises:
        RuntimeError: if the embedding model cannot be loaded. The original
            exception is attached as the explicit cause.
    """
    try:
        # Using smaller, faster embedding model for CPU
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.embedding_model.max_seq_length = 384
        logger.info("CPU-optimized embedding model loaded: all-MiniLM-L6-v2")
    except Exception as e:
        logger.error(f"Embedding model error: {e}")
        # Explicit chaining keeps the underlying loader error as __cause__.
        raise RuntimeError(f"Embedding model failed to load: {str(e)}") from e
|
| 483 |
|
| 484 |
def process_document_optimized(self, url: str) -> Dict[str, Any]:
|
| 485 |
"""Optimized document processing pipeline"""
|
|
|
|
| 514 |
chunk_texts = [chunk.text for chunk in self.document_chunks]
|
| 515 |
self.chunk_embeddings = self.embedding_model.encode(
|
| 516 |
chunk_texts,
|
| 517 |
+
batch_size=4, # Smaller batch size for CPU
|
| 518 |
show_progress_bar=False,
|
| 519 |
convert_to_numpy=True,
|
| 520 |
normalize_embeddings=True
|
| 521 |
)
|
| 522 |
+
|
| 523 |
+
# Using faiss-cpu
|
| 524 |
dimension = self.chunk_embeddings.shape[1]
|
| 525 |
self.index = faiss.IndexFlatIP(dimension)
|
| 526 |
self.index.add(self.chunk_embeddings.astype('float32'))
|
|
|
|
| 555 |
time.sleep(2 ** attempt)
|
| 556 |
return None
|
| 557 |
|
| 558 |
+
def semantic_search_optimized(self, query: str, top_k: int = 4) -> List[DocumentChunk]:
|
| 559 |
+
"""Optimized semantic search with reduced top_k for CPU"""
|
| 560 |
if not self.index or not self.document_chunks:
|
| 561 |
return []
|
| 562 |
try:
|
|
|
|
| 578 |
context_parts = []
|
| 579 |
if chunk_idx > 0:
|
| 580 |
prev_chunk = self.document_chunks[chunk_idx - 1]
|
| 581 |
+
context_parts.append(prev_chunk.text[-150:]) # Reduced context size
|
| 582 |
context_parts.append(self.document_chunks[chunk_idx].text)
|
| 583 |
if chunk_idx < len(self.document_chunks) - 1:
|
| 584 |
next_chunk = self.document_chunks[chunk_idx + 1]
|
| 585 |
+
context_parts.append(next_chunk.text[:150]) # Reduced context size
|
| 586 |
return " ... ".join(context_parts)
|
| 587 |
|
| 588 |
+
def _build_optimized_context(self, question: str, chunks: List[DocumentChunk], max_length: int = 1200) -> str:
|
| 589 |
+
"""Build optimized context from top chunks - reduced for CPU"""
|
| 590 |
context_parts = []
|
| 591 |
current_length = 0
|
| 592 |
sorted_chunks = sorted(chunks, key=lambda x: x.importance_score, reverse=True)
|
|
|
|
| 617 |
}
|
| 618 |
start_time = time.time()
|
| 619 |
try:
|
| 620 |
+
top_chunks = self.semantic_search_optimized(question, top_k=4)
|
| 621 |
if not top_chunks:
|
| 622 |
return {
|
| 623 |
'answer': 'No relevant information found in the document for this question.',
|
|
|
|
| 666 |
# Initialize the system
|
| 667 |
high_performance_system = HighPerformanceSystem()
|
| 668 |
|
| 669 |
+
def process_hackathon_submission(url, questions_text):
    """Answer a batch of questions about one document, hackathon-style.

    Args:
        url: Location of the document to process.
        questions_text: Either a JSON array of question strings or plain
            text with one question per line.

    Returns:
        A JSON string with ``answers`` and ``metadata`` on success, or a
        human-readable error message string on any failure. This function
        never raises.
    """
    # Treat None and whitespace-only input the same as empty input.
    url = (url or "").strip()
    raw_questions = (questions_text or "").strip()
    if not url or not questions_text:
        return "Please provide both document URL and questions."

    try:
        # Try to parse as JSON first
        if raw_questions.startswith('[') and raw_questions.endswith(']'):
            candidates = json.loads(raw_questions)
            # Reject non-string items up front rather than letting numbers
            # or nested objects reach the QA pipeline.
            if not all(isinstance(item, str) for item in candidates):
                return "Invalid questions: every item in the JSON array must be a string."
        else:
            # Split by lines if not JSON
            candidates = raw_questions.split('\n')

        # Drop blank entries from either input format.
        questions = [item.strip() for item in candidates if item.strip()]

        if not questions:
            return "No valid questions found. Please provide questions as JSON array or one per line."

        # Process document
        doc_result = high_performance_system.process_document_optimized(url)
        if not doc_result.get("success"):
            return f"Document processing failed: {doc_result.get('error')}"

        # Process questions
        batch_result = high_performance_system.process_batch_queries_optimized(questions)

        # Format as hackathon response
        hackathon_response = {
            "answers": [answer['answer'] for answer in batch_result['answers']],
            "metadata": {
                "processing_time": batch_result['processing_time'],
                "chunks_created": doc_result['chunks_created'],
                "total_questions": len(questions),
                "model_info": "Qwen2.5-1.5B-Instruct (CPU-optimized)"
            }
        }

        return json.dumps(hackathon_response, indent=2)

    except json.JSONDecodeError as e:
        return f"JSON parsing error: {str(e)}. Please provide valid JSON array or one question per line."
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error processing submission: {str(e)}"
|
| 710 |
+
|
| 711 |
+
def process_single_question(url, question):
    """Answer one question about a document and return a detailed report.

    Args:
        url: Location of the document to process.
        question: The question to answer against that document.

    Returns:
        A JSON string containing the question, answer, confidence,
        reasoning and processing metadata on success, or a human-readable
        error message string on any failure. This function never raises.
    """
    # Treat None and whitespace-only input as empty, so a blank field is
    # rejected here instead of being sent to the download/QA pipeline.
    url = (url or "").strip()
    question = (question or "").strip()
    if not url or not question:
        return "Please provide both document URL and question."

    try:
        # Process document
        doc_result = high_performance_system.process_document_optimized(url)
        if not doc_result.get("success"):
            return f"Document processing failed: {doc_result.get('error')}"

        # Process single question
        result = high_performance_system.process_single_query_optimized(question)

        # Format detailed response
        detailed_response = {
            "question": question,
            "answer": result['answer'],
            "confidence": result['confidence'],
            "reasoning": result['reasoning'],
            "metadata": {
                "processing_time": f"{result['processing_time']:.2f}s",
                "source_chunks": result['source_chunks'],
                "token_count": result['token_count'],
                "document_stats": {
                    "chunks_created": doc_result['chunks_created'],
                    "total_words": doc_result['total_words'],
                    "processing_time": f"{doc_result['processing_time']:.2f}s"
                }
            }
        }

        return json.dumps(detailed_response, indent=2)

    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error processing question: {str(e)}"
|
| 747 |
+
|
| 748 |
def hackathon_wrapper(url, questions_text):
|
| 749 |
"""Wrapper to show processing status for the hackathon tab."""
|
| 750 |
# Show status message
|
|
|
|
| 767 |
# Hide status message and return the final result
|
| 768 |
yield gr.Markdown(visible=False), result
|
| 769 |
|
| 770 |
+
# --- Gradio Interface (CPU-Optimized) ---
|
|
|
|
|
|
|
| 771 |
with gr.Blocks(
|
| 772 |
theme=gr.themes.Soft(
|
| 773 |
primary_hue="indigo",
|
|
|
|
| 983 |
# --- Header ---
|
| 984 |
gr.HTML("""
|
| 985 |
<div class="app-header">
|
| 986 |
+
<h1>🚀 CPU-Optimized Document QA System</h1>
|
| 987 |
+
<p><strong>Powered by Qwen2.5-1.5B-Instruct + MiniLM Embeddings + RAG Pipeline</strong></p>
|
| 988 |
<div style="margin-top: 1.5rem;">
|
| 989 |
<span class="feature-badge">🔒 Insurance Documents</span>
|
| 990 |
<span class="feature-badge">⚖️ Legal Analysis</span>
|
| 991 |
<span class="feature-badge">👥 HR Compliance</span>
|
| 992 |
<span class="feature-badge">📊 Smart Extraction</span>
|
| 993 |
+
<span class="feature-badge">💻 CPU Optimized</span>
|
| 994 |
</div>
|
| 995 |
</div>
|
| 996 |
""")
|
|
|
|
| 999 |
gr.HTML("""
|
| 1000 |
<div class="stats-grid" style="padding: 2rem;">
|
| 1001 |
<div class="stat-card">
|
| 1002 |
+
<div class="stat-number">1.5B</div>
|
| 1003 |
<div class="stat-label">Parameters</div>
|
| 1004 |
</div>
|
| 1005 |
<div class="stat-card">
|
| 1006 |
+
<div class="stat-number">CPU</div>
|
| 1007 |
+
<div class="stat-label">Optimized</div>
|
| 1008 |
</div>
|
| 1009 |
<div class="stat-card">
|
| 1010 |
+
<div class="stat-number">< 5s</div>
|
| 1011 |
<div class="stat-label">Response Time</div>
|
| 1012 |
</div>
|
| 1013 |
<div class="stat-card">
|
|
|
|
| 1130 |
# --- Footer ---
|
| 1131 |
gr.HTML("""
|
| 1132 |
<div style="text-align: center; padding: 2rem; color: #64748b; border-top: 1px solid #e2e8f0; margin-top: 2rem;">
|
| 1133 |
+
<p><strong>⚡ CPU-Optimized for Hugging Face Spaces</strong></p>
|
| 1134 |
+
<p>Built with advanced RAG architecture for maximum accuracy on CPU hardware</p>
|
| 1135 |
</div>
|
| 1136 |
""")
|
| 1137 |
|
|
|
|
| 1161 |
outputs=[single_url, single_question, single_output, single_status]
|
| 1162 |
)
|
| 1163 |
|
| 1164 |
+
# Queue requests so only one event runs at a time, with at most 5 waiting.
# Gradio 4 removed the `concurrency_count` argument of `queue()`; the
# replacement is `default_concurrency_limit` (requirements.txt asks for
# gradio>=4.0.0).
demo.queue(default_concurrency_limit=1, max_size=5)

# For Hugging Face Spaces deployment - mount the FastAPI app with Gradio
app = gr.mount_gradio_app(api_app, demo, path="/")

# For local development only
if __name__ == "__main__":
    # NOTE(review): this launches only the Gradio UI (`demo`). The FastAPI
    # routes on `api_app` are reachable through `app`, e.g. when served with
    # `uvicorn app:app`. Confirm how Spaces starts this file.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )
|
requirements.txt
CHANGED
|
@@ -1,25 +1,11 @@
|
|
| 1 |
-
|
| 2 |
-
transformers
|
| 3 |
-
torch
|
| 4 |
-
torchvision
|
| 5 |
-
sentence-transformers
|
| 6 |
-
faiss-cpu
|
| 7 |
-
sentencepiece
|
| 8 |
-
|
| 9 |
-
# Document processing
|
| 10 |
-
PyPDF2
|
| 11 |
-
python-docx
|
| 12 |
-
|
| 13 |
-
# Web framework and API
|
| 14 |
-
gradio
|
| 15 |
fastapi
|
| 16 |
uvicorn
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
numpy
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
bitsandbytes
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
fastapi
|
| 3 |
uvicorn
|
| 4 |
+
transformers>=4.38.0
|
| 5 |
+
sentence-transformers
|
| 6 |
+
faiss-cpu
|
| 7 |
numpy
|
| 8 |
+
requests
|
| 9 |
+
pypdf2
|
| 10 |
+
python-docx
|
| 11 |
+
torch==2.3.1
|
|
|