diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,2787 +1,2787 @@ -import json -import re -import random -import sqlite3 as sql -from datetime import datetime -import string -import os -import tempfile -from PIL import Image -import pytesseract -import fitz -from werkzeug.utils import secure_filename -from flask import Flask, request, jsonify, redirect, url_for, render_template_string -from pathlib import Path -import requests - -# Import enhanced RAG utilities -from enhanced_rag_utils import get_comprehensive_context, format_context_for_prompt - -try: - from groq import Groq -except ImportError: - Groq = None - -app = Flask(__name__) -global_parameters = [] -global_json_template = {} - -# Enhanced system prompt with comprehensive QC requirements -ENHANCED_SYSTEM_PROMPT = """ -You are the Swift Check AI assistant, specialized in creating comprehensive Quality Control (QC) checklists and inspection documents for food products with full regulatory compliance. - -# CONTEXT: -You'll help users generate custom QC parameters for various food products following Al Kabeer Group's professional standards. The parameters will be used in quality inspection checklists that QC inspectors fill during product inspections, with full regulatory backing and clause references. - -# COMPREHENSIVE QC CHECKLIST REQUIREMENTS: - -## For Food Products, ALWAYS include these categories (MINIMUM 15+ PARAMETERS): - -### 1. Physical Parameters (4-5 parameters) -- Appearance (Image Upload + Toggle): Color, visual defects, physical state with photo evidence -- Texture (Dropdown + Remarks): Firmness, consistency, crispness with detailed observations -- Size/Dimensions (Numeric Input): Length, width, diameter with tolerance specs (e.g., "60±5mm") -- Weight (Numeric Input): Individual/batch weight with tolerance (e.g., "25±2g") -- Shape (Dropdown): Uniformity, deformation assessment - -### 2. 
Sensory Parameters (3-4 parameters) -- Flavor/Taste (Dropdown + Remarks): Characteristic flavors, off-tastes, intensity -- Aroma/Odor (Dropdown + Remarks): Normal smell, off-odors, freshness -- Mouthfeel (Dropdown): For applicable products (texture after cooking) -- Overall Sensory Assessment (Toggle): Acceptable/Not Acceptable - -### 3. Safety Parameters (4-5 parameters) -- Foreign Objects (Checklist + Image Upload): MUST include comprehensive list: stones, glass, metals, plastic, wood, insects/pests, hair, threads, paper, bones, feathers -- Microbiological Specifications (Table/Numeric Input): Total Plate Count, E.coli, Salmonella, etc. with limits -- Chemical Contaminants (Numeric Input): Heavy metals, pesticides if applicable with ppm limits -- Allergen Declaration (Checklist): All 14 major allergens verification -- Metal Detection Results (Text Input + Toggle): Fe, Non-Fe, SS readings with pass/fail - -### 4. Product-Specific Parameters (2-3 parameters) -- For filled products: Filling weight ratio, filling consistency -- For fried products: Oil absorption, crispness level -- For frozen products: Freezer burn check, ice crystals, clustering -- For baked products: Browning level, doneness, internal temperature - -### 5. Packaging Parameters (3-4 parameters) -- Packaging Integrity (Image Upload + Checklist): Sealing, tears, punctures, label accuracy with photo -- Net Weight Verification (Numeric Input): Package weight vs declared weight with tolerance -- Date Verification (Text Input): Best before date, production date accuracy -- Batch/Lot Traceability (Text Input): Batch code, lot number verification - -### 6. Process Control Parameters (2-3 parameters) -- Temperature Control (Numeric Input): Processing, storage, transport temperatures with specs -- Time Parameters (Numeric Input): Processing time, cooling time with specifications -- Equipment Calibration (Toggle + Text Input): Calibration status, last calibration date - -### 7. 
Compliance & Documentation (2-3 parameters) -- Regulatory Compliance (Checklist): HACCP, Dubai Municipality, ISO requirements -- Documentation Complete (Toggle): All required certificates present -- Inspector Assessment (Toggle + Remarks): Overall quality assessment with detailed remarks - -# PARAMETER TYPES AND INTELLIGENT SELECTION: - -## Image Upload - USE FOR: -- Visual inspections (appearance, defects, packaging condition) -- Evidence documentation (defects, foreign objects) -- Label verification and batch code photos -- Before/after cooking comparisons - -## Toggle - USE FOR: -- Pass/fail decisions (acceptable/not acceptable) -- Present/absent checks (clustering, defects) -- Compliance status (passed/failed) -- Binary quality assessments - -## Checklist - USE FOR: -- Foreign objects (comprehensive list of all possible contaminants) -- Allergens (all 14 major allergens) -- Packaging defects (multiple possible issues) -- Compliance requirements (multiple standards) -- Multi-item verification lists - -## Numeric Input - USE FOR: -- Measurements WITH specifications and units -- Weight: "25±2g", "165±5g" -- Dimensions: "60±5mm length", "7-8 inch diameter" -- Temperature: "-18°C ±2°C", "180°C ±10°C" -- Microbiological limits: "<10^4 CFU/g", "<10^2" -- Chemical limits: "<0.1ppm", "<0.10ppm" -- Time measurements: "2-3 minutes", "30±5 seconds" - -## Text Input - USE FOR: -- Alphanumeric data entry -- Batch numbers, lot codes -- Production dates, expiry dates -- Supplier codes, product codes -- Equipment serial numbers - -## Remarks - USE FOR: -- Detailed observations requiring explanation -- Corrective actions taken -- Special conditions noted -- Inspector additional comments -- Non-conformance descriptions - -# REGULATORY COMPLIANCE: -- Include specific clause references for each parameter when available -- Reference Dubai Municipality guidelines, HACCP principles, ISO standards -- Ensure traceability requirements are met -- Include metal detection and allergen 
management as per UAE regulations - -# OUTPUT FORMAT: -Provide comprehensive, actionable parameters with: -- Minimum 15+ parameters covering all categories above -- Appropriate types based on intelligent selection rules -- Realistic specifications with proper units and tolerances -- Comprehensive options for dropdowns/checklists -- Clause references where applicable (e.g., "Dubai Municipality Section 4.2.1") -- Professional formatting matching Al Kabeer Group standards - -Remember: Generate PROFESSIONAL, COMPREHENSIVE checklists that match Al Kabeer Group's quality standards with full regulatory compliance and intelligent parameter type selection. -""" - -# Enhanced default refine prompt -ENHANCED_DEFAULT_REFINE_PROMPT = """ -Create a comprehensive professional food quality control checklist for the specified product following Al Kabeer Group standards. Include a MINIMUM of 15+ parameters that cover: - -1. PHYSICAL ATTRIBUTES: Appearance (with photo), texture, dimensions, weight with precise tolerance limits -2. SENSORY EVALUATION: Flavor, aroma, taste, mouthfeel characteristics with detailed assessment -3. SAFETY PARAMETERS: Comprehensive foreign objects checklist, microbiological specifications, chemical contaminants, allergen verification -4. PRODUCT-SPECIFIC CHECKS: Based on processing method (frozen, fried, baked, filled, etc.) with specialized parameters -5. PACKAGING INTEGRITY: Visual inspection with photos, seal quality, labeling accuracy, weight verification -6. PROCESS CONTROL: Temperature monitoring, time parameters, equipment calibration status -7. COMPLIANCE VERIFICATION: HACCP principles, Dubai Municipality requirements, ISO standards, traceability -8. 
DOCUMENTATION: Batch codes, production dates, certificates, inspector assessment - -Use intelligent parameter type selection: -- Image Upload for visual inspections and evidence documentation -- Toggle for pass/fail and binary assessments -- Checklist for foreign objects, allergens, and multi-item verifications -- Numeric Input for all measurements with proper specifications and units -- Text Input for codes, dates, and identifiers -- Remarks for detailed observations and corrective actions - -Include specific regulatory clause references where applicable and ensure professional formatting that matches Al Kabeer Group's quality standards. -""" - -# Enhanced digitization system prompt -ENHANCED_DIGITIZE_SYSTEM_PROMPT = """ -You are the Swift Check AI digitization assistant. Your job is to analyze OCR-extracted text from scanned QC checklists and convert them into structured parameters for comprehensive food safety and quality control checklists. - -# YOUR TASKS: -1. Recognize and preserve table structures and section headings -2. Identify quality control parameters with their proper input types -3. Extract specifications, tolerance limits, and measurement units -4. Determine appropriate parameter types based on content analysis -5. 
Maintain professional formatting and organization - -# INTELLIGENT PARAMETER TYPE DETECTION: - -## Image Upload - DETECT FOR: -- Parameters mentioning "photo", "attach", "capture", "visual", "appearance" -- Instructions like "attach photos", "capture variations" -- Visual inspection requirements - -## Toggle - DETECT FOR: -- Binary choices: "Acceptable/Non-acceptable", "Present/Absent", "Pass/Fail" -- "Yes/No" type assessments -- Simple pass/fail criteria - -## Checklist - DETECT FOR: -- Lists of items to verify (foreign objects, allergens, defects) -- Multiple related items that can be selected simultaneously -- Categories with sub-items - -## Numeric Input - DETECT FOR: -- Measurements with units and tolerances -- Temperature readings, weights, dimensions -- Time durations, counts, percentages -- Values with specifications like "±5g", "<10^4", "2-3 minutes" - -## Text Input - DETECT FOR: -- Codes, dates, identifiers -- Batch numbers, lot codes -- Names, locations, serial numbers - -## Remarks - DETECT FOR: -- "Remarks", "Comments", "Observations", "Notes" -- Areas requiring detailed explanations -- Corrective action descriptions - -# TABLE STRUCTURE RECOGNITION: -- Preserve section headings like "ORGANOLEPTIC EVALUATION", "COOKING DETAILS", "PACKAGING & FREEZING" -- Maintain parameter groupings and logical flow -- Keep tolerance limits and specifications with their parameters -- Preserve professional formatting structure - -# OUTPUT FORMAT: -Provide a comprehensive JSON array with intelligent parameter type selection: -[ - { - "Parameter": "Actual Parameter Name from Document", - "Type": "Intelligently Selected Type", - "Spec": "Extracted specifications with units", - "DropdownOptions": "Specific options from document", - "ChecklistOptions": "Comprehensive list items", - "IncludeRemarks": "Yes/No based on parameter complexity", - "Section": "Document section/category", - "ClauseReference": "Regulatory reference if identified" - } -] - -Focus on creating 
def extract_top_level_json_array(text):
    """Return the first balanced top-level ``[...]`` span found in *text*.

    Scanning starts at the first ``[`` and tracks bracket depth until the
    matching ``]`` is reached. Brackets that appear inside double-quoted
    JSON strings (including strings containing escaped quotes) are ignored,
    so a value such as ``"a ] b"`` does not end the array early.

    Args:
        text: Free-form text (for example an LLM reply) that may embed a
            JSON array. ``None`` and ``""`` are accepted.

    Returns:
        The substring from the opening ``[`` through its matching ``]``,
        or ``""`` when there is no ``[`` or the brackets never balance.
    """
    if not text:
        return ""

    start = text.find("[")
    if start == -1:
        return ""

    depth = 0
    in_string = False
    escaped = False
    for index in range(start, len(text)):
        char = text[index]

        if in_string:
            # Inside a JSON string only an unescaped quote is significant.
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
            continue

        if char == '"':
            in_string = True
        elif char == "[":
            depth += 1
        elif char == "]":
            depth -= 1
            if depth == 0:
                return text[start:index + 1]

    # Never balanced: report "nothing found" instead of a lone "[" that the
    # caller would try to parse and strip out of the summary text.
    return ""
is_digitization=False): - """ - Enhanced Groq LLM call with comprehensive RAG support. - Retrieves context from all 3 VDBs before calling the LLM. - """ - if not Groq: - return "Groq LLM call failed: 'groq' library not found or not installed." - - GROQ_API_KEY = "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" - domain = "Food Manufacturing" - - # Get comprehensive context from all VDBs - print(f"🔍 Retrieving comprehensive context for: {product_name}") - comprehensive_context = get_comprehensive_context(product_name, domain) - - # Format context for prompt - formatted_context = format_context_for_prompt(comprehensive_context, max_length=4500) - - # Generate header and supplier info - header_text = f"{product_name} {doc_type}" - supplier_info = f"Supplier Name: {supplier_name}" - - # Check if user message contains reference document content - has_reference = "Reference document content" in user_message - - # Select appropriate system prompt - if is_digitization: - system_instructions = ENHANCED_DIGITIZE_SYSTEM_PROMPT - else: - system_instructions = ENHANCED_SYSTEM_PROMPT - - # Enhanced context with regulatory compliance focus - enhanced_context = formatted_context - - if has_reference: - enhanced_context += f""" - -**CRITICAL DIGITIZATION GUIDANCE**: The reference document content is provided to understand the STRUCTURE and PROFESSIONAL FORMAT of QC parameters. - -Use the reference to identify: -1. Section headings and table structures (preserve them) -2. Parameter types and their appropriate input methods -3. Tolerance specifications and measurement units -4. Professional formatting and organization -5. Regulatory compliance requirements - -Create parameters with values, specifications, and input types SPECIFIC to {product_name} while maintaining the professional structure and comprehensive coverage of the reference document. 
- """ - else: - enhanced_context += f""" - -For {product_name}, ensure you include MINIMUM 15+ parameters covering these MANDATORY categories: - -1. **Physical Parameters** (4-5): Appearance (Image+Toggle), Texture (Dropdown+Remarks), Dimensions (Numeric), Weight (Numeric), Shape (Dropdown) -2. **Sensory Parameters** (3-4): Flavor (Dropdown+Remarks), Aroma (Dropdown+Remarks), Mouthfeel (Dropdown), Overall Sensory (Toggle) -3. **Safety Parameters** (4-5): Foreign Objects (Checklist+Image), Microbiological (Numeric), Chemical (Numeric), Allergens (Checklist), Metal Detection (Text+Toggle) -4. **Product-Specific** (2-3): Based on product type (frozen, fried, baked, etc.) -5. **Packaging** (3-4): Integrity (Image+Checklist), Weight Verification (Numeric), Date Verification (Text), Batch Traceability (Text) -6. **Process Control** (2-3): Temperature (Numeric), Time (Numeric), Equipment (Toggle+Text) -7. **Compliance** (2-3): Regulatory (Checklist), Documentation (Toggle), Inspector Assessment (Toggle+Remarks) - -**INTELLIGENT TYPE SELECTION RULES:** -- Image Upload: Visual inspections, appearance, defects, evidence documentation -- Toggle: Pass/fail, acceptable/not acceptable, present/absent, binary assessments -- Checklist: Foreign objects (stones, glass, metals, plastic, wood, insects, hair, threads), allergens (all 14), packaging defects, compliance items -- Numeric Input: ALL measurements with specifications and units (e.g., "Weight: 25±2g", "Temperature: -18°C ±2°C") -- Text Input: Codes, dates, identifiers, batch numbers -- Remarks: Detailed observations, corrective actions, complex assessments - -**REGULATORY COMPLIANCE:** -Based on retrieved context, ensure compliance with: {', '.join([req['regulatory_body'] + ' ' + req.get('clause_reference', req.get('standard_code', '')) for req in comprehensive_context.get('regulatory_requirements', [])[:3]])} - -**PROFESSIONAL FORMATTING:** -Match Al Kabeer Group's quality standards with proper section organization, 
comprehensive coverage, and intelligent parameter type selection. - """ - - # Construct the final system prompt - final_system_prompt = f""" -{system_instructions} - -User context: -- Doc Type: {doc_type} -- Product: {product_name} -- Supplier: {supplier_name} -- Generated Header: {header_text} -- Supplier Info: {supplier_info} - -{enhanced_context} - -**VALID PARAMETER TYPES:** -Checklist, Dropdown, Image Upload, Remarks, Text Input, Numeric Input, Toggle - -**MANDATORY REQUIREMENTS:** -1. MINIMUM 15+ parameters for comprehensive coverage -2. Use intelligent type selection based on parameter purpose -3. Include specifications with units for ALL Numeric Input parameters -4. Provide comprehensive options for Checklist and Dropdown parameters -5. Add clause references where regulatory compliance is required -6. Include section organization for professional formatting -7. Add "IncludeRemarks": "Yes" for complex parameters requiring detailed observations - -**OUTPUT INSTRUCTIONS:** -1. Provide a brief summary describing the comprehensive QC parameters created. -2. Then produce a bracketed JSON array with intelligent parameter selection. 
def apply_changes_to_params(parameters, changes):
    """Apply ``add`` / ``remove`` / ``update`` changes to *parameters*.

    Each change is a dict with an ``"action"`` key and a ``"Parameter"``
    name. Names are matched case-insensitively. Entries in *changes* that
    are not dicts are reported with ``print`` and skipped; dicts with an
    unrecognised or missing action are ignored.

    * ``add`` appends a new parameter dict, filling absent fields with
      defaults (``Type`` "Text Input", ``IncludeRemarks`` "No",
      ``Section`` "General", everything else ``""``).
    * ``remove`` drops every parameter whose name matches.
    * ``update`` modifies the first parameter whose name matches. Only the
      fields present in the change are overwritten, so a partial update
      no longer resets the remaining fields to their defaults.

    ``ChecklistOptions`` is used when ``DropdownOptions`` is empty, and a
    list of options is stored as a comma-separated string. A ``Type`` that
    is not one of the supported types falls back to "Text Input".

    Args:
        parameters: List of parameter dicts; mutated in place.
        changes: Iterable of change dicts (``None`` is treated as empty).

    Returns:
        The same *parameters* list object.
    """
    valid_types = (
        "Checklist", "Dropdown", "Image Upload", "Remarks",
        "Text Input", "Numeric Input", "Toggle",
    )
    # Fields an "update" copies verbatim when the change supplies them.
    plain_fields = ("Spec", "IncludeRemarks", "Section", "ClauseReference")

    def normalise_type(raw_type):
        return raw_type if raw_type in valid_types else "Text Input"

    def name_key(value):
        # str() keeps matching from crashing on non-string names.
        return str(value).lower()

    for change in changes or []:
        if not isinstance(change, dict):
            print(f"Skipping non-dict change: {change}")
            continue

        # "action" may be missing or null in LLM output.
        action = str(change.get("action") or "").strip().lower()
        p_name = change.get("Parameter", "Unnamed")
        target = name_key(p_name)

        # Handle both DropdownOptions and ChecklistOptions.
        options = change.get("DropdownOptions") or change.get("ChecklistOptions") or ""
        if isinstance(options, list):
            options = ", ".join(str(item) for item in options)

        if action == "add":
            parameters.append({
                "Parameter": p_name,
                "Type": normalise_type(change.get("Type", "Text Input")),
                "Spec": change.get("Spec", ""),
                "DropdownOptions": options,
                "IncludeRemarks": change.get("IncludeRemarks", "No"),
                "Section": change.get("Section", "General"),
                "ClauseReference": change.get("ClauseReference", ""),
            })

        elif action == "remove":
            parameters[:] = [
                existing for existing in parameters
                if name_key(existing.get("Parameter", "")) != target
            ]

        elif action == "update":
            for existing in parameters:
                if name_key(existing.get("Parameter", "")) != target:
                    continue
                if "Type" in change:
                    existing["Type"] = normalise_type(change["Type"])
                if "DropdownOptions" in change or "ChecklistOptions" in change:
                    existing["DropdownOptions"] = options
                for field in plain_fields:
                    if field in change:
                        existing[field] = change[field]
                break  # only the first match is updated

    return parameters
- """ - header_text = f"{product_name} {doc_type}" - template = { - "templateId": "neY5j", - "isDrafted": False, - "pageStyle": { - "margin": { - "top": 10, - "bottom": 10, - "left": 10, - "right": 10 - }, - "showPageNumber": False, - "headerImgUrl": "", - "fotterImgUrl": "" - }, - "pageToolsDataList": [], - "workflowInfo": { - "currentState": "Draft", - "approvalStates": ["Draft", "Under Review", "Approved", "Rejected"], - "currentApprover": { - "userId": "user123", - "name": "Ashish Kumar", - "role": "QC Manager" - }, - "previousApprovers": [ - { - "userId": "user456", - "name": "Raj Singh", - "role": "QC Supervisor", - "approvalDate": "2025-05-01T10:30:00Z", - "status": "Approved", - "comments": "Looks good to me." - } - ], - "nextApprovers": [ - { - "userId": "user789", - "name": "Priya Patel", - "role": "CEO" - } - ] - } - } - - def generate_tool_id(): - return ''.join(random.choices(string.ascii_lowercase + string.digits, k=5)) - - # Add main header - title_text = header_text - heading_tool = { - "toolId": generate_tool_id(), - "toolType": "HEADING", - "textData": { - "text": title_text, - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "color": 4294967295, # White - "fontSize": 14 - }, - "boxData": { - "fillColor": 4288111521, # Blue background - "borderEnable": False, - "borderColor": 4294967295, - "borderWidth": 0.8, - "boxAlignment": "CENTER_LEFT", - "cornerRadius": { - "topLeft": 0, - "topRight": 0, - "bottomLeft": 0, - "bottomRight": 0 - }, - "padding": { - "top": 4, - "bottom": 4, - "left": 9, - "right": 4 - }, - "margin": { - "top": 0, - "bottom": 0, - "left": 0, - "right": 0 - } - }, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(heading_tool) - - # Add supplier information - supplier_text = { - "toolId": generate_tool_id(), - "toolType": "TEXT", - "textData": { - "text": f"Supplier Name: {supplier_name}", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - 
"textAliend": "LEFT", - "color": 4278190080, # Black - "fontSize": 12 - }, - "toolHeight": 30, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(supplier_text) - - # Group parameters by section for better organization - sections = {} - for param in parameters: - section = param.get("Section", "General Parameters") - if section not in sections: - sections[section] = [] - sections[section].append(param) - - # Add parameters organized by sections - for section_name, section_params in sections.items(): - # Add section header - if section_name != "General Parameters": - section_header = { - "toolId": generate_tool_id(), - "toolType": "TEXT", - "textData": { - "text": section_name.upper(), - "isBold": True, - "isItalic": False, - "isUnderlined": True, - "textAliend": "LEFT", - "color": 4283215696, # Green - "fontSize": 13 - }, - "toolHeight": 35, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(section_header) - - # Add parameters in this section - for param in section_params: - param_name = param.get("Parameter", "") - param_type = param.get("Type", "Text Input") - spec = param.get("Spec", "") - options = param.get("DropdownOptions", "") - include_remarks = param.get("IncludeRemarks", "No") - clause_ref = param.get("ClauseReference", "") - - # Create display name with clause reference - display_name = param_name - if clause_ref: - display_name += f" ({clause_ref})" - - # Split options into a list if it's a string - option_list = [] - if isinstance(options, str) and options.strip(): - option_list = [opt.strip() for opt in options.split(",") if opt.strip()] - - # ENHANCED PARAMETER TYPE HANDLING - if param_type == "Image Upload": - # Create image upload tool with toggle - image_tool = { - "toolId": generate_tool_id(), - "toolType": "IMAGE", - "imageLableData": { - "text": display_name + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - 
"lablePositioned": "LEFT", - "spacing": 10, - "txtColor": 4278190080, # Black - "showLable": True - }, - "imageData": { - "showImageUploadArea": True, - "width": 200, - "height": 150 - }, - "iconData": 57344, - "showIcon": False, - "iconCodePoint": 59729, - "iconSize": 30, - "iconColor": 4278190080, # Black - "toolHeight": 160, - "toolWidth": 1.7976931348623157e+308, - "showToggle": True, - "imageToggleData": { - "label": "Assessment", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "fontSize": 14, - "showLabel": True, - "enabledText": "Acceptable", - "disabledText": "Not Acceptable", - "enabledColor": 4283215696, # Green - "disabledColor": 4294198070, # Red - "isSelected": True - } - } - template["pageToolsDataList"].append(image_tool) - - elif param_type == "Toggle": - # Create toggle tool - toggle_tool = { - "toolId": generate_tool_id(), - "toolType": "TOGGLE", - "toggleData": { - "disabledColor": 4294198070, # Red - "disabledText": "Not Acceptable" if not option_list else option_list[1] if len(option_list) > 1 else "No", - "enabledColor": 4283215696, # Green - "enabledText": "Acceptable" if not option_list else option_list[0] if option_list else "Yes", - "showLabel": True, - "label": display_name, - "labelFontSize": 14, - "labelTextColor": 4278190080, # Black - "isBold": True, - "isItalic": False, - "isSelected": True, - "toggleTextFontSize": 12, - "toggleTextIsBold": False - }, - "toolWidth": 1.7976931348623157e+308, - "toolHeight": 80 - } - template["pageToolsDataList"].append(toggle_tool) - - elif param_type == "Dropdown": - # Create dropdown tool - dropdown_tool = { - "toolId": generate_tool_id(), - "toolType": "DROPDOWN", - "dropdownData": { - "hintText": f"Select {param_name.lower()}", - "hintTextColor": 4288585374, # Gray - "hintFontSize": 14, - "dropdownWidth": 350, - "spacingBetweeenLableAndDropdownWidth": 10, - "showLable": True, - "labelText": display_name, - "isBold": True, - "isItalic": False, - "isUnderlined": False, - 
"textAliend": "LEFT", - "lablePositioned": "TOP", - "labelFontSize": 14, - "lableTextColor": 4278190080, # Black - "numberOfOptions": len(option_list) if option_list else 3, - "optionFontSize": 14, - "optionTextColor": 4278190080, # Black - "optionLst": option_list if option_list else ["Acceptable", "Marginal", "Not Acceptable"], - "selectedOptionIndex": -1 - }, - "toolHeight": 90, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(dropdown_tool) - - elif param_type == "Checklist": - # Create checkbox tool for checklists - if not option_list: - option_list = ["Item 1", "Item 2", "Item 3"] - - checkbox_tool = { - "toolId": generate_tool_id(), - "toolType": "CHECKBOX", - "checkboxData": { - "numberOfCheckboxes": len(option_list), - "checkboxBgColor": 4294967295, # White - "spacing": 8, - "runSpacing": 8, - "checkboxTileWidth": 140, - "checkBoxAlignmentEnum": "HORIZONTAL", - "checkBoxButtonStyleEnum": "CHECKBOX", - "checkBoxPositionedEnum": "START", - "checkBoxSelectionModeEnum": "MULTIPLE", - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 13, - "lablePositioned": "LEFT", - "txtColor": 4278190080, # Black - "labelLst": option_list, - "showLable": True, - "selectedIndexLstForMultiSelect": [], - "selectedIndexForSingleSelect": 0 - }, - "toolWidth": 1.7976931348623157e+308, - "toolHeight": max(100, len(option_list) * 15 + 40) # Dynamic height based on items - } - - # Add section label for checklist - checklist_label = { - "toolId": generate_tool_id(), - "toolType": "TEXT", - "textData": { - "text": display_name + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "color": 4278190080, # Black - "fontSize": 14 - }, - "toolHeight": 25, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(checklist_label) - template["pageToolsDataList"].append(checkbox_tool) - - elif param_type == "Numeric Input": - # Create numeric 
input with specification - label_text = display_name - if spec: - label_text += f" (Spec: {spec})" - - numeric_tool = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": label_text + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Enter numeric value" + (f" ({spec})" if spec else ""), - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "txtColor": 4288585374 # Gray - }, - "toolHeight": 75, - "toolWidth": 1.7976931348623157e+308, - "toggleData": { - "label": "Status", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "showLabel": True, - "enabledText": "Within Spec", - "disabledText": "Out of Spec", - "enabledColor": 4283215696, # Green - "disabledColor": 4294198070, # Red - "isSelected": True - }, - "showToggle": True # Show toggle for spec compliance - } - template["pageToolsDataList"].append(numeric_tool) - - elif param_type == "Text Input": - # Create text input - text_tool = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": display_name + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Enter " + param_name.lower(), - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "txtColor": 4288585374 # 
Gray - }, - "toolHeight": 65, - "toolWidth": 1.7976931348623157e+308, - "showToggle": False - } - template["pageToolsDataList"].append(text_tool) - - elif param_type == "Remarks": - # Create remarks/textarea - remarks_tool = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": display_name + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Enter detailed observations and remarks", - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "txtColor": 4288585374 # Gray - }, - "toolHeight": 100, # Larger height for remarks - "toolWidth": 1.7976931348623157e+308, - "showToggle": False - } - template["pageToolsDataList"].append(remarks_tool) - - # Add additional remarks field if requested and not already a remarks parameter - if include_remarks == "Yes" and param_type != "Remarks": - additional_remarks = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": f"{param_name} - Additional Remarks:", - "isBold": False, - "isItalic": True, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 12, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Additional observations or corrective actions", - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 11, - "txtColor": 4288585374 # Gray - }, - "toolHeight": 60, - "toolWidth": 1.7976931348623157e+308, - "showToggle": 
False - } - template["pageToolsDataList"].append(additional_remarks) - - # Add final overall assessment section - final_assessment_header = { - "toolId": generate_tool_id(), - "toolType": "TEXT", - "textData": { - "text": "FINAL ASSESSMENT", - "isBold": True, - "isItalic": False, - "isUnderlined": True, - "textAliend": "CENTER", - "color": 4283215696, # Green - "fontSize": 14 - }, - "toolHeight": 35, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(final_assessment_header) - - # Overall quality assessment toggle - overall_toggle = { - "toolId": generate_tool_id(), - "toolType": "TOGGLE", - "toggleData": { - "disabledColor": 4294198070, # Red - "disabledText": "REJECTED", - "enabledColor": 4283215696, # Green - "enabledText": "APPROVED", - "showLabel": True, - "label": "Overall Quality Assessment", - "labelFontSize": 15, - "labelTextColor": 4278190080, # Black - "isBold": True, - "isItalic": False, - "isSelected": True, - "toggleTextFontSize": 14, - "toggleTextIsBold": True - }, - "toolWidth": 1.7976931348623157e+308, - "toolHeight": 100 - } - template["pageToolsDataList"].append(overall_toggle) - - # Inspector signature and date - inspector_info = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": "Inspector Name & Signature:", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Inspector name and signature", - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "txtColor": 4288585374 # Gray - }, - "toolHeight": 80, - "toolWidth": 1.7976931348623157e+308, - "showToggle": False - } - 
# Enhanced OCR and text extraction functions
def enhanced_extract_text_from_document(filepath, file_ext):
    """Extract text from a PDF or image file, using OCR where needed.

    For PDFs, each page's embedded text is used when it holds at least 100
    non-blank characters; otherwise the page is rendered at 3x zoom and passed
    to Tesseract. Image files always go through Tesseract. All text is
    post-processed by ``enhance_table_structure``.

    Args:
        filepath: Path of the file to read.
        file_ext: Lower-case extension without the dot; ``'pdf'`` selects the
            PDF path, anything else is treated as an image.

    Returns:
        The extracted text with surrounding whitespace stripped, or ``None``
        if anything went wrong (callers test the result for falsiness).
    """
    from io import BytesIO

    # Page segmentation mode 6 + preserved spacing keeps table columns apart.
    ocr_config = r'--oem 3 --psm 6 -c preserve_interword_spaces=1'

    try:
        if file_ext == 'pdf':
            page_chunks = []
            pdf_document = fitz.open(filepath)
            try:
                for page_num in range(pdf_document.page_count):
                    page = pdf_document[page_num]

                    # Try to extract text directly first
                    text = page.get_text()

                    # If minimal text found, the page is probably a scan: use OCR
                    if len(text.strip()) < 100:
                        pix = page.get_pixmap(matrix=fitz.Matrix(3, 3))
                        img_data = pix.pil_tobytes(format="PNG")
                        with Image.open(BytesIO(img_data)) as image:
                            text = pytesseract.image_to_string(image, config=ocr_config)

                    processed_text = enhance_table_structure(text)
                    page_chunks.append(f"\n=== PAGE {page_num + 1} ===\n{processed_text}\n")
            finally:
                # Release the document even when a page fails half-way through.
                pdf_document.close()

            extracted_text = "".join(page_chunks)

        else:  # Image files
            with Image.open(filepath) as image:
                text = pytesseract.image_to_string(image, config=ocr_config)
            extracted_text = enhance_table_structure(text)

        return extracted_text.strip()

    except Exception as e:
        # Deliberate best-effort boundary: callers only distinguish text vs. None.
        print(f"Error during enhanced document processing: {str(e)}")
        return None
def enhance_table_structure(text):
    """Mark up OCR text so section headings and parameter/value pairs stand out.

    Known section titles become ``## TITLE`` lines, recognised
    ``label: value`` pairs get a bold label, and runs of blank lines or
    spaces/tabs are collapsed.

    Labels and values are matched within a single line only, so a label never
    absorbs the previous line and a measurement value never swallows the
    following line.

    Args:
        text: Raw text from the PDF text layer or from OCR.

    Returns:
        The marked-up text; falsy input (``None`` or ``""``) is returned as is.
    """
    if not text:
        return text

    # Preserve important section headings
    section_patterns = [
        (r'(ORGANOLEPTIC\s+EVALUATION)', r'\n## \1\n'),
        (r'(COOKING\s+DETAILS)', r'\n## \1\n'),
        (r'(PACKAGING\s*&\s*FREEZING)', r'\n## \1\n'),
        (r'(FREEZING\s+DETAILS)', r'\n## \1\n'),
        (r'(METAL\s+SCREENING)', r'\n## \1\n'),
        (r'(SIZE\s+VARIATIONS)', r'\n## \1\n'),
        (r'(COLOUR\s+VARIATIONS)', r'\n## \1\n'),
        (r'(EVALUATION\s+OF\s+PASTRY)', r'\n## \1\n'),
        (r'(FINAL\s+ASSESSMENT)', r'\n## \1\n'),
    ]

    # Preserve parameter-value pairs. Horizontal whitespace only ([ \t]) is
    # allowed inside labels and values so that a match cannot cross a newline.
    param_patterns = [
        (r'([A-Za-z][A-Za-z \t]*):[ \t]*(Acceptable|Non-acceptable|Present|Absent|To be mentioned)', r'**\1**: \2'),
        (r'([A-Za-z][A-Za-z \t]*)[ \t]+(Sam[ \t]+\d+)', r'**\1** - \2'),
        (r'(Temperature|Weight|Time|Dimension[s]?)[: \t]+([0-9\-\+\±°C \t\w]+)', r'**\1**: \2'),
    ]

    processed_text = text
    for pattern, replacement in section_patterns + param_patterns:
        processed_text = re.sub(pattern, replacement, processed_text, flags=re.IGNORECASE)

    # Clean up excessive whitespace while preserving structure
    processed_text = re.sub(r'\n\s*\n\s*\n', '\n\n', processed_text)
    processed_text = re.sub(r'[ \t]+', ' ', processed_text)

    return processed_text
def _first_meaningful_capture(patterns, text, default):
    """Return the first non-blank capture group found by *patterns*, else *default*."""
    for pattern in patterns:
        match = re.search(pattern, text)
        if not match:
            continue
        # The generic "label: value" patterns can capture only whitespace or a
        # stray separator when the value is missing; such hits are ignored so
        # the more specific patterns still get a chance.
        candidate = match.group(1).strip(" \t\r\f\v:-")
        if candidate:
            return candidate
    return default


def enhanced_extract_metadata_from_ocr(ocr_text):
    """Guess document type, product name and supplier name from OCR text.

    Args:
        ocr_text: Text extracted from the scanned document; may be empty or
            ``None``.

    Returns:
        A ``(doc_type, product_name, supplier_name)`` tuple. Anything that
        cannot be detected falls back to ``"Quality Control Checklist"``,
        ``"Food Product"`` and ``""`` respectively.
    """
    # Enhanced document type detection (first matching pattern wins)
    doc_type_patterns = {
        r'(?i)(MALABAR\s*PARATHA.*INSPECTION)': "Malabar Paratha Inspection Record",
        r'(?i)(GREEN\s*PEAS.*INSPECTION)': "Green Peas Inspection Record",
        r'(?i)(VEGETABLE\s*SAMOSA.*INSPECTION)': "Vegetable Samosa Inspection Record",
        r'(?i)(CONTAINER.*INSPECTION.*REPORT)': "Container Inspection Report",
        r'(?i)quality\s*(?:control)?\s*checklist': "Quality Control Checklist",
        r'(?i)inspection\s*(?:record|checklist)': "Inspection Checklist",
        r'(?i)pre[\-\s]shipment.*inspection': "Pre-Shipment Inspection",
    }

    # Enhanced product name extraction
    product_patterns = [
        r'(?i)product\s*(?:name|description)[:\-\s]*([^\n]{1,50})',
        r'(?i)(MALABAR\s*PARATHA)',
        r'(?i)(GREEN\s*PEAS)',
        r'(?i)(VEGETABLE\s*SAMOSA[S]?)',
        r'(?i)(SWEET\s*CORN)',
    ]

    # Enhanced supplier name extraction
    supplier_patterns = [
        r'(?i)supplier\s*(?:name)?[:\-\s]*([^\n]{1,40})',
        r'(?i)manufacturing\s*unit[:\-\s]*([^\n]{1,40})',
        r'(?i)(AL\s*KABEER)',
        r'(?i)(CASCADE\s*MARINE)',
        r'(?i)(SAHAR\s*FOOD)',
    ]

    detected_doc_type = "Quality Control Checklist"  # Default
    detected_product = "Food Product"  # Default
    detected_supplier = ""  # Default empty

    if not ocr_text:
        return detected_doc_type, detected_product, detected_supplier

    for pattern, doc_type in doc_type_patterns.items():
        if re.search(pattern, ocr_text):
            detected_doc_type = doc_type
            break

    detected_product = _first_meaningful_capture(product_patterns, ocr_text, detected_product)
    detected_supplier = _first_meaningful_capture(supplier_patterns, ocr_text, detected_supplier)

    return detected_doc_type, detected_product, detected_supplier


# File upload handling
ALLOWED_EXTENSIONS = {'pdf', 'png', 'jpg', 'jpeg'}


def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The comparison is case-insensitive. A missing/empty name or a name without
    a dot is rejected.
    """
    if not filename or '.' not in filename:
        return False
    return filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
def fetch_json_from_firebase(firebase_json_url, timeout=15):
    """Fetch a JSON template from a Firebase Storage URL.

    Args:
        firebase_json_url: URL of the JSON document to download.
        timeout: Seconds to wait for the server before giving up, so a stalled
            download cannot block the request handler indefinitely.

    Returns:
        The decoded JSON payload, or ``None`` when the URL is empty, the
        response status is not 200, or the request/decoding fails.
    """
    if not firebase_json_url:
        return None

    # NOTE(review): the URL comes straight from the client; consider restricting
    # it to the expected storage host before fetching.
    try:
        response = requests.get(firebase_json_url, timeout=timeout)
        if response.status_code != 200:
            return None
        return response.json()
    except Exception as e:
        # Best-effort: callers treat None as "template could not be fetched".
        print(f"Error fetching JSON from Firebase: {str(e)}")
        return None
-

Enhanced Swift Check LLM API v2.0

- -
-
-

🎯 Enhanced Intelligence

-

15+ parameter minimum, smart type detection, comprehensive coverage

-
-
-

🏛️ Regulatory Compliance

-

Dubai Municipality, HACCP, ISO standards with clause references

-
-
-

📋 Professional Format

-

Al Kabeer Group standards, section organization, comprehensive structure

-
-
-

🔍 Advanced OCR

-

Table structure recognition, header preservation, intelligent parameter extraction

-
-
- -

API Endpoints:

- -
-

POST /refine - Create Enhanced QC Template

-

Creates comprehensive quality control templates with 15+ parameters, regulatory compliance, and intelligent type selection.

- -

Enhanced Features:

- - -

Parameters:

- -
- -
-

POST /edit - Edit with Enhanced Context

-

Modifies existing templates using comprehensive context and intelligent parameter optimization.

- -

Enhanced Features:

- -
- -
-

POST /digitize - Advanced Document Digitization

-

Enhanced OCR processing with table structure recognition and intelligent parameter extraction.

- -

Advanced Features:

- - -

Parameters (multipart/form-data):

- -
- -
-

GET /template/{request_id} - Get Enhanced Template JSON

-

Returns professionally formatted JSON templates with intelligent parameter types.

-
- -
-

GET /history - View Request History

-

Browse all QC requests with enhanced preview and download options.

-
- -
-

GET /test-rag - Test Enhanced RAG System

-

Test the comprehensive RAG system with all 3 vector databases.

-
- -

🚀 What's New in v2.0:

- -
@app.route("/refine", methods=["POST"])
def enhanced_refine_parameters():
    """Create a new QC template from product metadata and an optional reference file.

    Accepts either ``multipart/form-data`` (fields ``doc_type``,
    ``product_name``, ``supplier_name``, ``user_message`` and an optional
    ``context_file`` upload) or a JSON body with the same keys. The request,
    the LLM response, the resulting parameters and the generated JSON template
    are stored in ``swift_check.db`` in one transaction.

    Returns:
        A JSON response describing the created template, a 400 response when a
        required field is missing, or a 500 response with the error message.
    """
    global global_parameters
    global global_json_template

    print(">> Enhanced /refine route called <<")

    file_context = ""
    filename = None

    # Handle both form data and JSON
    if request.content_type and request.content_type.startswith('multipart/form-data'):
        data = {
            "doc_type": request.form.get("doc_type", ""),
            "product_name": request.form.get("product_name", ""),
            "supplier_name": request.form.get("supplier_name", ""),
            "user_message": request.form.get("user_message", "")
        }

        # Handle file upload with enhanced OCR
        uploaded_file = request.files.get('context_file')

        if uploaded_file and allowed_file(uploaded_file.filename):
            # Read the extension from the raw name: secure_filename() may drop
            # the dot entirely (e.g. for a non-ASCII stem).
            file_ext = uploaded_file.filename.rsplit('.', 1)[1].lower()
            filename = secure_filename(uploaded_file.filename)
            if '.' not in filename:
                filename = f"upload.{file_ext}"

            # The directory and its content are removed even if saving fails.
            with tempfile.TemporaryDirectory() as temp_dir:
                filepath = os.path.join(temp_dir, filename)
                uploaded_file.save(filepath)

                # Enhanced text extraction
                extracted_text = enhanced_extract_text_from_document(filepath, file_ext)

            if extracted_text:
                file_context = f"\n\nReference document content ({filename}):\n{extracted_text}"
                print(f"✅ Enhanced OCR extracted {len(extracted_text)} characters from {filename}")
            else:
                file_context = f"\n\n[Failed to extract text from {filename}]"

    else:
        # silent=True: a missing or malformed JSON body yields our own 400 below
        data = request.get_json(silent=True)
        if not data or not isinstance(data, dict):
            return jsonify({"error": "No JSON payload found"}), 400

    # Validate required fields
    doc_type = data.get("doc_type", "")
    product_name = data.get("product_name", "")
    supplier_name = data.get("supplier_name", "")

    if not doc_type:
        return jsonify({"error": "doc_type is required"}), 400
    if not product_name:
        return jsonify({"error": "product_name is required"}), 400
    if not supplier_name:
        return jsonify({"error": "supplier_name is required"}), 400

    # Use enhanced default prompt if none provided
    user_message = data.get("user_message", "")
    if not user_message:
        user_message = ENHANCED_DEFAULT_REFINE_PROMPT
    else:
        user_message = ENHANCED_DEFAULT_REFINE_PROMPT + "\n\nAdditional instructions: " + user_message

    # Add file context to user message if available
    if file_context:
        user_message += file_context

    con = None
    try:
        con = sql.connect("swift_check.db")
        cur = con.cursor()

        # Insert main request
        cur.execute("""
            INSERT INTO qc_requests
            (doc_type, product_name, supplier_name, user_message)
            VALUES (?, ?, ?, ?)
        """, (doc_type, product_name, supplier_name, user_message))

        request_id = cur.lastrowid
        print(f"✅ Created enhanced request with ID: {request_id}")

        # Call enhanced LLM with comprehensive RAG
        llm_response = enhanced_call_groq_llm(
            user_message=user_message,
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            is_digitization=False
        )

        print("\n🎯 ENHANCED LLM RESPONSE:")
        print("=" * 50)
        print(llm_response[:500] + "..." if len(llm_response) > 500 else llm_response)
        print("=" * 50)

        # Parse response with enhanced handling
        summary_text, changes_list = parse_llm_changes(llm_response)

        # Store LLM response
        cur.execute("""
            INSERT INTO llm_responses
            (request_id, llm_response, summary_text)
            VALUES (?, ?, ?)
        """, (request_id, llm_response, summary_text))

        # Apply changes with enhanced parameter handling
        updated_params = apply_changes_to_params([], changes_list)
        global_parameters = updated_params

        print(f"✅ Generated {len(updated_params)} enhanced parameters")

        # Store parameters with enhanced metadata
        cur.executemany("""
            INSERT INTO parameters
            (request_id, parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, [
            (request_id,
             param.get("Parameter", ""),
             param.get("Type", ""),
             param.get("Spec", ""),
             param.get("DropdownOptions", ""),
             param.get("IncludeRemarks", "No"),
             param.get("Section", "General"),
             param.get("ClauseReference", ""))
            for param in updated_params
        ])

        # Generate enhanced JSON template
        json_template = generate_enhanced_json_template(
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            parameters=updated_params
        )
        global_json_template = json_template

        # Store JSON template
        cur.execute("""
            INSERT INTO json_templates
            (request_id, template_json)
            VALUES (?, ?)
        """, (request_id, json.dumps(json_template)))

        con.commit()

        response_data = {
            "success": True,
            "request_id": request_id,
            "message": f"Enhanced QC template created with {len(updated_params)} comprehensive parameters",
            "summary": summary_text,
            "parameters_count": len(updated_params),
            "enhancements": {
                "comprehensive_rag": True,
                "regulatory_compliance": True,
                "intelligent_types": True,
                "minimum_15_params": len(updated_params) >= 15
            }
        }

        if file_context:
            response_data["file_info"] = f"Enhanced OCR processed {filename}" if filename else "File processed with enhanced OCR"

        return jsonify(response_data)

    except Exception as e:
        print(f"❌ Error in enhanced /refine: {str(e)}")
        if con is not None:
            con.rollback()
        return jsonify({"error": str(e)}), 500

    finally:
        if con is not None:
            con.close()
def _parameters_from_firebase_template(template_data):
    """Rebuild the flat parameter list from the tools of a template JSON."""
    parameters = []

    for tool in template_data.get("pageToolsDataList", []):
        tool_type = tool.get("toolType", "")

        if tool_type == "DROPDOWN":
            dropdown_data = tool.get("dropdownData", {})
            parameters.append({
                "Parameter": dropdown_data.get("labelText", "Dropdown Field"),
                "Type": "Dropdown",
                "Spec": "",
                "DropdownOptions": ", ".join(dropdown_data.get("optionLst", [])),
                "IncludeRemarks": "No",
                "Section": "General",
                "ClauseReference": ""
            })
        elif tool_type == "CHECKBOX":
            checkbox_data = tool.get("checkboxData", {})
            parameters.append({
                "Parameter": "Checklist Group",
                "Type": "Checklist",
                "Spec": "",
                "DropdownOptions": ", ".join(checkbox_data.get("labelLst", [])),
                "IncludeRemarks": "No",
                "Section": "General",
                "ClauseReference": ""
            })
        elif tool_type == "IMAGE":
            image_data = tool.get("imageLableData", {})
            parameters.append({
                "Parameter": image_data.get("text", "Image Upload").replace(":", ""),
                "Type": "Image Upload",
                "Spec": "Visual inspection with photo evidence",
                "DropdownOptions": "",
                "IncludeRemarks": "Yes",
                "Section": "Visual Inspection",
                "ClauseReference": ""
            })
        elif tool_type == "TOGGLE":
            toggle_data = tool.get("toggleData", {})
            parameters.append({
                "Parameter": toggle_data.get("label", "Toggle Assessment"),
                "Type": "Toggle",
                "Spec": "",
                "DropdownOptions": f"{toggle_data.get('enabledText', 'Yes')}, {toggle_data.get('disabledText', 'No')}",
                "IncludeRemarks": "No",
                "Section": "Assessment",
                "ClauseReference": ""
            })
        elif tool_type == "TEXTAREA":
            label_data = tool.get("lableData", {})
            text_area_data = tool.get("textAreaData", {})
            label_text = label_data.get("text", "").replace(":", "")
            placeholder = text_area_data.get("dummyTxt", "")

            # The template stores every free-text field as TEXTAREA; the
            # original kind is guessed from the label and placeholder wording.
            if "Remarks" in label_text or "remarks" in placeholder:
                param_type = "Remarks"
            elif "numeric" in placeholder.lower():
                param_type = "Numeric Input"
            else:
                param_type = "Text Input"

            parameters.append({
                "Parameter": label_text,
                "Type": param_type,
                "Spec": "",
                "DropdownOptions": "",
                "IncludeRemarks": "No",
                "Section": "General",
                "ClauseReference": ""
            })

    return parameters


def _identity_from_firebase_template(template_data):
    """Return (doc_type, product_name, supplier_name) read from heading/text tools."""
    tools = template_data.get("pageToolsDataList", [])
    doc_type = ""
    product_name = ""
    supplier_name = ""

    # Extract basic info from the first heading: "<product> <document type>"
    for tool in tools:
        if tool.get("toolType") == "HEADING":
            title_text = tool.get("textData", {}).get("text", "")
            parts = title_text.split(" ", 1)
            if len(parts) >= 2:
                product_name = parts[0]
                doc_type = parts[1]
            else:
                product_name = title_text
                doc_type = "Inspection Document"
            break

    if not product_name:
        product_name = "Product"
    if not doc_type:
        doc_type = "Inspection Document"

    # Find supplier info
    for tool in tools:
        if tool.get("toolType") == "TEXT":
            text = tool.get("textData", {}).get("text", "")
            if "Supplier" in text:
                supplier_name = text.replace("Supplier Name:", "").strip()
                break

    if not supplier_name:
        supplier_name = "Unknown Supplier"

    return doc_type, product_name, supplier_name


@app.route("/edit", methods=["POST"])
def enhanced_edit_parameters():
    """Create an edited version of an existing QC template.

    Accepts ``multipart/form-data`` or JSON with ``user_message`` (required)
    and either ``request_id`` (a stored request) or ``firebase_json_url`` (a
    template JSON to download). An optional ``context_file`` upload is OCR-ed
    and appended to the message. The edited version is stored as a new
    request together with its LLM response, parameters and JSON template.

    Returns:
        A JSON response describing the new version, a 400/404 response for
        invalid input, or a 500 response with the error message.
    """
    global global_parameters
    global global_json_template

    print(">> Enhanced /edit route called <<")

    file_context = ""
    filename = None

    # Handle both form data and JSON
    if request.content_type and request.content_type.startswith('multipart/form-data'):
        data = {
            "request_id": request.form.get("request_id"),
            "firebase_json_url": request.form.get("firebase_json_url"),
            "user_message": request.form.get("user_message", "")
        }

        # Handle file upload with enhanced OCR
        uploaded_file = request.files.get('context_file')

        if uploaded_file and allowed_file(uploaded_file.filename):
            # Read the extension from the raw name: secure_filename() may drop
            # the dot entirely (e.g. for a non-ASCII stem).
            file_ext = uploaded_file.filename.rsplit('.', 1)[1].lower()
            filename = secure_filename(uploaded_file.filename)
            if '.' not in filename:
                filename = f"upload.{file_ext}"

            # The directory and its content are removed even if saving fails.
            with tempfile.TemporaryDirectory() as temp_dir:
                filepath = os.path.join(temp_dir, filename)
                uploaded_file.save(filepath)
                extracted_text = enhanced_extract_text_from_document(filepath, file_ext)

            if extracted_text:
                file_context = f"\n\nReference document content ({filename}):\n{extracted_text}"
                print(f"✅ Enhanced OCR extracted {len(extracted_text)} characters from {filename}")
            else:
                file_context = f"\n\n[Failed to extract text from {filename}]"

    else:
        # silent=True: a missing or malformed JSON body yields our own 400 below
        data = request.get_json(silent=True)
        if not data or not isinstance(data, dict):
            return jsonify({"error": "No JSON payload found"}), 400

    # Validate required fields
    user_message = data.get("user_message", "")
    if not user_message:
        return jsonify({"error": "user_message is required for editing"}), 400

    request_id = data.get("request_id")
    firebase_json_url = data.get("firebase_json_url")

    if not request_id and not firebase_json_url:
        return jsonify({"error": "Either request_id or firebase_json_url is required"}), 400

    # Add file context to user message if available
    if file_context:
        user_message += file_context

    con = None
    try:
        existing_parameters = []
        doc_type = ""
        product_name = ""
        supplier_name = ""

        con = sql.connect("swift_check.db")
        cur = con.cursor()

        if request_id:
            try:
                request_id = int(request_id)
            except (TypeError, ValueError):
                return jsonify({"error": "request_id must be a valid integer"}), 400

            # Fetch original request data with enhanced metadata
            cur.execute("""
                SELECT doc_type, product_name, supplier_name
                FROM qc_requests
                WHERE id = ?
            """, (request_id,))

            original_data = cur.fetchone()
            if not original_data:
                return jsonify({"error": f"Request ID {request_id} not found"}), 404

            doc_type, product_name, supplier_name = original_data

            # Fetch existing parameters with enhanced metadata
            cur.execute("""
                SELECT parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference
                FROM parameters
                WHERE request_id = ?
                ORDER BY id
            """, (request_id,))

            existing_parameters = [
                {
                    "Parameter": row[0],
                    "Type": row[1],
                    "Spec": row[2],
                    "DropdownOptions": row[3],
                    "IncludeRemarks": row[4],
                    "Section": row[5] or "General",
                    "ClauseReference": row[6] or ""
                }
                for row in cur.fetchall()
            ]

        elif firebase_json_url:
            # Enhanced Firebase template processing
            template_data = fetch_json_from_firebase(firebase_json_url)
            if not template_data:
                return jsonify({"error": "Failed to fetch template from Firebase URL"}), 400

            existing_parameters = _parameters_from_firebase_template(template_data)
            doc_type, product_name, supplier_name = _identity_from_firebase_template(template_data)

        # Generate enhanced version ID
        # NOTE(review): the derived id is built by string concatenation and can
        # coincide with an id that already exists, in which case the UPDATE
        # below fails and the request ends in the 500 handler — confirm the
        # intended versioning scheme.
        new_version_id = None
        if request_id:
            base_id = str(request_id)
            cur.execute("""
                SELECT id FROM qc_requests
                WHERE CAST(id AS TEXT) LIKE ? OR id = ?
                ORDER BY id DESC
            """, (base_id + '%', request_id))

            existing_versions = [row[0] for row in cur.fetchall()]

            if request_id < 10:
                new_version_id = int(base_id + str(len(existing_versions) + 1))
            else:
                base = int(str(request_id)[0])
                new_version_id = int(str(base) + str(len([v for v in existing_versions if str(v).startswith(str(base))]) + 1))

        # Insert new enhanced version
        cur.execute("""
            INSERT INTO qc_requests
            (doc_type, product_name, supplier_name, user_message)
            VALUES (?, ?, ?, ?)
        """, (doc_type, product_name, supplier_name, user_message))

        if new_version_id:
            cur.execute("UPDATE qc_requests SET id = ? WHERE id = last_insert_rowid()", (new_version_id,))
            created_id = new_version_id
        else:
            cur.execute("SELECT last_insert_rowid()")
            created_id = cur.fetchone()[0]

        print(f"✅ Created enhanced edit version with ID: {created_id}")

        # Call enhanced LLM with comprehensive context
        enhanced_message = f"ENHANCED EDIT REQUEST: {user_message}\n\nExisting parameters for optimization and enhancement: {len(existing_parameters)} parameters"
        llm_response = enhanced_call_groq_llm(
            user_message=enhanced_message,
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            existing_parameters=existing_parameters,
            is_digitization=False
        )

        print(f"\n🎯 ENHANCED EDIT LLM RESPONSE:")
        print("=" * 50)
        print(llm_response[:500] + "..." if len(llm_response) > 500 else llm_response)
        print("=" * 50)

        # Parse and apply changes with enhanced handling
        summary_text, changes_list = parse_llm_changes(llm_response)

        cur.execute("""
            INSERT INTO llm_responses
            (request_id, llm_response, summary_text)
            VALUES (?, ?, ?)
        """, (created_id, llm_response, summary_text))

        updated_params = apply_changes_to_params(existing_parameters, changes_list)
        global_parameters = updated_params

        print(f"✅ Enhanced edit generated {len(updated_params)} optimized parameters")

        # Store enhanced parameters
        cur.executemany("""
            INSERT INTO parameters
            (request_id, parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, [
            (created_id,
             param.get("Parameter", ""),
             param.get("Type", ""),
             param.get("Spec", ""),
             param.get("DropdownOptions", ""),
             param.get("IncludeRemarks", "No"),
             param.get("Section", "General"),
             param.get("ClauseReference", ""))
            for param in updated_params
        ])

        # Generate enhanced JSON template
        json_template = generate_enhanced_json_template(
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            parameters=updated_params
        )
        global_json_template = json_template

        cur.execute("""
            INSERT INTO json_templates
            (request_id, template_json)
            VALUES (?, ?)
        """, (created_id, json.dumps(json_template)))

        con.commit()

        response_data = {
            "success": True,
            "request_id": created_id,
            "message": f"Enhanced template edited with {len(updated_params)} optimized parameters",
            "summary": summary_text,
            "parameters_count": len(updated_params),
            "enhancements": {
                "context_aware_editing": True,
                "intelligent_optimization": True,
                "regulatory_compliance": True,
                "comprehensive_coverage": len(updated_params) >= 15
            }
        }

        if request_id:
            response_data["original_request_id"] = request_id
        if firebase_json_url:
            response_data["firebase_json_url"] = firebase_json_url
        if file_context:
            response_data["file_info"] = f"Enhanced OCR processed {filename}" if filename else "File processed with enhanced OCR"

        return jsonify(response_data)

    except Exception as e:
        print(f"❌ Error in enhanced /edit: {str(e)}")
        if con is not None:
            con.rollback()
        return jsonify({"error": str(e)}), 500

    finally:
        # Runs for the early 400/404 returns as well, so the handle never leaks.
        if con is not None:
            con.close()
@app.route("/digitize", methods=["POST"])
def enhanced_digitize_checklist():
    """Digitize an uploaded checklist (PDF/PNG/JPG/JPEG) into a QC template.

    Expects ``multipart/form-data`` with a ``checklist_file`` upload and
    optional ``doc_type``, ``product_name`` and ``supplier_name`` fields;
    missing metadata is detected from the OCR text. The extracted parameters,
    the LLM response and the generated JSON template are stored in
    ``swift_check.db`` in one transaction.

    Returns:
        A JSON response describing the digitized template, a 400 response for
        a missing/invalid upload, or a 500 response with the error message.
    """
    print(">> Enhanced /digitize route called <<")

    if 'checklist_file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files['checklist_file']

    if file.filename == '':
        return jsonify({"error": "No file selected"}), 400

    if not allowed_file(file.filename):
        return jsonify({"error": "Invalid file type. Allowed: PDF, PNG, JPG, JPEG"}), 400

    # Get optional parameters
    doc_type = request.form.get("doc_type", "")
    product_name = request.form.get("product_name", "")
    supplier_name = request.form.get("supplier_name", "")

    con = None
    try:
        # Read the extension from the raw name: secure_filename() may drop the
        # dot entirely (e.g. for a non-ASCII stem).
        file_ext = file.filename.rsplit('.', 1)[1].lower()
        filename = secure_filename(file.filename)
        if '.' not in filename:
            filename = f"upload.{file_ext}"

        # The directory and its content are removed even if saving fails.
        with tempfile.TemporaryDirectory() as temp_dir:
            filepath = os.path.join(temp_dir, filename)
            file.save(filepath)

            # Enhanced text extraction with table structure preservation
            extracted_text = enhanced_extract_text_from_document(filepath, file_ext)

        if not extracted_text:
            return jsonify({"error": "Failed to extract text from file"}), 500

        print(f"✅ Enhanced OCR extracted {len(extracted_text)} characters from {filename}")
        print(f"📄 Preview: {extracted_text[:300]}...")

        # Enhanced metadata extraction
        if not doc_type or not product_name or not supplier_name:
            detected_doc_type, detected_product, detected_supplier = enhanced_extract_metadata_from_ocr(extracted_text)

            if not doc_type:
                doc_type = detected_doc_type
            if not product_name:
                product_name = detected_product
            if not supplier_name:
                supplier_name = detected_supplier

        # Enhanced LLM processing for digitization
        llm_prompt = f"""
I've extracted text from a scanned QC checklist using enhanced OCR with table structure preservation.

DOCUMENT ANALYSIS:
- File: {filename}
- Detected Document Type: {doc_type}
- Detected Product: {product_name}
- Detected Supplier: {supplier_name}

EXTRACTED TEXT WITH STRUCTURE:
{extracted_text}

Please perform COMPREHENSIVE DIGITIZATION with:

1. **TABLE STRUCTURE PRESERVATION**: Maintain section headings and organization
2. **INTELLIGENT PARAMETER EXTRACTION**: Convert each item to appropriate parameter type
3. **SPECIFICATION EXTRACTION**: Capture tolerance limits, measurement units, acceptable ranges
4. **REGULATORY COMPLIANCE**: Include any regulatory references or compliance requirements
5. **COMPREHENSIVE COVERAGE**: Ensure minimum 15+ parameters for professional QC checklist

Focus on creating a PROFESSIONAL, COMPREHENSIVE parameter set that maintains the structure and intelligence of the original document while using modern parameter types and ensuring regulatory compliance.
"""

        # SECURITY: an API key used to be hard-coded here in an unused local.
        # It has been removed; the key that was committed should be rotated and
        # credentials supplied through configuration/environment instead.

        if not Groq:
            return jsonify({"error": "Groq library not available"}), 500

        # Call enhanced LLM for digitization
        llm_response = enhanced_call_groq_llm(
            user_message=llm_prompt,
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            is_digitization=True
        )

        print(f"\n🎯 ENHANCED DIGITIZATION LLM RESPONSE:")
        print("=" * 50)
        print(llm_response[:500] + "..." if len(llm_response) > 500 else llm_response)
        print("=" * 50)

        # Parse parameters with enhanced handling
        json_array_text = extract_top_level_json_array(llm_response)
        parameters = []

        if json_array_text:
            try:
                parsed = json.loads(json_array_text)
            except Exception as e:
                print(f"❌ JSON parse error: {e}")
                return jsonify({"error": f"Failed to parse enhanced LLM response: {str(e)}"}), 500

            # Keep only dict entries whose name carries meaningful content
            placeholder_names = {"unknown", "parameter", "option", "item"}
            for param in parsed if isinstance(parsed, list) else []:
                if not isinstance(param, dict):
                    continue
                param_name = str(param.get("Parameter") or "").strip()
                if param_name and param_name.lower() not in placeholder_names:
                    parameters.append(param)

        if not parameters:
            return jsonify({"error": "No meaningful parameters extracted from document"}), 500

        # Save to database with enhanced metadata
        con = sql.connect("swift_check.db")
        cur = con.cursor()

        cur.execute("""
            INSERT INTO qc_requests
            (doc_type, product_name, supplier_name)
            VALUES (?, ?, ?)
        """, (doc_type, product_name, supplier_name))

        request_id = cur.lastrowid

        # Store enhanced LLM response
        cur.execute("""
            INSERT INTO llm_responses
            (request_id, llm_response, summary_text)
            VALUES (?, ?, ?)
        """, (request_id, llm_response, f"Enhanced digitization: {len(parameters)} comprehensive parameters extracted from {filename}"))

        # Store parameters with enhanced metadata
        for param in parameters:
            options = param.get("DropdownOptions", "")
            if not options:
                options = param.get("ChecklistOptions", "")
            if isinstance(options, list):
                options = ", ".join(options)

            cur.execute("""
                INSERT INTO parameters
                (request_id, parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, (request_id,
                  param.get("Parameter", ""),
                  param.get("Type", "Text Input"),
                  param.get("Spec", ""),
                  options,
                  param.get("IncludeRemarks", "No"),
                  param.get("Section", "General"),
                  param.get("ClauseReference", "")))

        # Generate enhanced JSON template
        json_template = generate_enhanced_json_template(
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            parameters=parameters
        )

        cur.execute("""
            INSERT INTO json_templates
            (request_id, template_json)
            VALUES (?, ?)
        """, (request_id, json.dumps(json_template)))

        con.commit()

        # Enhanced response data
        response_data = {
            "success": True,
            "request_id": request_id,
            "message": f"Enhanced digitization: {len(parameters)} comprehensive parameters extracted from {filename}",
            "parameters_count": len(parameters),
            "extracted_parameters": [p.get("Parameter", "") for p in parameters],
            "doc_type": doc_type,
            "product_name": product_name,
            "supplier_name": supplier_name,
            "enhancements": {
                "table_structure_preserved": True,
                "intelligent_type_detection": True,
                "comprehensive_extraction": len(parameters) >= 10,
                "specification_extraction": any(p.get("Spec") for p in parameters),
                "section_organization": any(p.get("Section") != "General" for p in parameters)
            },
            "file_processing": {
                "filename": filename,
                "text_extracted": len(extracted_text),
                "ocr_enhanced": True
            }
        }

        return jsonify(response_data)

    except Exception as e:
        print(f"❌ Error in enhanced /digitize: {str(e)}")
        import traceback
        traceback.print_exc()
        if con is not None:
            con.rollback()
        return jsonify({"error": str(e)}), 500

    finally:
        if con is not None:
            con.close()
- """, (request_id, json.dumps(json_template))) - - con.commit() - con.close() - - # Enhanced response data - response_data = { - "success": True, - "request_id": request_id, - "message": f"Enhanced digitization: {len(parameters)} comprehensive parameters extracted from {filename}", - "parameters_count": len(parameters), - "extracted_parameters": [p.get("Parameter", "") for p in parameters], - "doc_type": doc_type, - "product_name": product_name, - "supplier_name": supplier_name, - "enhancements": { - "table_structure_preserved": True, - "intelligent_type_detection": True, - "comprehensive_extraction": len(parameters) >= 10, - "specification_extraction": any(p.get("Spec") for p in parameters), - "section_organization": any(p.get("Section") != "General" for p in parameters) - }, - "file_processing": { - "filename": filename, - "text_extracted": len(extracted_text), - "ocr_enhanced": True - } - } - - return jsonify(response_data) - - except Exception as e: - print(f"❌ Error in enhanced /digitize: {str(e)}") - import traceback - traceback.print_exc() - return jsonify({"error": str(e)}), 500 - -# Existing routes with enhanced features -@app.route("/history", methods=["GET"]) -def enhanced_view_history(): - """Enhanced history view with additional metadata""" - if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json': - try: - con = sql.connect("swift_check.db") - cur = con.cursor() - - # Enhanced query with parameter counts - cur.execute(""" - SELECT r.id, r.doc_type, r.product_name, r.supplier_name, r.created_at, - COUNT(p.id) as parameter_count - FROM qc_requests r - LEFT JOIN parameters p ON r.id = p.request_id - GROUP BY r.id - ORDER BY r.created_at DESC - """) - - rows = cur.fetchall() - con.close() - - return jsonify([{ - "id": row[0], - "doc_type": row[1], - "product_name": row[2], - "supplier_name": row[3], - "created_at": row[4], - "parameter_count": row[5] - } for row in rows]) - - except Exception as e: - return 
jsonify({"error": str(e)}), 500 - - # Enhanced HTML view - try: - con = sql.connect("swift_check.db") - cur = con.cursor() - - cur.execute(""" - SELECT r.id, r.doc_type, r.product_name, r.supplier_name, r.created_at, - COUNT(p.id) as parameter_count - FROM qc_requests r - LEFT JOIN parameters p ON r.id = p.request_id - GROUP BY r.id - ORDER BY r.created_at DESC - """) - - rows = cur.fetchall() - con.close() - - html = """ - - - Enhanced QC Request History - - - -
-

Enhanced QC Request History v2.0

- - - - - - - - - - - """ - - for row in rows: - param_badge = "🎯" if row[5] >= 15 else "⚠️" if row[5] >= 10 else "❌" - html += f""" - - - - - - - - - - """ - - html += """ -
IDProductDoc TypeSupplierParametersCreatedActions
{row[0]}{row[2]}{row[1]}{row[3]}{param_badge} {row[5]} params{row[4]} - Preview - JSON -
-
- Legend: - 🎯 15+ params (Professional) | - ⚠️ 10-14 params (Good) | - ❌ <10 params (Basic) -
-
- - - """ - return html - - except Exception as e: - return f"

Error

{str(e)}

", 500 - -@app.route("/template/", methods=["GET"]) -def get_enhanced_template_json(request_id): - """Get enhanced template JSON by request ID""" - try: - con = sql.connect("swift_check.db") - cur = con.cursor() - - cur.execute(""" - SELECT template_json - FROM json_templates - WHERE request_id = ? - """, (request_id,)) - - result = cur.fetchone() - con.close() - - if result: - template_data = json.loads(result[0]) - return jsonify(template_data) - else: - return jsonify({"error": f"Enhanced template not found for request ID {request_id}"}), 404 - - except Exception as e: - print(f"❌ Error in /template/{request_id}: {str(e)}") - return jsonify({"error": str(e)}), 500 - -@app.route("/preview/", methods=["GET"]) -def enhanced_preview_page(request_id): - """Enhanced preview with better formatting and metadata""" - try: - con = sql.connect("swift_check.db") - cur = con.cursor() - - # Get template JSON - cur.execute(""" - SELECT template_json - FROM json_templates - WHERE request_id = ? - """, (request_id,)) - - template_result = cur.fetchone() - - # Get enhanced parameters - cur.execute(""" - SELECT parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference - FROM parameters - WHERE request_id = ? - ORDER BY id - """, (request_id,)) - - parameters = cur.fetchall() - - # Get request details - cur.execute(""" - SELECT doc_type, product_name, supplier_name - FROM qc_requests - WHERE id = ? - """, (request_id,)) - - request_details = cur.fetchone() - con.close() - - if not template_result: - return f""" - - Not Found - -

Enhanced Template not found

-

No template exists for request ID {request_id}

- View History - - - """, 404 - - json_template = json.loads(template_result[0]) - - # Generate enhanced ASCII preview with sections - ascii_preview = "╔══════════════════════════════════════════════════════════════════════╗\n" - - if request_details: - header = f"{request_details[1]} {request_details[0]}" - else: - header = "Enhanced QC Template" - - header_padding = (70 - len(header)) // 2 - ascii_preview += f"║{' ' * header_padding}{header}{' ' * (70 - header_padding - len(header))}║\n" - - if request_details and request_details[2]: - supplier = f"Supplier: {request_details[2]}" - supplier_padding = (70 - len(supplier)) // 2 - ascii_preview += f"║{' ' * supplier_padding}{supplier}{' ' * (70 - supplier_padding - len(supplier))}║\n" - - ascii_preview += "╚══════════════════════════════════════════════════════════════════════╝\n\n" - - # Group parameters by section - sections = {} - for param in parameters: - param_name, param_type, spec, options, include_remarks, section, clause_ref = param - section = section or "General Parameters" - if section not in sections: - sections[section] = [] - sections[section].append(param) - - # Add parameters organized by sections - for section_name, section_params in sections.items(): - ascii_preview += f"\n🔹 {section_name.upper()}\n" - ascii_preview += "─" * 60 + "\n" - - for param in section_params: - param_name, param_type, spec, options, include_remarks, section, clause_ref = param - - # Add clause reference if available - display_name = param_name - if clause_ref: - display_name += f" ({clause_ref})" - - if param_type == "Image Upload": - ascii_preview += f"[📷] {display_name}: [ Upload Photo ] + Toggle Assessment\n" - elif param_type == "Toggle": - ascii_preview += f"[◐] {display_name}: ● Acceptable ○ Not Acceptable\n" - elif param_type == "Dropdown": - ascii_preview += f"[▼] {display_name}: _________________ " - if options: - option_list = [opt.strip() for opt in options.split(",")[:3]] - ascii_preview += f"({', 
'.join(option_list)}{'...' if len(options.split(',')) > 3 else ''})\n" - else: - ascii_preview += "\n" - elif param_type == "Checklist": - ascii_preview += f" {display_name}:\n" - if options: - option_list = [opt.strip() for opt in options.split(",")] - for opt in option_list[:5]: - ascii_preview += f" ☐ {opt}\n" - if len(option_list) > 5: - ascii_preview += f" ... and {len(option_list) - 5} more items\n" - else: - ascii_preview += " ☐ Item 1\n" - elif param_type == "Numeric Input": - ascii_preview += f"[#️⃣] {display_name}: _____________" - if spec: - ascii_preview += f" (Spec: {spec})\n" - else: - ascii_preview += "\n" - elif param_type == "Text Input": - ascii_preview += f"[✏️] {display_name}: _____________________________\n" - elif param_type == "Remarks": - ascii_preview += f"[📝] {display_name}:\n" - ascii_preview += " ┌─────────────────────────────────────┐\n" - ascii_preview += " │ │\n" - ascii_preview += " │ │\n" - ascii_preview += " └─────────────────────────────────────┘\n" - - if include_remarks == "Yes" and param_type != "Remarks": - ascii_preview += f" └─ Additional Remarks: _______________________\n" - - ascii_preview += "\n" - - # Add enhanced final assessment - ascii_preview += "═" * 70 + "\n" - ascii_preview += "🎯 FINAL ASSESSMENT\n" - ascii_preview += "═" * 70 + "\n" - ascii_preview += "[✅] Overall Quality Assessment: ● APPROVED ○ REJECTED\n\n" - ascii_preview += "[👤] Inspector Name & Signature: _________________________________\n\n" - ascii_preview += "[📝] Final Comprehensive Remarks:\n" - ascii_preview += " ┌─────────────────────────────────────────────────────────────┐\n" - ascii_preview += " │ Overall assessment, corrective actions, and observations │\n" - ascii_preview += " │ │\n" - ascii_preview += " │ │\n" - ascii_preview += " └─��───────────────────────────────────────────────────────────┘\n" - - # Enhanced statistics - total_params = len(parameters) - param_types = {} - sections_count = len(sections) - regulatory_refs = sum(1 for param in 
parameters if param[6]) # clause references - - for param in parameters: - param_type = param[1] - param_types[param_type] = param_types.get(param_type, 0) + 1 - - stats_html = f""" -
-

📊 Template Statistics

-
-
- {total_params}
- Total Parameters -
-
- {sections_count}
- Organized Sections -
-
- {regulatory_refs}
- Regulatory References -
-
- {len(param_types)}
- Parameter Types Used -
-
-

Parameter Type Distribution:

-
    - """ - - for ptype, count in param_types.items(): - emoji = {"Image Upload": "📷", "Toggle": "◐", "Dropdown": "▼", "Checklist": "☐", - "Numeric Input": "#️⃣", "Text Input": "✏️", "Remarks": "📝"}.get(ptype, "•") - stats_html += f"
  • {emoji} {ptype}: {count} parameters
  • " - - stats_html += """ -
-
- """ - - html = f""" - - - Enhanced QC Template Preview - Request #{request_id} - - - -
-

Enhanced QC Template Preview - Request #{request_id} - v2.0 - {'🎯 Professional' if total_params >= 15 else '⚠️ Good' if total_params >= 10 else '❌ Basic'} -

- - {stats_html} - -
-

🖥️ Enhanced ASCII Preview

-
{ascii_preview}
-
- -
-

📋 Enhanced JSON Template

-
- - - -
- -
- -
- - -
-
- - - - - """ - return html - - except Exception as e: - print(f"❌ Error in enhanced /preview/{request_id}: {str(e)}") - return f"

Error

{str(e)}

", 500 - -@app.route("/test-rag", methods=["GET"]) -def test_enhanced_rag(): - """Test enhanced RAG functionality with comprehensive context""" - try: - test_product = request.args.get('product', 'Malabar Paratha') - test_domain = request.args.get('domain', 'Food Manufacturing') - - print(f"\n🧪 Testing Enhanced RAG for {test_product} in {test_domain}") - - # Get comprehensive context from all VDBs - comprehensive_context = get_comprehensive_context(test_product, test_domain) - - # Format context for display - formatted_context = format_context_for_prompt(comprehensive_context, max_length=6000) - - results = { - "test_parameters": { - "product": test_product, - "domain": test_domain - }, - "comprehensive_context": { - "regulatory_requirements": len(comprehensive_context.get("regulatory_requirements", [])), - "product_specifications": len(comprehensive_context.get("product_specifications", [])), - "checklist_examples": len(comprehensive_context.get("checklist_examples", [])), - "parameter_patterns": len(comprehensive_context.get("parameter_patterns", [])), - }, - "context_summary": comprehensive_context.get("context_summary", {}), - "rag_quality": { - "total_sources": ( - len(comprehensive_context.get("regulatory_requirements", [])) + - len(comprehensive_context.get("product_specifications", [])) + - len(comprehensive_context.get("checklist_examples", [])) - ), - "regulatory_compliance": len(comprehensive_context.get("regulatory_requirements", [])) > 0, - "product_depth_reference": len(comprehensive_context.get("product_specifications", [])) > 0, - "professional_examples": len(comprehensive_context.get("checklist_examples", [])) > 0, - "parameter_intelligence": len(comprehensive_context.get("parameter_patterns", [])) > 0 - } - } - - print(f"✅ Enhanced RAG Test Complete: {results['rag_quality']['total_sources']} sources retrieved") - - if request.headers.get('Accept') == 'application/json': - return jsonify(results) - else: - # Enhanced HTML view - html = f""" - - - 
Enhanced RAG Test Results - - - -
-

Enhanced RAG Test Results v2.0

-

Testing: "{test_product}" in {test_domain}

- -
-

🎯 RAG Quality Assessment

-
-
- Total Sources Retrieved:
- - {results['rag_quality']['total_sources']} sources - -
-
- Regulatory Compliance:
- - {'✅ Available' if results['rag_quality']['regulatory_compliance'] else '❌ Missing'} - -
-
- Product Depth Reference:
- - {'✅ Available' if results['rag_quality']['product_depth_reference'] else '❌ Missing'} - -
-
- Professional Examples:
- - {'✅ Available' if results['rag_quality']['professional_examples'] else '❌ Missing'} - -
-
-
- -
-

📊 Context Retrieved

-
    -
  • Regulatory Requirements: {results['comprehensive_context']['regulatory_requirements']} documents
  • -
  • Product Specifications: {results['comprehensive_context']['product_specifications']} references
  • -
  • Checklist Examples: {results['comprehensive_context']['checklist_examples']} samples
  • -
  • Parameter Patterns: {results['comprehensive_context']['parameter_patterns']} intelligent patterns
  • -
-
- -
-

🔍 Context Preview

-
-
{formatted_context[:2000]}{'...' if len(formatted_context) > 2000 else ''}
-
-
- -
-

🧪 Test Other Products

-

Try these Enhanced RAG tests:

- -
-
- - - """ - - return html - - except Exception as e: - print(f"❌ Error in Enhanced RAG test: {str(e)}") - import traceback - traceback.print_exc() - - error_response = { - "error": str(e), - "traceback": traceback.format_exc() - } - - if request.headers.get('Accept') == 'application/json': - return jsonify(error_response), 500 - else: - return f""" - - Enhanced RAG Test Error - -

Error Testing Enhanced RAG

-
{error_response['traceback']}
- - - """, 500 - -if __name__ == "__main__": - print("🚀 Starting Enhanced Swift Check API v2.0...") - print("✅ Comprehensive RAG Integration") - print("✅ 15+ Parameter Minimum") - print("✅ Intelligent Type Selection") - print("✅ Regulatory Compliance") - print("✅ Enhanced OCR Processing") - print("✅ Professional Formatting") +import json +import re +import random +import sqlite3 as sql +from datetime import datetime +import string +import os +import tempfile +from PIL import Image +import pytesseract +import fitz +from werkzeug.utils import secure_filename +from flask import Flask, request, jsonify, redirect, url_for, render_template_string +from pathlib import Path +import requests + +# Import enhanced RAG utilities +from rag_utils import get_comprehensive_context, format_context_for_prompt + +try: + from groq import Groq +except ImportError: + Groq = None + +app = Flask(__name__) +global_parameters = [] +global_json_template = {} + +# Enhanced system prompt with comprehensive QC requirements +ENHANCED_SYSTEM_PROMPT = """ +You are the Swift Check AI assistant, specialized in creating comprehensive Quality Control (QC) checklists and inspection documents for food products with full regulatory compliance. + +# CONTEXT: +You'll help users generate custom QC parameters for various food products following Al Kabeer Group's professional standards. The parameters will be used in quality inspection checklists that QC inspectors fill during product inspections, with full regulatory backing and clause references. + +# COMPREHENSIVE QC CHECKLIST REQUIREMENTS: + +## For Food Products, ALWAYS include these categories (MINIMUM 15+ PARAMETERS): + +### 1. 
Physical Parameters (4-5 parameters) +- Appearance (Image Upload + Toggle): Color, visual defects, physical state with photo evidence +- Texture (Dropdown + Remarks): Firmness, consistency, crispness with detailed observations +- Size/Dimensions (Numeric Input): Length, width, diameter with tolerance specs (e.g., "60±5mm") +- Weight (Numeric Input): Individual/batch weight with tolerance (e.g., "25±2g") +- Shape (Dropdown): Uniformity, deformation assessment + +### 2. Sensory Parameters (3-4 parameters) +- Flavor/Taste (Dropdown + Remarks): Characteristic flavors, off-tastes, intensity +- Aroma/Odor (Dropdown + Remarks): Normal smell, off-odors, freshness +- Mouthfeel (Dropdown): For applicable products (texture after cooking) +- Overall Sensory Assessment (Toggle): Acceptable/Not Acceptable + +### 3. Safety Parameters (4-5 parameters) +- Foreign Objects (Checklist + Image Upload): MUST include comprehensive list: stones, glass, metals, plastic, wood, insects/pests, hair, threads, paper, bones, feathers +- Microbiological Specifications (Table/Numeric Input): Total Plate Count, E.coli, Salmonella, etc. with limits +- Chemical Contaminants (Numeric Input): Heavy metals, pesticides if applicable with ppm limits +- Allergen Declaration (Checklist): All 14 major allergens verification +- Metal Detection Results (Text Input + Toggle): Fe, Non-Fe, SS readings with pass/fail + +### 4. Product-Specific Parameters (2-3 parameters) +- For filled products: Filling weight ratio, filling consistency +- For fried products: Oil absorption, crispness level +- For frozen products: Freezer burn check, ice crystals, clustering +- For baked products: Browning level, doneness, internal temperature + +### 5. 
Packaging Parameters (3-4 parameters) +- Packaging Integrity (Image Upload + Checklist): Sealing, tears, punctures, label accuracy with photo +- Net Weight Verification (Numeric Input): Package weight vs declared weight with tolerance +- Date Verification (Text Input): Best before date, production date accuracy +- Batch/Lot Traceability (Text Input): Batch code, lot number verification + +### 6. Process Control Parameters (2-3 parameters) +- Temperature Control (Numeric Input): Processing, storage, transport temperatures with specs +- Time Parameters (Numeric Input): Processing time, cooling time with specifications +- Equipment Calibration (Toggle + Text Input): Calibration status, last calibration date + +### 7. Compliance & Documentation (2-3 parameters) +- Regulatory Compliance (Checklist): HACCP, Dubai Municipality, ISO requirements +- Documentation Complete (Toggle): All required certificates present +- Inspector Assessment (Toggle + Remarks): Overall quality assessment with detailed remarks + +# PARAMETER TYPES AND INTELLIGENT SELECTION: + +## Image Upload - USE FOR: +- Visual inspections (appearance, defects, packaging condition) +- Evidence documentation (defects, foreign objects) +- Label verification and batch code photos +- Before/after cooking comparisons + +## Toggle - USE FOR: +- Pass/fail decisions (acceptable/not acceptable) +- Present/absent checks (clustering, defects) +- Compliance status (passed/failed) +- Binary quality assessments + +## Checklist - USE FOR: +- Foreign objects (comprehensive list of all possible contaminants) +- Allergens (all 14 major allergens) +- Packaging defects (multiple possible issues) +- Compliance requirements (multiple standards) +- Multi-item verification lists + +## Numeric Input - USE FOR: +- Measurements WITH specifications and units +- Weight: "25±2g", "165±5g" +- Dimensions: "60±5mm length", "7-8 inch diameter" +- Temperature: "-18°C ±2°C", "180°C ±10°C" +- Microbiological limits: "<10^4 CFU/g", "<10^2" +- 
Chemical limits: "<0.1ppm", "<0.10ppm" +- Time measurements: "2-3 minutes", "30±5 seconds" + +## Text Input - USE FOR: +- Alphanumeric data entry +- Batch numbers, lot codes +- Production dates, expiry dates +- Supplier codes, product codes +- Equipment serial numbers + +## Remarks - USE FOR: +- Detailed observations requiring explanation +- Corrective actions taken +- Special conditions noted +- Inspector additional comments +- Non-conformance descriptions + +# REGULATORY COMPLIANCE: +- Include specific clause references for each parameter when available +- Reference Dubai Municipality guidelines, HACCP principles, ISO standards +- Ensure traceability requirements are met +- Include metal detection and allergen management as per UAE regulations + +# OUTPUT FORMAT: +Provide comprehensive, actionable parameters with: +- Minimum 15+ parameters covering all categories above +- Appropriate types based on intelligent selection rules +- Realistic specifications with proper units and tolerances +- Comprehensive options for dropdowns/checklists +- Clause references where applicable (e.g., "Dubai Municipality Section 4.2.1") +- Professional formatting matching Al Kabeer Group standards + +Remember: Generate PROFESSIONAL, COMPREHENSIVE checklists that match Al Kabeer Group's quality standards with full regulatory compliance and intelligent parameter type selection. +""" + +# Enhanced default refine prompt +ENHANCED_DEFAULT_REFINE_PROMPT = """ +Create a comprehensive professional food quality control checklist for the specified product following Al Kabeer Group standards. Include a MINIMUM of 15+ parameters that cover: + +1. PHYSICAL ATTRIBUTES: Appearance (with photo), texture, dimensions, weight with precise tolerance limits +2. SENSORY EVALUATION: Flavor, aroma, taste, mouthfeel characteristics with detailed assessment +3. SAFETY PARAMETERS: Comprehensive foreign objects checklist, microbiological specifications, chemical contaminants, allergen verification +4. 
PRODUCT-SPECIFIC CHECKS: Based on processing method (frozen, fried, baked, filled, etc.) with specialized parameters +5. PACKAGING INTEGRITY: Visual inspection with photos, seal quality, labeling accuracy, weight verification +6. PROCESS CONTROL: Temperature monitoring, time parameters, equipment calibration status +7. COMPLIANCE VERIFICATION: HACCP principles, Dubai Municipality requirements, ISO standards, traceability +8. DOCUMENTATION: Batch codes, production dates, certificates, inspector assessment + +Use intelligent parameter type selection: +- Image Upload for visual inspections and evidence documentation +- Toggle for pass/fail and binary assessments +- Checklist for foreign objects, allergens, and multi-item verifications +- Numeric Input for all measurements with proper specifications and units +- Text Input for codes, dates, and identifiers +- Remarks for detailed observations and corrective actions + +Include specific regulatory clause references where applicable and ensure professional formatting that matches Al Kabeer Group's quality standards. +""" + +# Enhanced digitization system prompt +ENHANCED_DIGITIZE_SYSTEM_PROMPT = """ +You are the Swift Check AI digitization assistant. Your job is to analyze OCR-extracted text from scanned QC checklists and convert them into structured parameters for comprehensive food safety and quality control checklists. + +# YOUR TASKS: +1. Recognize and preserve table structures and section headings +2. Identify quality control parameters with their proper input types +3. Extract specifications, tolerance limits, and measurement units +4. Determine appropriate parameter types based on content analysis +5. 
Maintain professional formatting and organization + +# INTELLIGENT PARAMETER TYPE DETECTION: + +## Image Upload - DETECT FOR: +- Parameters mentioning "photo", "attach", "capture", "visual", "appearance" +- Instructions like "attach photos", "capture variations" +- Visual inspection requirements + +## Toggle - DETECT FOR: +- Binary choices: "Acceptable/Non-acceptable", "Present/Absent", "Pass/Fail" +- "Yes/No" type assessments +- Simple pass/fail criteria + +## Checklist - DETECT FOR: +- Lists of items to verify (foreign objects, allergens, defects) +- Multiple related items that can be selected simultaneously +- Categories with sub-items + +## Numeric Input - DETECT FOR: +- Measurements with units and tolerances +- Temperature readings, weights, dimensions +- Time durations, counts, percentages +- Values with specifications like "±5g", "<10^4", "2-3 minutes" + +## Text Input - DETECT FOR: +- Codes, dates, identifiers +- Batch numbers, lot codes +- Names, locations, serial numbers + +## Remarks - DETECT FOR: +- "Remarks", "Comments", "Observations", "Notes" +- Areas requiring detailed explanations +- Corrective action descriptions + +# TABLE STRUCTURE RECOGNITION: +- Preserve section headings like "ORGANOLEPTIC EVALUATION", "COOKING DETAILS", "PACKAGING & FREEZING" +- Maintain parameter groupings and logical flow +- Keep tolerance limits and specifications with their parameters +- Preserve professional formatting structure + +# OUTPUT FORMAT: +Provide a comprehensive JSON array with intelligent parameter type selection: +[ + { + "Parameter": "Actual Parameter Name from Document", + "Type": "Intelligently Selected Type", + "Spec": "Extracted specifications with units", + "DropdownOptions": "Specific options from document", + "ChecklistOptions": "Comprehensive list items", + "IncludeRemarks": "Yes/No based on parameter complexity", + "Section": "Document section/category", + "ClauseReference": "Regulatory reference if identified" + } +] + +Focus on creating 
comprehensive, professional parameters that maintain the structure and intelligence of the original document while using appropriate modern input types. +""" + +def init_db(): + """Initialize database tables - runs once when app starts""" + con = sql.connect("swift_check.db") + cur = con.cursor() + cur.execute(""" + CREATE TABLE IF NOT EXISTS qc_requests ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + doc_type TEXT NOT NULL, + product_name TEXT NOT NULL, + supplier_name TEXT NOT NULL, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + user_message TEXT + )""") + + cur.execute(""" + CREATE TABLE IF NOT EXISTS llm_responses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + request_id INTEGER, + llm_response TEXT, + summary_text TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (request_id) REFERENCES qc_requests(id) + )""") + + cur.execute(""" + CREATE TABLE IF NOT EXISTS parameters ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + request_id INTEGER, + parameter_name TEXT, + type TEXT, + spec TEXT, + dropdown_options TEXT, + include_remarks TEXT, + section TEXT, + clause_reference TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (request_id) REFERENCES qc_requests(id) + )""") + + cur.execute(""" + CREATE TABLE IF NOT EXISTS json_templates ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + request_id INTEGER, + template_json TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (request_id) REFERENCES qc_requests(id) + )""") + + con.commit() + con.close() + +init_db() + +def extract_top_level_json_array(text): + """Extract the first top-level JSON array from text""" + start = text.find('[') + if start == -1: + return "" + balance = 0 + end = start + for i in range(start, len(text)): + char = text[i] + if char == '[': + balance += 1 + elif char == ']': + balance -= 1 + if balance == 0: + end = i + break + return text[start:end+1] + +def enhanced_call_groq_llm(user_message, doc_type, product_name, supplier_name, existing_parameters=None, 
is_digitization=False): + """ + Enhanced Groq LLM call with comprehensive RAG support. + Retrieves context from all 3 VDBs before calling the LLM. + """ + if not Groq: + return "Groq LLM call failed: 'groq' library not found or not installed." + + GROQ_API_KEY = "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" + domain = "Food Manufacturing" + + # Get comprehensive context from all VDBs + print(f"🔍 Retrieving comprehensive context for: {product_name}") + comprehensive_context = get_comprehensive_context(product_name, domain) + + # Format context for prompt + formatted_context = format_context_for_prompt(comprehensive_context, max_length=4500) + + # Generate header and supplier info + header_text = f"{product_name} {doc_type}" + supplier_info = f"Supplier Name: {supplier_name}" + + # Check if user message contains reference document content + has_reference = "Reference document content" in user_message + + # Select appropriate system prompt + if is_digitization: + system_instructions = ENHANCED_DIGITIZE_SYSTEM_PROMPT + else: + system_instructions = ENHANCED_SYSTEM_PROMPT + + # Enhanced context with regulatory compliance focus + enhanced_context = formatted_context + + if has_reference: + enhanced_context += f""" + +**CRITICAL DIGITIZATION GUIDANCE**: The reference document content is provided to understand the STRUCTURE and PROFESSIONAL FORMAT of QC parameters. + +Use the reference to identify: +1. Section headings and table structures (preserve them) +2. Parameter types and their appropriate input methods +3. Tolerance specifications and measurement units +4. Professional formatting and organization +5. Regulatory compliance requirements + +Create parameters with values, specifications, and input types SPECIFIC to {product_name} while maintaining the professional structure and comprehensive coverage of the reference document. 
+ """ + else: + enhanced_context += f""" + +For {product_name}, ensure you include MINIMUM 15+ parameters covering these MANDATORY categories: + +1. **Physical Parameters** (4-5): Appearance (Image+Toggle), Texture (Dropdown+Remarks), Dimensions (Numeric), Weight (Numeric), Shape (Dropdown) +2. **Sensory Parameters** (3-4): Flavor (Dropdown+Remarks), Aroma (Dropdown+Remarks), Mouthfeel (Dropdown), Overall Sensory (Toggle) +3. **Safety Parameters** (4-5): Foreign Objects (Checklist+Image), Microbiological (Numeric), Chemical (Numeric), Allergens (Checklist), Metal Detection (Text+Toggle) +4. **Product-Specific** (2-3): Based on product type (frozen, fried, baked, etc.) +5. **Packaging** (3-4): Integrity (Image+Checklist), Weight Verification (Numeric), Date Verification (Text), Batch Traceability (Text) +6. **Process Control** (2-3): Temperature (Numeric), Time (Numeric), Equipment (Toggle+Text) +7. **Compliance** (2-3): Regulatory (Checklist), Documentation (Toggle), Inspector Assessment (Toggle+Remarks) + +**INTELLIGENT TYPE SELECTION RULES:** +- Image Upload: Visual inspections, appearance, defects, evidence documentation +- Toggle: Pass/fail, acceptable/not acceptable, present/absent, binary assessments +- Checklist: Foreign objects (stones, glass, metals, plastic, wood, insects, hair, threads), allergens (all 14), packaging defects, compliance items +- Numeric Input: ALL measurements with specifications and units (e.g., "Weight: 25±2g", "Temperature: -18°C ±2°C") +- Text Input: Codes, dates, identifiers, batch numbers +- Remarks: Detailed observations, corrective actions, complex assessments + +**REGULATORY COMPLIANCE:** +Based on retrieved context, ensure compliance with: {', '.join([req['regulatory_body'] + ' ' + req.get('clause_reference', req.get('standard_code', '')) for req in comprehensive_context.get('regulatory_requirements', [])[:3]])} + +**PROFESSIONAL FORMATTING:** +Match Al Kabeer Group's quality standards with proper section organization, 
comprehensive coverage, and intelligent parameter type selection. + """ + + # Construct the final system prompt + final_system_prompt = f""" +{system_instructions} + +User context: +- Doc Type: {doc_type} +- Product: {product_name} +- Supplier: {supplier_name} +- Generated Header: {header_text} +- Supplier Info: {supplier_info} + +{enhanced_context} + +**VALID PARAMETER TYPES:** +Checklist, Dropdown, Image Upload, Remarks, Text Input, Numeric Input, Toggle + +**MANDATORY REQUIREMENTS:** +1. MINIMUM 15+ parameters for comprehensive coverage +2. Use intelligent type selection based on parameter purpose +3. Include specifications with units for ALL Numeric Input parameters +4. Provide comprehensive options for Checklist and Dropdown parameters +5. Add clause references where regulatory compliance is required +6. Include section organization for professional formatting +7. Add "IncludeRemarks": "Yes" for complex parameters requiring detailed observations + +**OUTPUT INSTRUCTIONS:** +1. Provide a brief summary describing the comprehensive QC parameters created. +2. Then produce a bracketed JSON array with intelligent parameter selection. 
+ Example: + [ + {{ + "action": "add", + "Parameter": "Product Appearance", + "Type": "Image Upload", + "Spec": "Visual inspection with photo evidence", + "DropdownOptions": "", + "ChecklistOptions": "", + "IncludeRemarks": "Yes", + "Section": "Physical Parameters", + "ClauseReference": "Dubai Municipality Section 4.1.2" + }}, + {{ + "action": "add", + "Parameter": "Foreign Objects Detection", + "Type": "Checklist", + "Spec": "Zero tolerance for all foreign materials", + "ChecklistOptions": "Stones, Glass, Metals, Plastic, Wood, Insects/Pests, Hair, Threads, Paper, Bones, Feathers", + "IncludeRemarks": "Yes", + "Section": "Safety Parameters", + "ClauseReference": "HACCP Principle 2" + }}, + {{ + "action": "add", + "Parameter": "Net Weight", + "Type": "Numeric Input", + "Spec": "25±2g per piece", + "DropdownOptions": "", + "IncludeRemarks": "No", + "Section": "Physical Parameters" + }} + ] +""" + + messages = [ + {"role": "system", "content": final_system_prompt}, + {"role": "user", "content": user_message}, + ] + + client = Groq(api_key=GROQ_API_KEY) + + try: + response = client.chat.completions.create( + messages=messages, + model="llama-3.3-70b-versatile", + stream=False, + temperature=0.2 # Lower temperature for more consistent professional output + ) + return response.choices[0].message.content.strip() + except Exception as e: + return f"Groq LLM call failed: {str(e)}" + +def parse_llm_changes(llm_text): + """Parse LLM response into summary and changes""" + json_array_text = extract_top_level_json_array(llm_text) + changes = [] + if json_array_text: + try: + changes = json.loads(json_array_text) + except Exception as e: + print("JSON parse error:", e) + summary_text = llm_text.replace(json_array_text, "").strip() if json_array_text else llm_text.strip() + return summary_text, changes + +def apply_changes_to_params(parameters, changes): + """Apply changes to parameters with enhanced parameter handling""" + valid_types = ["Checklist", "Dropdown", "Image Upload", 
"Remarks", "Text Input", "Numeric Input", "Toggle"] + + for change in changes: + if not isinstance(change, dict): + print(f"Skipping non-dict change: {change}") + continue + + action = change.get("action", "").lower() + p_name = change.get("Parameter", "Unnamed") + options = change.get("DropdownOptions", "") + checklist_options = change.get("ChecklistOptions", "") + + # Handle both DropdownOptions and ChecklistOptions + if not options and checklist_options: + options = checklist_options + if isinstance(options, list): + options = ", ".join(options) + + if action == "add": + new_type = change.get("Type", "Text Input") + if new_type not in valid_types: + new_type = "Text Input" + + new_param = { + "Parameter": p_name, + "Type": new_type, + "Spec": change.get("Spec", ""), + "DropdownOptions": options, + "IncludeRemarks": change.get("IncludeRemarks", "No"), + "Section": change.get("Section", "General"), + "ClauseReference": change.get("ClauseReference", "") + } + parameters.append(new_param) + + elif action == "remove": + parameters[:] = [p for p in parameters if p["Parameter"].lower() != p_name.lower()] + + elif action == "update": + for p in parameters: + if p["Parameter"].lower() == p_name.lower(): + new_type = change.get("Type", "Text Input") + if new_type not in valid_types: + new_type = "Text Input" + p["Type"] = new_type + p["Spec"] = change.get("Spec", "") + p["DropdownOptions"] = options + p["IncludeRemarks"] = change.get("IncludeRemarks", "No") + p["Section"] = change.get("Section", "General") + p["ClauseReference"] = change.get("ClauseReference", "") + break + + return parameters + +def generate_enhanced_json_template(doc_type, product_name, supplier_name, parameters): + """ + Enhanced JSON template generation with intelligent parameter type handling. 
+ """ + header_text = f"{product_name} {doc_type}" + template = { + "templateId": "neY5j", + "isDrafted": False, + "pageStyle": { + "margin": { + "top": 10, + "bottom": 10, + "left": 10, + "right": 10 + }, + "showPageNumber": False, + "headerImgUrl": "", + "fotterImgUrl": "" + }, + "pageToolsDataList": [], + "workflowInfo": { + "currentState": "Draft", + "approvalStates": ["Draft", "Under Review", "Approved", "Rejected"], + "currentApprover": { + "userId": "user123", + "name": "Ashish Kumar", + "role": "QC Manager" + }, + "previousApprovers": [ + { + "userId": "user456", + "name": "Raj Singh", + "role": "QC Supervisor", + "approvalDate": "2025-05-01T10:30:00Z", + "status": "Approved", + "comments": "Looks good to me." + } + ], + "nextApprovers": [ + { + "userId": "user789", + "name": "Priya Patel", + "role": "CEO" + } + ] + } + } + + def generate_tool_id(): + return ''.join(random.choices(string.ascii_lowercase + string.digits, k=5)) + + # Add main header + title_text = header_text + heading_tool = { + "toolId": generate_tool_id(), + "toolType": "HEADING", + "textData": { + "text": title_text, + "isBold": True, + "isItalic": False, + "isUnderlined": False, + "textAliend": "LEFT", + "color": 4294967295, # White + "fontSize": 14 + }, + "boxData": { + "fillColor": 4288111521, # Blue background + "borderEnable": False, + "borderColor": 4294967295, + "borderWidth": 0.8, + "boxAlignment": "CENTER_LEFT", + "cornerRadius": { + "topLeft": 0, + "topRight": 0, + "bottomLeft": 0, + "bottomRight": 0 + }, + "padding": { + "top": 4, + "bottom": 4, + "left": 9, + "right": 4 + }, + "margin": { + "top": 0, + "bottom": 0, + "left": 0, + "right": 0 + } + }, + "toolWidth": 1.7976931348623157e+308 + } + template["pageToolsDataList"].append(heading_tool) + + # Add supplier information + supplier_text = { + "toolId": generate_tool_id(), + "toolType": "TEXT", + "textData": { + "text": f"Supplier Name: {supplier_name}", + "isBold": True, + "isItalic": False, + "isUnderlined": False, + 
"textAliend": "LEFT", + "color": 4278190080, # Black + "fontSize": 12 + }, + "toolHeight": 30, + "toolWidth": 1.7976931348623157e+308 + } + template["pageToolsDataList"].append(supplier_text) + + # Group parameters by section for better organization + sections = {} + for param in parameters: + section = param.get("Section", "General Parameters") + if section not in sections: + sections[section] = [] + sections[section].append(param) + + # Add parameters organized by sections + for section_name, section_params in sections.items(): + # Add section header + if section_name != "General Parameters": + section_header = { + "toolId": generate_tool_id(), + "toolType": "TEXT", + "textData": { + "text": section_name.upper(), + "isBold": True, + "isItalic": False, + "isUnderlined": True, + "textAliend": "LEFT", + "color": 4283215696, # Green + "fontSize": 13 + }, + "toolHeight": 35, + "toolWidth": 1.7976931348623157e+308 + } + template["pageToolsDataList"].append(section_header) + + # Add parameters in this section + for param in section_params: + param_name = param.get("Parameter", "") + param_type = param.get("Type", "Text Input") + spec = param.get("Spec", "") + options = param.get("DropdownOptions", "") + include_remarks = param.get("IncludeRemarks", "No") + clause_ref = param.get("ClauseReference", "") + + # Create display name with clause reference + display_name = param_name + if clause_ref: + display_name += f" ({clause_ref})" + + # Split options into a list if it's a string + option_list = [] + if isinstance(options, str) and options.strip(): + option_list = [opt.strip() for opt in options.split(",") if opt.strip()] + + # ENHANCED PARAMETER TYPE HANDLING + if param_type == "Image Upload": + # Create image upload tool with toggle + image_tool = { + "toolId": generate_tool_id(), + "toolType": "IMAGE", + "imageLableData": { + "text": display_name + ":", + "isBold": True, + "isItalic": False, + "isUnderlined": False, + "textAliend": "LEFT", + "fontSize": 14, + 
"lablePositioned": "LEFT", + "spacing": 10, + "txtColor": 4278190080, # Black + "showLable": True + }, + "imageData": { + "showImageUploadArea": True, + "width": 200, + "height": 150 + }, + "iconData": 57344, + "showIcon": False, + "iconCodePoint": 59729, + "iconSize": 30, + "iconColor": 4278190080, # Black + "toolHeight": 160, + "toolWidth": 1.7976931348623157e+308, + "showToggle": True, + "imageToggleData": { + "label": "Assessment", + "isBold": True, + "isItalic": False, + "isUnderlined": False, + "fontSize": 14, + "showLabel": True, + "enabledText": "Acceptable", + "disabledText": "Not Acceptable", + "enabledColor": 4283215696, # Green + "disabledColor": 4294198070, # Red + "isSelected": True + } + } + template["pageToolsDataList"].append(image_tool) + + elif param_type == "Toggle": + # Create toggle tool + toggle_tool = { + "toolId": generate_tool_id(), + "toolType": "TOGGLE", + "toggleData": { + "disabledColor": 4294198070, # Red + "disabledText": "Not Acceptable" if not option_list else option_list[1] if len(option_list) > 1 else "No", + "enabledColor": 4283215696, # Green + "enabledText": "Acceptable" if not option_list else option_list[0] if option_list else "Yes", + "showLabel": True, + "label": display_name, + "labelFontSize": 14, + "labelTextColor": 4278190080, # Black + "isBold": True, + "isItalic": False, + "isSelected": True, + "toggleTextFontSize": 12, + "toggleTextIsBold": False + }, + "toolWidth": 1.7976931348623157e+308, + "toolHeight": 80 + } + template["pageToolsDataList"].append(toggle_tool) + + elif param_type == "Dropdown": + # Create dropdown tool + dropdown_tool = { + "toolId": generate_tool_id(), + "toolType": "DROPDOWN", + "dropdownData": { + "hintText": f"Select {param_name.lower()}", + "hintTextColor": 4288585374, # Gray + "hintFontSize": 14, + "dropdownWidth": 350, + "spacingBetweeenLableAndDropdownWidth": 10, + "showLable": True, + "labelText": display_name, + "isBold": True, + "isItalic": False, + "isUnderlined": False, + 
"textAliend": "LEFT", + "lablePositioned": "TOP", + "labelFontSize": 14, + "lableTextColor": 4278190080, # Black + "numberOfOptions": len(option_list) if option_list else 3, + "optionFontSize": 14, + "optionTextColor": 4278190080, # Black + "optionLst": option_list if option_list else ["Acceptable", "Marginal", "Not Acceptable"], + "selectedOptionIndex": -1 + }, + "toolHeight": 90, + "toolWidth": 1.7976931348623157e+308 + } + template["pageToolsDataList"].append(dropdown_tool) + + elif param_type == "Checklist": + # Create checkbox tool for checklists + if not option_list: + option_list = ["Item 1", "Item 2", "Item 3"] + + checkbox_tool = { + "toolId": generate_tool_id(), + "toolType": "CHECKBOX", + "checkboxData": { + "numberOfCheckboxes": len(option_list), + "checkboxBgColor": 4294967295, # White + "spacing": 8, + "runSpacing": 8, + "checkboxTileWidth": 140, + "checkBoxAlignmentEnum": "HORIZONTAL", + "checkBoxButtonStyleEnum": "CHECKBOX", + "checkBoxPositionedEnum": "START", + "checkBoxSelectionModeEnum": "MULTIPLE", + "isBold": False, + "isItalic": False, + "isUnderlined": False, + "textAliend": "LEFT", + "fontSize": 13, + "lablePositioned": "LEFT", + "txtColor": 4278190080, # Black + "labelLst": option_list, + "showLable": True, + "selectedIndexLstForMultiSelect": [], + "selectedIndexForSingleSelect": 0 + }, + "toolWidth": 1.7976931348623157e+308, + "toolHeight": max(100, len(option_list) * 15 + 40) # Dynamic height based on items + } + + # Add section label for checklist + checklist_label = { + "toolId": generate_tool_id(), + "toolType": "TEXT", + "textData": { + "text": display_name + ":", + "isBold": True, + "isItalic": False, + "isUnderlined": False, + "textAliend": "LEFT", + "color": 4278190080, # Black + "fontSize": 14 + }, + "toolHeight": 25, + "toolWidth": 1.7976931348623157e+308 + } + template["pageToolsDataList"].append(checklist_label) + template["pageToolsDataList"].append(checkbox_tool) + + elif param_type == "Numeric Input": + # Create numeric 
input with specification + label_text = display_name + if spec: + label_text += f" (Spec: {spec})" + + numeric_tool = { + "toolId": generate_tool_id(), + "toolType": "TEXTAREA", + "lableData": { + "text": label_text + ":", + "isBold": True, + "isItalic": False, + "isUnderlined": False, + "textAliend": "LEFT", + "fontSize": 14, + "lablePositioned": "TOP_LEFT", + "spacing": 5, + "txtColor": 4278190080, # Black + "showLable": True + }, + "textAreaData": { + "isFilled": True, + "fillColor": 4292927712, # Light gray + "borderType": "UNDERLINED", + "storkStyle": "LINE", + "dummyTxt": "Enter numeric value" + (f" ({spec})" if spec else ""), + "borderColor": 4278190080, # Black + "isBold": False, + "isItalic": False, + "isUnderlined": False, + "fontSize": 12, + "txtColor": 4288585374 # Gray + }, + "toolHeight": 75, + "toolWidth": 1.7976931348623157e+308, + "toggleData": { + "label": "Status", + "isBold": True, + "isItalic": False, + "isUnderlined": False, + "fontSize": 12, + "showLabel": True, + "enabledText": "Within Spec", + "disabledText": "Out of Spec", + "enabledColor": 4283215696, # Green + "disabledColor": 4294198070, # Red + "isSelected": True + }, + "showToggle": True # Show toggle for spec compliance + } + template["pageToolsDataList"].append(numeric_tool) + + elif param_type == "Text Input": + # Create text input + text_tool = { + "toolId": generate_tool_id(), + "toolType": "TEXTAREA", + "lableData": { + "text": display_name + ":", + "isBold": True, + "isItalic": False, + "isUnderlined": False, + "textAliend": "LEFT", + "fontSize": 14, + "lablePositioned": "TOP_LEFT", + "spacing": 5, + "txtColor": 4278190080, # Black + "showLable": True + }, + "textAreaData": { + "isFilled": True, + "fillColor": 4292927712, # Light gray + "borderType": "UNDERLINED", + "storkStyle": "LINE", + "dummyTxt": "Enter " + param_name.lower(), + "borderColor": 4278190080, # Black + "isBold": False, + "isItalic": False, + "isUnderlined": False, + "fontSize": 12, + "txtColor": 4288585374 # 
Gray + }, + "toolHeight": 65, + "toolWidth": 1.7976931348623157e+308, + "showToggle": False + } + template["pageToolsDataList"].append(text_tool) + + elif param_type == "Remarks": + # Create remarks/textarea + remarks_tool = { + "toolId": generate_tool_id(), + "toolType": "TEXTAREA", + "lableData": { + "text": display_name + ":", + "isBold": True, + "isItalic": False, + "isUnderlined": False, + "textAliend": "LEFT", + "fontSize": 14, + "lablePositioned": "TOP_LEFT", + "spacing": 5, + "txtColor": 4278190080, # Black + "showLable": True + }, + "textAreaData": { + "isFilled": True, + "fillColor": 4292927712, # Light gray + "borderType": "UNDERLINED", + "storkStyle": "LINE", + "dummyTxt": "Enter detailed observations and remarks", + "borderColor": 4278190080, # Black + "isBold": False, + "isItalic": False, + "isUnderlined": False, + "fontSize": 12, + "txtColor": 4288585374 # Gray + }, + "toolHeight": 100, # Larger height for remarks + "toolWidth": 1.7976931348623157e+308, + "showToggle": False + } + template["pageToolsDataList"].append(remarks_tool) + + # Add additional remarks field if requested and not already a remarks parameter + if include_remarks == "Yes" and param_type != "Remarks": + additional_remarks = { + "toolId": generate_tool_id(), + "toolType": "TEXTAREA", + "lableData": { + "text": f"{param_name} - Additional Remarks:", + "isBold": False, + "isItalic": True, + "isUnderlined": False, + "textAliend": "LEFT", + "fontSize": 12, + "lablePositioned": "TOP_LEFT", + "spacing": 5, + "txtColor": 4278190080, # Black + "showLable": True + }, + "textAreaData": { + "isFilled": True, + "fillColor": 4292927712, # Light gray + "borderType": "UNDERLINED", + "storkStyle": "LINE", + "dummyTxt": "Additional observations or corrective actions", + "borderColor": 4278190080, # Black + "isBold": False, + "isItalic": False, + "isUnderlined": False, + "fontSize": 11, + "txtColor": 4288585374 # Gray + }, + "toolHeight": 60, + "toolWidth": 1.7976931348623157e+308, + "showToggle": 
False + } + template["pageToolsDataList"].append(additional_remarks) + + # Add final overall assessment section + final_assessment_header = { + "toolId": generate_tool_id(), + "toolType": "TEXT", + "textData": { + "text": "FINAL ASSESSMENT", + "isBold": True, + "isItalic": False, + "isUnderlined": True, + "textAliend": "CENTER", + "color": 4283215696, # Green + "fontSize": 14 + }, + "toolHeight": 35, + "toolWidth": 1.7976931348623157e+308 + } + template["pageToolsDataList"].append(final_assessment_header) + + # Overall quality assessment toggle + overall_toggle = { + "toolId": generate_tool_id(), + "toolType": "TOGGLE", + "toggleData": { + "disabledColor": 4294198070, # Red + "disabledText": "REJECTED", + "enabledColor": 4283215696, # Green + "enabledText": "APPROVED", + "showLabel": True, + "label": "Overall Quality Assessment", + "labelFontSize": 15, + "labelTextColor": 4278190080, # Black + "isBold": True, + "isItalic": False, + "isSelected": True, + "toggleTextFontSize": 14, + "toggleTextIsBold": True + }, + "toolWidth": 1.7976931348623157e+308, + "toolHeight": 100 + } + template["pageToolsDataList"].append(overall_toggle) + + # Inspector signature and date + inspector_info = { + "toolId": generate_tool_id(), + "toolType": "TEXTAREA", + "lableData": { + "text": "Inspector Name & Signature:", + "isBold": True, + "isItalic": False, + "isUnderlined": False, + "textAliend": "LEFT", + "fontSize": 14, + "lablePositioned": "TOP_LEFT", + "spacing": 5, + "txtColor": 4278190080, # Black + "showLable": True + }, + "textAreaData": { + "isFilled": True, + "fillColor": 4292927712, # Light gray + "borderType": "UNDERLINED", + "storkStyle": "LINE", + "dummyTxt": "Inspector name and signature", + "borderColor": 4278190080, # Black + "isBold": False, + "isItalic": False, + "isUnderlined": False, + "fontSize": 12, + "txtColor": 4288585374 # Gray + }, + "toolHeight": 80, + "toolWidth": 1.7976931348623157e+308, + "showToggle": False + } + 
# Enhanced OCR and text extraction functions

# Tesseract options shared by the PDF and image OCR paths.
_OCR_CONFIG = r'--oem 3 --psm 6 -c preserve_interword_spaces=1'


def enhanced_extract_text_from_document(filepath, file_ext):
    """Extract text from a PDF or image file, running OCR where needed.

    For PDFs, each page's embedded text is used unless it is shorter than 100
    characters after stripping, in which case the page is rendered at 3x zoom
    and passed to Tesseract. Every page is post-processed with
    ``enhance_table_structure`` and prefixed with a ``=== PAGE n ===`` marker.
    Any other ``file_ext`` is treated as an image and OCR'd directly.

    Args:
        filepath: Path of the file to read.
        file_ext: Lower-case extension without the dot; ``'pdf'`` selects the
            PDF path.

    Returns:
        The extracted text with surrounding whitespace stripped, or ``None``
        if any error occurred (the error is printed).
    """
    try:
        if file_ext == 'pdf':
            from io import BytesIO

            page_chunks = []
            pdf_document = fitz.open(filepath)
            try:
                zoom = fitz.Matrix(3, 3)  # Higher zoom for better OCR
                for page_num in range(pdf_document.page_count):
                    page = pdf_document[page_num]

                    # Try to extract text directly first
                    text = page.get_text()

                    # If minimal text found, use OCR
                    if len(text.strip()) < 100:
                        pix = page.get_pixmap(matrix=zoom)
                        img_data = pix.pil_tobytes(format="PNG")
                        with Image.open(BytesIO(img_data)) as image:
                            text = pytesseract.image_to_string(image, config=_OCR_CONFIG)

                    processed_text = enhance_table_structure(text)
                    page_chunks.append(f"\n=== PAGE {page_num + 1} ===\n{processed_text}\n")
            finally:
                # Close even when a page fails, so the file handle is released.
                pdf_document.close()
            extracted_text = "".join(page_chunks)

        else:  # Image files
            # The context manager releases the file handle before returning;
            # callers in this file delete the file right after this call.
            with Image.open(filepath) as image:
                text = pytesseract.image_to_string(image, config=_OCR_CONFIG)
            extracted_text = enhance_table_structure(text)

        return extracted_text.strip()

    except Exception as e:
        # Boundary handler: callers rely on None to signal extraction failure.
        print(f"Error during enhanced document processing: {str(e)}")
        return None


# Section headings that get promoted to "## HEADING" lines.
_SECTION_HEADING_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'(ORGANOLEPTIC\s+EVALUATION)',
        r'(COOKING\s+DETAILS)',
        r'(PACKAGING\s*&\s*FREEZING)',
        r'(FREEZING\s+DETAILS)',
        r'(METAL\s+SCREENING)',
        r'(SIZE\s+VARIATIONS)',
        r'(COLOUR\s+VARIATIONS)',
        r'(EVALUATION\s+OF\s+PASTRY)',
        r'(FINAL\s+ASSESSMENT)',
    )
)

# A label is letters and same-line blanks, starting and ending with a letter,
# so the bold markers never wrap a newline or text from the previous line.
_LABEL = r'([A-Za-z](?:[A-Za-z \t]*[A-Za-z])?)'

# (pattern, replacement) pairs applied in order to mark parameter/value pairs.
_PARAM_VALUE_PATTERNS = (
    (re.compile(_LABEL + r'[ \t]*:\s*(Acceptable|Non-acceptable|Present|Absent|To be mentioned)',
                re.IGNORECASE),
     r'**\1**: \2'),
    (re.compile(_LABEL + r'[ \t]+(Sam\s+\d+)', re.IGNORECASE),
     r'**\1** - \2'),
    # The value stays on one line so a keyword on the next line is still found.
    (re.compile(r'\b(Temperature|Weight|Time|Dimensions?)\b[: \t]+([0-9\-+±°C \t\w]+)',
                re.IGNORECASE),
     r'**\1**: \2'),
)


def enhance_table_structure(text):
    """Mark up OCR text so section headings and parameter/value pairs stand out.

    Known section headings become ``## HEADING`` on their own line, recognised
    ``label: value`` pairs get a bold label, runs of three or more line breaks
    collapse to a blank line, and runs of spaces/tabs collapse to one space.

    Args:
        text: Raw extracted text. Falsy input is returned unchanged.

    Returns:
        The processed text (not stripped).
    """
    if not text:
        return text

    processed_text = text
    for heading_re in _SECTION_HEADING_PATTERNS:
        processed_text = heading_re.sub(r'\n## \1\n', processed_text)

    for pair_re, replacement in _PARAM_VALUE_PATTERNS:
        processed_text = pair_re.sub(replacement, processed_text)

    # Clean up excessive whitespace while preserving structure
    processed_text = re.sub(r'\n\s*\n\s*\n', '\n\n', processed_text)
    processed_text = re.sub(r'[ \t]+', ' ', processed_text)

    return processed_text


# Ordered (pattern, document type) pairs; the first match wins, so the
# product-specific patterns precede the generic ones.
_DOC_TYPE_PATTERNS = (
    (r'(?i)(MALABAR\s*PARATHA.*INSPECTION)', "Malabar Paratha Inspection Record"),
    (r'(?i)(GREEN\s*PEAS.*INSPECTION)', "Green Peas Inspection Record"),
    (r'(?i)(VEGETABLE\s*SAMOSA.*INSPECTION)', "Vegetable Samosa Inspection Record"),
    (r'(?i)(CONTAINER.*INSPECTION.*REPORT)', "Container Inspection Report"),
    (r'(?i)quality\s*(?:control)?\s*checklist', "Quality Control Checklist"),
    (r'(?i)inspection\s*(?:record|checklist)', "Inspection Checklist"),
    (r'(?i)pre[\-\s]shipment.*inspection', "Pre-Shipment Inspection"),
)

_PRODUCT_PATTERNS = (
    r'(?i)product\s*(?:name|description)[:\-\s]*([^\n]{1,50})',
    r'(?i)(MALABAR\s*PARATHA)',
    r'(?i)(GREEN\s*PEAS)',
    r'(?i)(VEGETABLE\s*SAMOSA[S]?)',
    r'(?i)(SWEET\s*CORN)',
)

_SUPPLIER_PATTERNS = (
    r'(?i)supplier\s*(?:name)?[:\-\s]*([^\n]{1,40})',
    r'(?i)manufacturing\s*unit[:\-\s]*([^\n]{1,40})',
    r'(?i)(AL\s*KABEER)',
    r'(?i)(CASCADE\s*MARINE)',
    r'(?i)(SAHAR\s*FOOD)',
)


def _first_captured(patterns, text, default):
    """Return the first non-blank group(1) among ``patterns``, else ``default``."""
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            candidate = match.group(1).strip()
            # A whitespace-only capture is no answer; let later patterns try.
            if candidate:
                return candidate
    return default


def enhanced_extract_metadata_from_ocr(ocr_text):
    """Guess document type, product name and supplier name from OCR text.

    Args:
        ocr_text: Text produced by OCR. ``None`` or an empty string yields the
            defaults instead of raising.

    Returns:
        A ``(doc_type, product_name, supplier_name)`` tuple. The defaults are
        ``"Quality Control Checklist"``, ``"Food Product"`` and ``""``.
    """
    detected_doc_type = "Quality Control Checklist"  # Default
    if not ocr_text:
        # The text extractor in this file returns None on failure.
        return detected_doc_type, "Food Product", ""

    for pattern, doc_type in _DOC_TYPE_PATTERNS:
        if re.search(pattern, ocr_text):
            detected_doc_type = doc_type
            break

    detected_product = _first_captured(_PRODUCT_PATTERNS, ocr_text, "Food Product")
    detected_supplier = _first_captured(_SUPPLIER_PATTERNS, ocr_text, "")

    return detected_doc_type, detected_product, detected_supplier


# File upload handling
ALLOWED_EXTENSIONS = {'pdf', 'png', 'jpg', 'jpeg'}


def allowed_file(filename):
    """Check if file extension is allowed.

    Args:
        filename: Client-supplied file name; non-string values are rejected.

    Returns:
        True when the text after the last dot, lower-cased, is in
        ``ALLOWED_EXTENSIONS``.
    """
    if not isinstance(filename, str) or '.' not in filename:
        return False
    return filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


def fetch_json_from_firebase(firebase_json_url, timeout=10):
    """Fetch JSON template from Firebase Storage URL.

    Args:
        firebase_json_url: URL to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (non-200 status,
        network error, or a body that is not valid JSON).
    """
    # NOTE(review): the /edit route passes a client-supplied URL here, so this
    # is a server-side request to an arbitrary host; consider restricting the
    # allowed hosts.
    try:
        response = requests.get(firebase_json_url, timeout=timeout)
        if response.status_code != 200:
            return None
        return response.json()
    except Exception as e:
        print(f"Error fetching JSON from Firebase: {str(e)}")
        return None
+

Enhanced Swift Check LLM API v2.0

+ +
+
+

🎯 Enhanced Intelligence

+

15+ parameter minimum, smart type detection, comprehensive coverage

+
+
+

🏛️ Regulatory Compliance

+

Dubai Municipality, HACCP, ISO standards with clause references

+
+
+

📋 Professional Format

+

Al Kabeer Group standards, section organization, comprehensive structure

+
+
+

🔍 Advanced OCR

+

Table structure recognition, header preservation, intelligent parameter extraction

+
+
+ +

API Endpoints:

+ +
+

POST /refine - Create Enhanced QC Template

+

Creates comprehensive quality control templates with 15+ parameters, regulatory compliance, and intelligent type selection.

+ +

Enhanced Features:

+
    +
  • ✅ Comprehensive RAG context from 3 vector databases
  • +
  • ✅ Minimum 15+ parameters with intelligent type selection
  • +
  • ✅ Regulatory clause references (Dubai Municipality, HACCP, ISO)
  • +
  • ✅ Professional section organization
  • +
  • ✅ Smart parameter types: Image Upload, Toggle, Checklist, Numeric, Text, Remarks
  • +
+ +

Parameters:

+
    +
  • doc_type (required) - Document type
  • +
  • product_name (required) - Product name
  • +
  • supplier_name (required) - Supplier name
  • +
  • user_message (optional) - Additional instructions
  • +
  • context_file (optional) - Reference document
  • +
+
+ +
+

POST /edit - Edit with Enhanced Context

+

Modifies existing templates using comprehensive context and intelligent parameter optimization.

+ +

Enhanced Features:

+
    +
  • ✅ Context-aware parameter suggestions
  • +
  • ✅ Intelligent type conversion and optimization
  • +
  • ✅ Regulatory compliance updates
  • +
  • ✅ Professional formatting improvements
  • +
+
+ +
+

POST /digitize - Advanced Document Digitization

+

Enhanced OCR processing with table structure recognition and intelligent parameter extraction.

+ +

Advanced Features:

+
    +
  • 🔥 Table structure preservation
  • +
  • 🔥 Section heading recognition (ORGANOLEPTIC EVALUATION, COOKING DETAILS, etc.)
  • +
  • 🔥 Intelligent parameter type inference
  • +
  • 🔥 Professional formatting maintenance
  • +
  • 🔥 Comprehensive parameter extraction (15+ parameters)
  • +
+ +

Parameters (multipart/form-data):

+
    +
  • checklist_file (required) - Scanned document
  • +
  • doc_type (optional) - Document type
  • +
  • product_name (optional) - Product name
  • +
  • supplier_name (optional) - Supplier name
  • +
+
+ +
+

GET /template/{request_id} - Get Enhanced Template JSON

+

Returns professionally formatted JSON templates with intelligent parameter types.

+
+ +
+

GET /history - View Request History

+

Browse all QC requests with enhanced preview and download options.

+
+ +
+

GET /test-rag - Test Enhanced RAG System

+

Test the comprehensive RAG system with all 3 vector databases.

+
+ +

🚀 What's New in v2.0:

+
    +
  • Comprehensive RAG Integration: Product specs + Checklist examples + Regulatory guidelines
  • +
  • 15+ Parameter Minimum: Professional depth matching Al Kabeer standards
  • +
  • Intelligent Type Selection: 7 parameter types with smart detection
  • +
  • Regulatory Compliance: Clause references and compliance tracking
  • +
  • Enhanced OCR: Table structure and section preservation
  • +
  • Professional Formatting: Section organization and comprehensive coverage
  • +
+
def _read_context_upload(uploaded_file):
    """Run OCR on an optional reference upload and build the prompt context.

    Args:
        uploaded_file: The uploaded file object from ``request.files``, or
            ``None``.

    Returns:
        A ``(file_context, filename)`` tuple. ``file_context`` is the text to
        append to the user message, and ``filename`` is the sanitised file
        name. Both are ``""`` / ``None`` when nothing usable was uploaded.
    """
    if not (uploaded_file and allowed_file(uploaded_file.filename)):
        return "", None

    # Take the extension from the name that passed allowed_file():
    # secure_filename() can strip a name down to one without a dot.
    file_ext = uploaded_file.filename.rsplit('.', 1)[1].lower()
    filename = secure_filename(uploaded_file.filename)
    if '.' not in filename:
        filename = f"upload.{file_ext}"

    # TemporaryDirectory removes the file and directory even if save() or the
    # extraction raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = os.path.join(temp_dir, filename)
        uploaded_file.save(filepath)
        extracted_text = enhanced_extract_text_from_document(filepath, file_ext)

    if extracted_text:
        print(f"✅ Enhanced OCR extracted {len(extracted_text)} characters from {filename}")
        return f"\n\nReference document content ({filename}):\n{extracted_text}", filename
    return f"\n\n[Failed to extract text from {filename}]", filename


@app.route("/refine", methods=["POST"])
def enhanced_refine_parameters():
    """Create a QC template from a multipart form or JSON request.

    Required fields are ``doc_type``, ``product_name`` and ``supplier_name``.
    ``user_message`` is optional, and multipart requests may add a
    ``context_file`` whose OCR text is appended to the prompt.

    The LLM is called, its changes are applied to an empty parameter list, a
    JSON template is generated, and the request, LLM response, parameters and
    template are stored in ``swift_check.db``.

    Returns:
        A JSON response: a success payload with ``request_id`` and counts,
        HTTP 400 for a missing payload or required field, or HTTP 500 with the
        error text for any failure during generation or storage.
    """
    global global_parameters
    global global_json_template

    print(">> Enhanced /refine route called <<")

    uploaded_name = None

    # Handle both form data and JSON
    if request.content_type and request.content_type.startswith('multipart/form-data'):
        data = {
            "doc_type": request.form.get("doc_type", ""),
            "product_name": request.form.get("product_name", ""),
            "supplier_name": request.form.get("supplier_name", ""),
            "user_message": request.form.get("user_message", "")
        }
        file_context, uploaded_name = _read_context_upload(request.files.get('context_file'))
    else:
        # silent=True turns an unparsable or non-JSON body into None, so the
        # client gets the JSON error below rather than a framework error page.
        data = request.get_json(silent=True)
        if not data or not isinstance(data, dict):
            return jsonify({"error": "No JSON payload found"}), 400
        file_context = ""

    # Validate required fields
    doc_type = data.get("doc_type", "")
    product_name = data.get("product_name", "")
    supplier_name = data.get("supplier_name", "")

    if not doc_type:
        return jsonify({"error": "doc_type is required"}), 400
    if not product_name:
        return jsonify({"error": "product_name is required"}), 400
    if not supplier_name:
        return jsonify({"error": "supplier_name is required"}), 400

    # Use enhanced default prompt if none provided
    user_message = data.get("user_message", "")
    if not user_message:
        user_message = ENHANCED_DEFAULT_REFINE_PROMPT
    else:
        user_message = ENHANCED_DEFAULT_REFINE_PROMPT + "\n\nAdditional instructions: " + user_message

    # Add file context to user message if available
    if file_context:
        user_message += file_context

    con = None
    try:
        # Do the slow LLM call and template generation before opening the
        # database, so the SQLite write transaction is not held open across a
        # network request.
        llm_response = enhanced_call_groq_llm(
            user_message=user_message,
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            is_digitization=False
        )

        print("\n🎯 ENHANCED LLM RESPONSE:")
        print("=" * 50)
        print(llm_response[:500] + "..." if len(llm_response) > 500 else llm_response)
        print("=" * 50)

        # NOTE(review): when the LLM call fails, the helper returns an error
        # string, which parses to zero changes and is still stored as a
        # successful request. Confirm whether that should be an error.
        summary_text, changes_list = parse_llm_changes(llm_response)
        updated_params = apply_changes_to_params([], changes_list)
        print(f"✅ Generated {len(updated_params)} enhanced parameters")

        json_template = generate_enhanced_json_template(
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            parameters=updated_params
        )

        con = sql.connect("swift_check.db")
        cur = con.cursor()

        # Insert main request
        cur.execute("""
            INSERT INTO qc_requests
            (doc_type, product_name, supplier_name, user_message)
            VALUES (?, ?, ?, ?)
        """, (doc_type, product_name, supplier_name, user_message))

        request_id = cur.lastrowid
        print(f"✅ Created enhanced request with ID: {request_id}")

        # Store LLM response
        cur.execute("""
            INSERT INTO llm_responses
            (request_id, llm_response, summary_text)
            VALUES (?, ?, ?)
        """, (request_id, llm_response, summary_text))

        # Store parameters with enhanced metadata
        cur.executemany("""
            INSERT INTO parameters
            (request_id, parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, [
            (request_id,
             param.get("Parameter", ""),
             param.get("Type", ""),
             param.get("Spec", ""),
             param.get("DropdownOptions", ""),
             param.get("IncludeRemarks", "No"),
             param.get("Section", "General"),
             param.get("ClauseReference", ""))
            for param in updated_params
        ])

        # Store JSON template
        cur.execute("""
            INSERT INTO json_templates
            (request_id, template_json)
            VALUES (?, ?)
        """, (request_id, json.dumps(json_template)))

        con.commit()

        # Publish to module-level state only once the data is persisted.
        global_parameters = updated_params
        global_json_template = json_template

        response_data = {
            "success": True,
            "request_id": request_id,
            "message": f"Enhanced QC template created with {len(updated_params)} comprehensive parameters",
            "summary": summary_text,
            "parameters_count": len(updated_params),
            "enhancements": {
                "comprehensive_rag": True,
                "regulatory_compliance": True,
                "intelligent_types": True,
                "minimum_15_params": len(updated_params) >= 15
            }
        }

        if file_context:
            response_data["file_info"] = f"Enhanced OCR processed {uploaded_name}"

        return jsonify(response_data)

    except Exception as e:
        # Top-level boundary for the route: report the failure as HTTP 500.
        print(f"❌ Error in enhanced /refine: {str(e)}")
        if con is not None:
            con.rollback()
        return jsonify({"error": str(e)}), 500

    finally:
        if con is not None:
            con.close()
request.form.get("user_message", "") + } + + # Handle file upload with enhanced OCR + uploaded_file = request.files.get('context_file') + file_context = "" + + if uploaded_file and allowed_file(uploaded_file.filename): + filename = secure_filename(uploaded_file.filename) + file_ext = filename.rsplit('.', 1)[1].lower() + + temp_dir = tempfile.mkdtemp() + filepath = os.path.join(temp_dir, filename) + uploaded_file.save(filepath) + + extracted_text = enhanced_extract_text_from_document(filepath, file_ext) + + os.unlink(filepath) + os.rmdir(temp_dir) + + if extracted_text: + file_context = f"\n\nReference document content ({filename}):\n{extracted_text}" + print(f"✅ Enhanced OCR extracted {len(extracted_text)} characters from {filename}") + else: + file_context = f"\n\n[Failed to extract text from {filename}]" + + else: + data = request.get_json() + if not data: + return jsonify({"error": "No JSON payload found"}), 400 + file_context = "" + + # Validate required fields + user_message = data.get("user_message", "") + if not user_message: + return jsonify({"error": "user_message is required for editing"}), 400 + + request_id = data.get("request_id") + firebase_json_url = data.get("firebase_json_url") + + if not request_id and not firebase_json_url: + return jsonify({"error": "Either request_id or firebase_json_url is required"}), 400 + + # Add file context to user message if available + if file_context: + user_message += file_context + + try: + existing_parameters = [] + doc_type = "" + product_name = "" + supplier_name = "" + + con = sql.connect("swift_check.db") + cur = con.cursor() + + if request_id: + try: + request_id = int(request_id) + except ValueError: + return jsonify({"error": "request_id must be a valid integer"}), 400 + + # Fetch original request data with enhanced metadata + cur.execute(""" + SELECT doc_type, product_name, supplier_name + FROM qc_requests + WHERE id = ? 
+ """, (request_id,)) + + original_data = cur.fetchone() + if not original_data: + con.close() + return jsonify({"error": f"Request ID {request_id} not found"}), 404 + + doc_type, product_name, supplier_name = original_data + + # Fetch existing parameters with enhanced metadata + cur.execute(""" + SELECT parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference + FROM parameters + WHERE request_id = ? + ORDER BY id + """, (request_id,)) + + param_rows = cur.fetchall() + existing_parameters = [ + { + "Parameter": row[0], + "Type": row[1], + "Spec": row[2], + "DropdownOptions": row[3], + "IncludeRemarks": row[4], + "Section": row[5] or "General", + "ClauseReference": row[6] or "" + } + for row in param_rows + ] + + elif firebase_json_url: + # Enhanced Firebase template processing + template_data = fetch_json_from_firebase(firebase_json_url) + if not template_data: + con.close() + return jsonify({"error": "Failed to fetch template from Firebase URL"}), 400 + + # Enhanced parameter extraction from Firebase JSON + existing_parameters = [] + + for tool in template_data.get("pageToolsDataList", []): + tool_type = tool.get("toolType", "") + + if tool_type == "DROPDOWN": + dropdown_data = tool.get("dropdownData", {}) + existing_parameters.append({ + "Parameter": dropdown_data.get("labelText", "Dropdown Field"), + "Type": "Dropdown", + "Spec": "", + "DropdownOptions": ", ".join(dropdown_data.get("optionLst", [])), + "IncludeRemarks": "No", + "Section": "General", + "ClauseReference": "" + }) + elif tool_type == "CHECKBOX": + checkbox_data = tool.get("checkboxData", {}) + existing_parameters.append({ + "Parameter": "Checklist Group", + "Type": "Checklist", + "Spec": "", + "DropdownOptions": ", ".join(checkbox_data.get("labelLst", [])), + "IncludeRemarks": "No", + "Section": "General", + "ClauseReference": "" + }) + elif tool_type == "IMAGE": + image_data = tool.get("imageLableData", {}) + existing_parameters.append({ + "Parameter": 
image_data.get("text", "Image Upload").replace(":", ""), + "Type": "Image Upload", + "Spec": "Visual inspection with photo evidence", + "DropdownOptions": "", + "IncludeRemarks": "Yes", + "Section": "Visual Inspection", + "ClauseReference": "" + }) + elif tool_type == "TOGGLE": + toggle_data = tool.get("toggleData", {}) + existing_parameters.append({ + "Parameter": toggle_data.get("label", "Toggle Assessment"), + "Type": "Toggle", + "Spec": "", + "DropdownOptions": f"{toggle_data.get('enabledText', 'Yes')}, {toggle_data.get('disabledText', 'No')}", + "IncludeRemarks": "No", + "Section": "Assessment", + "ClauseReference": "" + }) + elif tool_type == "TEXTAREA": + label_data = tool.get("lableData", {}) + text_area_data = tool.get("textAreaData", {}) + label_text = label_data.get("text", "").replace(":", "") + + if "Remarks" in label_text or "remarks" in text_area_data.get("dummyTxt", ""): + param_type = "Remarks" + elif "numeric" in text_area_data.get("dummyTxt", "").lower(): + param_type = "Numeric Input" + else: + param_type = "Text Input" + + existing_parameters.append({ + "Parameter": label_text, + "Type": param_type, + "Spec": "", + "DropdownOptions": "", + "IncludeRemarks": "No", + "Section": "General", + "ClauseReference": "" + }) + + # Extract basic info from template + for tool in template_data.get("pageToolsDataList", []): + if tool.get("toolType") == "HEADING": + title_text = tool.get("textData", {}).get("text", "") + parts = title_text.split(" ", 1) + if len(parts) >= 2: + product_name = parts[0] + doc_type = parts[1] + else: + product_name = title_text + doc_type = "Inspection Document" + break + + if not product_name: + product_name = "Product" + if not doc_type: + doc_type = "Inspection Document" + + # Find supplier info + for tool in template_data.get("pageToolsDataList", []): + if tool.get("toolType") == "TEXT": + text = tool.get("textData", {}).get("text", "") + if "Supplier" in text: + supplier_name = text.replace("Supplier Name:", "").strip() + 
break + + if not supplier_name: + supplier_name = "Unknown Supplier" + + # Generate enhanced version ID + new_version_id = None + if request_id: + base_id = str(request_id) + cur.execute(""" + SELECT id FROM qc_requests + WHERE CAST(id AS TEXT) LIKE ? OR id = ? + ORDER BY id DESC + """, (base_id + '%', request_id)) + + existing_versions = [row[0] for row in cur.fetchall()] + + if request_id < 10: + new_version_id = int(base_id + str(len(existing_versions) + 1)) + else: + base = int(str(request_id)[0]) + new_version_id = int(str(base) + str(len([v for v in existing_versions if str(v).startswith(str(base))]) + 1)) + + # Insert new enhanced version + cur.execute(""" + INSERT INTO qc_requests + (doc_type, product_name, supplier_name, user_message) + VALUES (?, ?, ?, ?) + """, (doc_type, product_name, supplier_name, user_message)) + + if new_version_id: + cur.execute("UPDATE qc_requests SET id = ? WHERE id = last_insert_rowid()", (new_version_id,)) + created_id = new_version_id + else: + cur.execute("SELECT last_insert_rowid()") + created_id = cur.fetchone()[0] + + print(f"✅ Created enhanced edit version with ID: {created_id}") + + # Call enhanced LLM with comprehensive context + enhanced_message = f"ENHANCED EDIT REQUEST: {user_message}\n\nExisting parameters for optimization and enhancement: {len(existing_parameters)} parameters" + llm_response = enhanced_call_groq_llm( + user_message=enhanced_message, + doc_type=doc_type, + product_name=product_name, + supplier_name=supplier_name, + existing_parameters=existing_parameters, + is_digitization=False + ) + + print(f"\n🎯 ENHANCED EDIT LLM RESPONSE:") + print("=" * 50) + print(llm_response[:500] + "..." if len(llm_response) > 500 else llm_response) + print("=" * 50) + + # Parse and apply changes with enhanced handling + summary_text, changes_list = parse_llm_changes(llm_response) + + cur.execute(""" + INSERT INTO llm_responses + (request_id, llm_response, summary_text) + VALUES (?, ?, ?) 
+ """, (created_id, llm_response, summary_text)) + + updated_params = apply_changes_to_params(existing_parameters, changes_list) + global_parameters = updated_params + + print(f"✅ Enhanced edit generated {len(updated_params)} optimized parameters") + + # Store enhanced parameters + for param in updated_params: + cur.execute(""" + INSERT INTO parameters + (request_id, parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, (created_id, + param.get("Parameter", ""), + param.get("Type", ""), + param.get("Spec", ""), + param.get("DropdownOptions", ""), + param.get("IncludeRemarks", "No"), + param.get("Section", "General"), + param.get("ClauseReference", ""))) + + # Generate enhanced JSON template + json_template = generate_enhanced_json_template( + doc_type=doc_type, + product_name=product_name, + supplier_name=supplier_name, + parameters=updated_params + ) + global_json_template = json_template + + cur.execute(""" + INSERT INTO json_templates + (request_id, template_json) + VALUES (?, ?) 
+ """, (created_id, json.dumps(json_template))) + + con.commit() + con.close() + + response_data = { + "success": True, + "request_id": created_id, + "message": f"Enhanced template edited with {len(updated_params)} optimized parameters", + "summary": summary_text, + "parameters_count": len(updated_params), + "enhancements": { + "context_aware_editing": True, + "intelligent_optimization": True, + "regulatory_compliance": True, + "comprehensive_coverage": len(updated_params) >= 15 + } + } + + if request_id: + response_data["original_request_id"] = request_id + if firebase_json_url: + response_data["firebase_json_url"] = firebase_json_url + if file_context: + response_data["file_info"] = f"Enhanced OCR processed {filename}" if 'filename' in locals() else "File processed with enhanced OCR" + + return jsonify(response_data) + + except Exception as e: + print(f"❌ Error in enhanced /edit: {str(e)}") + if 'con' in locals(): + con.rollback() + con.close() + return jsonify({"error": str(e)}), 500 + +@app.route("/digitize", methods=["POST"]) +def enhanced_digitize_checklist(): + """Enhanced digitization with advanced OCR and intelligent parameter extraction""" + print(">> Enhanced /digitize route called <<") + + if 'checklist_file' not in request.files: + return jsonify({"error": "No file uploaded"}), 400 + + file = request.files['checklist_file'] + + if file.filename == '': + return jsonify({"error": "No file selected"}), 400 + + if not allowed_file(file.filename): + return jsonify({"error": "Invalid file type. 
def _digitize_meaningful_parameters(candidates):
    """Keep only dict entries whose ``Parameter`` is a real, non-placeholder name."""
    placeholders = {"unknown", "parameter", "option", "item"}
    kept = []
    for candidate in candidates if isinstance(candidates, list) else []:
        if not isinstance(candidate, dict):
            continue
        name = candidate.get("Parameter")
        if not isinstance(name, str):
            # The LLM occasionally emits null/numeric names; skip them
            # instead of failing the whole request.
            continue
        name = name.strip()
        if name and name.lower() not in placeholders:
            kept.append(candidate)
    return kept


@app.route("/digitize", methods=["POST"])
def enhanced_digitize_checklist():
    """Digitize an uploaded QC checklist into stored parameters and a template.

    Reads the ``checklist_file`` upload plus the optional ``doc_type``,
    ``product_name`` and ``supplier_name`` form fields, extracts the document
    text, asks the LLM for a parameter list and stores the request, the raw
    LLM response, the parameters and the generated JSON template in
    ``swift_check.db``.

    Returns:
        A JSON description of the stored request on success. A JSON
        ``{"error": ...}`` body with status 400 for an unusable upload and
        status 500 for any processing failure.
    """
    print(">> Enhanced /digitize route called <<")

    if 'checklist_file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files['checklist_file']

    if file.filename == '':
        return jsonify({"error": "No file selected"}), 400

    if not allowed_file(file.filename):
        return jsonify({"error": "Invalid file type. Allowed: PDF, PNG, JPG, JPEG"}), 400

    # Get optional parameters
    doc_type = request.form.get("doc_type", "")
    product_name = request.form.get("product_name", "")
    supplier_name = request.form.get("supplier_name", "")

    try:
        filename = secure_filename(file.filename)
        # secure_filename() can strip the dot or the whole stem, so the
        # extension is re-checked here rather than indexed blindly.
        _, dot, file_ext = filename.rpartition('.')
        if not dot or not file_ext:
            return jsonify({"error": "Invalid file name"}), 400
        file_ext = file_ext.lower()

        # The context manager removes the upload even when extraction raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            filepath = os.path.join(temp_dir, filename)
            file.save(filepath)
            # Enhanced text extraction with table structure preservation
            extracted_text = enhanced_extract_text_from_document(filepath, file_ext)

        if not extracted_text:
            return jsonify({"error": "Failed to extract text from file"}), 500

        print(f"✅ Enhanced OCR extracted {len(extracted_text)} characters from {filename}")
        print(f"📄 Preview: {extracted_text[:300]}...")

        # Fill in whichever metadata fields the caller did not supply.
        if not doc_type or not product_name or not supplier_name:
            detected_doc_type, detected_product, detected_supplier = enhanced_extract_metadata_from_ocr(extracted_text)
            doc_type = doc_type or detected_doc_type
            product_name = product_name or detected_product
            supplier_name = supplier_name or detected_supplier

        # Enhanced LLM processing for digitization
        llm_prompt = f"""
I've extracted text from a scanned QC checklist using enhanced OCR with table structure preservation.

DOCUMENT ANALYSIS:
- File: {filename}
- Detected Document Type: {doc_type}
- Detected Product: {product_name}
- Detected Supplier: {supplier_name}

EXTRACTED TEXT WITH STRUCTURE:
{extracted_text}

Please perform COMPREHENSIVE DIGITIZATION with:

1. **TABLE STRUCTURE PRESERVATION**: Maintain section headings and organization
2. **INTELLIGENT PARAMETER EXTRACTION**: Convert each item to appropriate parameter type
3. **SPECIFICATION EXTRACTION**: Capture tolerance limits, measurement units, acceptable ranges
4. **REGULATORY COMPLIANCE**: Include any regulatory references or compliance requirements
5. **COMPREHENSIVE COVERAGE**: Ensure minimum 15+ parameters for professional QC checklist

Focus on creating a PROFESSIONAL, COMPREHENSIVE parameter set that maintains the structure and intelligence of the original document while using modern parameter types and ensuring regulatory compliance.
"""

        # NOTE: no API key is embedded here; credentials must not live in
        # source control and this view never used the value itself.
        if not Groq:
            return jsonify({"error": "Groq library not available"}), 500

        # Call enhanced LLM for digitization
        llm_response = enhanced_call_groq_llm(
            user_message=llm_prompt,
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            is_digitization=True
        )

        print(f"\n🎯 ENHANCED DIGITIZATION LLM RESPONSE:")
        print("=" * 50)
        print(llm_response[:500] + "..." if len(llm_response) > 500 else llm_response)
        print("=" * 50)

        # Parse parameters with enhanced handling
        json_array_text = extract_top_level_json_array(llm_response)
        parameters = []

        if json_array_text:
            try:
                parsed = json.loads(json_array_text)
            except ValueError as e:
                print(f"❌ JSON parse error: {e}")
                return jsonify({"error": f"Failed to parse enhanced LLM response: {str(e)}"}), 500
            parameters = _digitize_meaningful_parameters(parsed)

        if not parameters:
            return jsonify({"error": "No meaningful parameters extracted from document"}), 500

        summary_text = f"Enhanced digitization: {len(parameters)} comprehensive parameters extracted from {filename}"

        # Generate enhanced JSON template
        json_template = generate_enhanced_json_template(
            doc_type=doc_type,
            product_name=product_name,
            supplier_name=supplier_name,
            parameters=parameters
        )

        # Save everything in one transaction; roll back and always close the
        # connection so a failed insert cannot leave partial rows or a leak.
        con = sql.connect("swift_check.db")
        try:
            cur = con.cursor()

            cur.execute("""
                INSERT INTO qc_requests
                (doc_type, product_name, supplier_name)
                VALUES (?, ?, ?)
            """, (doc_type, product_name, supplier_name))

            request_id = cur.lastrowid

            # Store enhanced LLM response
            cur.execute("""
                INSERT INTO llm_responses
                (request_id, llm_response, summary_text)
                VALUES (?, ?, ?)
            """, (request_id, llm_response, summary_text))

            # Store parameters with enhanced metadata
            for param in parameters:
                options = param.get("DropdownOptions", "") or param.get("ChecklistOptions", "")
                if isinstance(options, list):
                    options = ", ".join(str(option) for option in options)

                cur.execute("""
                    INSERT INTO parameters
                    (request_id, parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """, (request_id,
                      param.get("Parameter", ""),
                      param.get("Type", "Text Input"),
                      param.get("Spec", ""),
                      options,
                      param.get("IncludeRemarks", "No"),
                      param.get("Section", "General"),
                      param.get("ClauseReference", "")))

            cur.execute("""
                INSERT INTO json_templates
                (request_id, template_json)
                VALUES (?, ?)
            """, (request_id, json.dumps(json_template)))

            con.commit()
        except Exception:
            con.rollback()
            raise
        finally:
            con.close()

        # Enhanced response data
        response_data = {
            "success": True,
            "request_id": request_id,
            "message": summary_text,
            "parameters_count": len(parameters),
            "extracted_parameters": [p.get("Parameter", "") for p in parameters],
            "doc_type": doc_type,
            "product_name": product_name,
            "supplier_name": supplier_name,
            "enhancements": {
                "table_structure_preserved": True,
                "intelligent_type_detection": True,
                "comprehensive_extraction": len(parameters) >= 10,
                "specification_extraction": any(p.get("Spec") for p in parameters),
                "section_organization": any(p.get("Section") != "General" for p in parameters)
            },
            "file_processing": {
                "filename": filename,
                "text_extracted": len(extracted_text),
                "ocr_enhanced": True
            }
        }

        return jsonify(response_data)

    except Exception as e:
        # Top-level boundary of the view: log the failure and report it.
        print(f"❌ Error in enhanced /digitize: {str(e)}")
        import traceback
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
def _fetch_history_rows():
    """Return one row per QC request, newest first, with its parameter count."""
    con = sql.connect("swift_check.db")
    try:
        cur = con.cursor()
        cur.execute("""
            SELECT r.id, r.doc_type, r.product_name, r.supplier_name, r.created_at,
                   COUNT(p.id) as parameter_count
            FROM qc_requests r
            LEFT JOIN parameters p ON r.id = p.request_id
            GROUP BY r.id
            ORDER BY r.created_at DESC
        """)
        return cur.fetchall()
    finally:
        # Close even when the query fails so the handle is not leaked.
        con.close()


# Existing routes with enhanced features
@app.route("/history", methods=["GET"])
def enhanced_view_history():
    """List stored QC requests as JSON or as an HTML table.

    JSON is returned when the ``Accept`` header equals ``application/json``
    or the query string carries ``format=json``; otherwise an HTML page is
    rendered. Any failure yields status 500 in the matching format.
    """
    from html import escape

    wants_json = (
        request.headers.get('Accept') == 'application/json'
        or request.args.get('format') == 'json'
    )

    try:
        rows = _fetch_history_rows()
    except Exception as e:
        if wants_json:
            return jsonify({"error": str(e)}), 500
        return f"<h1>Error</h1><p>{escape(str(e))}</p>", 500

    if wants_json:
        return jsonify([{
            "id": row[0],
            "doc_type": row[1],
            "product_name": row[2],
            "supplier_name": row[3],
            "created_at": row[4],
            "parameter_count": row[5]
        } for row in rows])

    # Enhanced HTML view
    parts = ["""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Enhanced QC Request History</title>
</head>
<body>
<div class="container">
    <h1>Enhanced QC Request History v2.0</h1>
    <table>
        <thead>
            <tr>
                <th>ID</th>
                <th>Product</th>
                <th>Doc Type</th>
                <th>Supplier</th>
                <th>Parameters</th>
                <th>Created</th>
                <th>Actions</th>
            </tr>
        </thead>
        <tbody>
"""]

    for row in rows:
        param_badge = "🎯" if row[5] >= 15 else "⚠️" if row[5] >= 10 else "❌"
        # Product, document type and supplier come from user/LLM input, so
        # every interpolated cell is escaped.
        parts.append(f"""            <tr>
                <td>{escape(str(row[0]))}</td>
                <td>{escape(str(row[2]))}</td>
                <td>{escape(str(row[1]))}</td>
                <td>{escape(str(row[3]))}</td>
                <td>{param_badge} {escape(str(row[5]))} params</td>
                <td>{escape(str(row[4]))}</td>
                <td>
                    <a href="/preview/{escape(str(row[0]))}">Preview</a>
                    <a href="/template/{escape(str(row[0]))}">JSON</a>
                </td>
            </tr>
""")

    parts.append("""        </tbody>
    </table>
    <div class="legend">
        <strong>Legend:</strong>
        🎯 15+ params (Professional) |
        ⚠️ 10-14 params (Good) |
        ❌ &lt;10 params (Basic)
    </div>
</div>
</body>
</html>
""")
    return "".join(parts)
@app.route("/template/<int:request_id>", methods=["GET"])
def get_enhanced_template_json(request_id):
    """Return the stored JSON template for ``request_id``.

    The route rule declares ``request_id`` as a URL variable because the view
    requires it as an argument; without it the view cannot be called.

    Args:
        request_id: Identifier of the QC request whose template is wanted.

    Returns:
        The template as JSON, a 404 JSON error when no template is stored for
        the request, or a 500 JSON error when the lookup fails.
    """
    try:
        con = sql.connect("swift_check.db")
        try:
            cur = con.cursor()
            cur.execute("""
                SELECT template_json
                FROM json_templates
                WHERE request_id = ?
            """, (request_id,))
            result = cur.fetchone()
        finally:
            # Release the handle even if the query raises.
            con.close()

        if not result:
            return jsonify({"error": f"Enhanced template not found for request ID {request_id}"}), 404

        return jsonify(json.loads(result[0]))

    except Exception as e:
        print(f"❌ Error in /template/{request_id}: {str(e)}")
        return jsonify({"error": str(e)}), 500

_PREVIEW_INDENT = "    "


def _preview_box(rows, width):
    """Return an indented ASCII box ``width`` columns wide holding ``rows``."""
    border = "─" * width
    lines = [f"{_PREVIEW_INDENT}┌{border}┐\n"]
    # Rows are padded to the border width so the right edge lines up.
    lines.extend(f"{_PREVIEW_INDENT}│{row.ljust(width)}│\n" for row in rows)
    lines.append(f"{_PREVIEW_INDENT}└{border}┘\n")
    return "".join(lines)


def _preview_param_block(param):
    """Render one parameter row (a 7-tuple from ``parameters``) as ASCII text."""
    param_name, param_type, spec, options, include_remarks, _section, clause_ref = param

    # Add clause reference if available
    display_name = f"{param_name} ({clause_ref})" if clause_ref else param_name
    out = []

    if param_type == "Image Upload":
        out.append(f"[📷] {display_name}: [ Upload Photo ] + Toggle Assessment\n")
    elif param_type == "Toggle":
        out.append(f"[◐] {display_name}: ● Acceptable ○ Not Acceptable\n")
    elif param_type == "Dropdown":
        line = f"[▼] {display_name}: _________________ "
        if options:
            all_options = options.split(",")
            shown = ", ".join(opt.strip() for opt in all_options[:3])
            more = "..." if len(all_options) > 3 else ""
            line += f"({shown}{more})"
        out.append(line + "\n")
    elif param_type == "Checklist":
        # Same marker the statistics section uses for checklists.
        out.append(f"[☐] {display_name}:\n")
        if options:
            option_list = [opt.strip() for opt in options.split(",")]
            out.extend(f"{_PREVIEW_INDENT}☐ {opt}\n" for opt in option_list[:5])
            if len(option_list) > 5:
                out.append(f"{_PREVIEW_INDENT}... and {len(option_list) - 5} more items\n")
        else:
            out.append(f"{_PREVIEW_INDENT}☐ Item 1\n")
    elif param_type == "Numeric Input":
        line = f"[#️⃣] {display_name}: _____________"
        if spec:
            line += f" (Spec: {spec})"
        out.append(line + "\n")
    elif param_type == "Text Input":
        out.append(f"[✏️] {display_name}: _____________________________\n")
    elif param_type == "Remarks":
        out.append(f"[📝] {display_name}:\n")
        out.append(_preview_box(["", ""], 37))

    if include_remarks == "Yes" and param_type != "Remarks":
        out.append(f"{_PREVIEW_INDENT}└─ Additional Remarks: _______________________\n")

    out.append("\n")
    return "".join(out)


def _preview_ascii(request_details, parameters):
    """Build the ASCII preview and return it with the section grouping used."""
    width = 70
    out = ["╔" + "═" * width + "╗\n"]

    if request_details:
        header = f"{request_details[1]} {request_details[0]}"
    else:
        header = "Enhanced QC Template"

    banner_lines = [header]
    if request_details and request_details[2]:
        banner_lines.append(f"Supplier: {request_details[2]}")

    for text in banner_lines:
        left = (width - len(text)) // 2
        # A negative pad count yields an empty string, so over-long headers
        # simply overflow instead of raising.
        out.append(f"║{' ' * left}{text}{' ' * (width - left - len(text))}║\n")

    out.append("╚" + "═" * width + "╝\n\n")

    # Group parameters by section, keeping first-seen section order.
    sections = {}
    for param in parameters:
        sections.setdefault(param[5] or "General Parameters", []).append(param)

    for section_name, section_params in sections.items():
        out.append(f"\n🔹 {section_name.upper()}\n")
        out.append("─" * 60 + "\n")
        out.extend(_preview_param_block(param) for param in section_params)

    # Add enhanced final assessment
    out.append("═" * width + "\n")
    out.append("🎯 FINAL ASSESSMENT\n")
    out.append("═" * width + "\n")
    out.append("[✅] Overall Quality Assessment: ● APPROVED ○ REJECTED\n\n")
    out.append("[👤] Inspector Name & Signature: _________________________________\n\n")
    out.append("[📝] Final Comprehensive Remarks:\n")
    out.append(_preview_box(
        [" Overall assessment, corrective actions, and observations", "", ""],
        61,
    ))
    return "".join(out), sections


@app.route("/preview/<int:request_id>", methods=["GET"])
def enhanced_preview_page(request_id):
    """Render an HTML preview of the template stored for ``request_id``.

    The page shows summary statistics, an ASCII mock-up of the checklist
    grouped by section, and the stored JSON template.

    Args:
        request_id: Identifier of the QC request to preview.

    Returns:
        The HTML page, a 404 page when no template exists for the request,
        or a 500 error page when loading or rendering fails.
    """
    from html import escape

    try:
        con = sql.connect("swift_check.db")
        try:
            cur = con.cursor()

            # Get template JSON
            cur.execute("""
                SELECT template_json
                FROM json_templates
                WHERE request_id = ?
            """, (request_id,))
            template_result = cur.fetchone()

            # Get enhanced parameters
            cur.execute("""
                SELECT parameter_name, type, spec, dropdown_options, include_remarks, section, clause_reference
                FROM parameters
                WHERE request_id = ?
                ORDER BY id
            """, (request_id,))
            parameters = cur.fetchall()

            # Get request details
            cur.execute("""
                SELECT doc_type, product_name, supplier_name
                FROM qc_requests
                WHERE id = ?
            """, (request_id,))
            request_details = cur.fetchone()
        finally:
            # Release the handle even if one of the queries raises.
            con.close()

        if not template_result:
            return f"""<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><title>Not Found</title></head>
<body>
    <h1>Enhanced Template not found</h1>
    <p>No template exists for request ID {request_id}</p>
    <a href="/history">View History</a>
</body>
</html>
""", 404

        json_template = json.loads(template_result[0])
        ascii_preview, sections = _preview_ascii(request_details, parameters)

        # Enhanced statistics
        total_params = len(parameters)
        sections_count = len(sections)
        regulatory_refs = sum(1 for param in parameters if param[6])  # clause references
        param_types = {}
        for param in parameters:
            param_types[param[1]] = param_types.get(param[1], 0) + 1

        emoji_by_type = {"Image Upload": "📷", "Toggle": "◐", "Dropdown": "▼", "Checklist": "☐",
                         "Numeric Input": "#️⃣", "Text Input": "✏️", "Remarks": "📝"}
        type_items = "".join(
            f"<li>{emoji_by_type.get(ptype, '•')} {escape(str(ptype))}: {count} parameters</li>\n"
            for ptype, count in param_types.items()
        )

        stats_html = f"""
    <div class="stats">
        <h2>📊 Template Statistics</h2>
        <div class="stat-grid">
            <div class="stat"><strong>{total_params}</strong><br>Total Parameters</div>
            <div class="stat"><strong>{sections_count}</strong><br>Organized Sections</div>
            <div class="stat"><strong>{regulatory_refs}</strong><br>Regulatory References</div>
            <div class="stat"><strong>{len(param_types)}</strong><br>Parameter Types Used</div>
        </div>
        <h3>Parameter Type Distribution:</h3>
        <ul>
{type_items}        </ul>
    </div>
"""

        quality = '🎯 Professional' if total_params >= 15 else '⚠️ Good' if total_params >= 10 else '❌ Basic'
        # Parameter names, options and template text originate from users and
        # the LLM, so both <pre> bodies are escaped before embedding.
        json_text = escape(json.dumps(json_template, indent=2))

        html = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Enhanced QC Template Preview - Request #{request_id}</title>
</head>
<body>
<div class="container">
    <h1>Enhanced QC Template Preview - Request #{request_id}
        <span class="badge">v2.0</span>
        <span class="badge">{quality}</span>
    </h1>

    {stats_html}

    <div class="section">
        <h2>🖥️ Enhanced ASCII Preview</h2>
        <pre>{escape(ascii_preview)}</pre>
    </div>

    <div class="section">
        <h2>📋 Enhanced JSON Template</h2>
        <pre>{json_text}</pre>
    </div>

    <div class="nav">
        <a href="/history">View History</a>
        <a href="/template/{request_id}">JSON</a>
    </div>
</div>
</body>
</html>
"""
        return html

    except Exception as e:
        print(f"❌ Error in enhanced /preview/{request_id}: {str(e)}")
        return f"<h1>Error</h1><p>{escape(str(e))}</p>", 500
@app.route("/test-rag", methods=["GET"])
def test_enhanced_rag():
    """Run a RAG retrieval for a product/domain pair and report what came back.

    Query parameters:
        product: Product name to look up (default ``Malabar Paratha``).
        domain: Domain to look up (default ``Food Manufacturing``).

    Returns:
        The result summary as JSON when the ``Accept`` header equals
        ``application/json``, otherwise an HTML report. Failures return
        status 500 in the matching format.
    """
    from html import escape
    from urllib.parse import urlencode

    wants_json = request.headers.get('Accept') == 'application/json'

    try:
        test_product = request.args.get('product', 'Malabar Paratha')
        test_domain = request.args.get('domain', 'Food Manufacturing')

        print(f"\n🧪 Testing Enhanced RAG for {test_product} in {test_domain}")

        # Get comprehensive context from all VDBs
        comprehensive_context = get_comprehensive_context(test_product, test_domain)

        # Format context for display
        formatted_context = format_context_for_prompt(comprehensive_context, max_length=6000)

        counts = {
            key: len(comprehensive_context.get(key, []))
            for key in ("regulatory_requirements", "product_specifications",
                        "checklist_examples", "parameter_patterns")
        }

        results = {
            "test_parameters": {
                "product": test_product,
                "domain": test_domain
            },
            "comprehensive_context": dict(counts),
            "context_summary": comprehensive_context.get("context_summary", {}),
            "rag_quality": {
                "total_sources": (
                    counts["regulatory_requirements"] +
                    counts["product_specifications"] +
                    counts["checklist_examples"]
                ),
                "regulatory_compliance": counts["regulatory_requirements"] > 0,
                "product_depth_reference": counts["product_specifications"] > 0,
                "professional_examples": counts["checklist_examples"] > 0,
                "parameter_intelligence": counts["parameter_patterns"] > 0
            }
        }

        print(f"✅ Enhanced RAG Test Complete: {results['rag_quality']['total_sources']} sources retrieved")

        if wants_json:
            return jsonify(results)

        quality = results['rag_quality']

        def availability(flag):
            return '✅ Available' if flag else '❌ Missing'

        preview_text = formatted_context[:2000] + ('...' if len(formatted_context) > 2000 else '')
        default_query = urlencode({"product": "Malabar Paratha", "domain": "Food Manufacturing"})

        # `product` and `domain` come straight from the query string, so they
        # (and the retrieved context) are escaped before being embedded.
        html = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Enhanced RAG Test Results</title>
</head>
<body>
<div class="container">
    <h1>Enhanced RAG Test Results v2.0</h1>
    <h2>Testing: "{escape(test_product)}" in {escape(test_domain)}</h2>

    <div class="section">
        <h3>🎯 RAG Quality Assessment</h3>
        <div class="grid">
            <div><strong>Total Sources Retrieved:</strong><br>
                <span>{quality['total_sources']} sources</span></div>
            <div><strong>Regulatory Compliance:</strong><br>
                <span>{availability(quality['regulatory_compliance'])}</span></div>
            <div><strong>Product Depth Reference:</strong><br>
                <span>{availability(quality['product_depth_reference'])}</span></div>
            <div><strong>Professional Examples:</strong><br>
                <span>{availability(quality['professional_examples'])}</span></div>
        </div>
    </div>

    <div class="section">
        <h3>📊 Context Retrieved</h3>
        <ul>
            <li>Regulatory Requirements: {counts['regulatory_requirements']} documents</li>
            <li>Product Specifications: {counts['product_specifications']} references</li>
            <li>Checklist Examples: {counts['checklist_examples']} samples</li>
            <li>Parameter Patterns: {counts['parameter_patterns']} intelligent patterns</li>
        </ul>
    </div>

    <div class="section">
        <h3>🔍 Context Preview</h3>
        <pre>{escape(preview_text)}</pre>
    </div>

    <div class="section">
        <h3>🧪 Test Other Products</h3>
        <p>Try these Enhanced RAG tests:</p>
        <ul>
            <li><a href="/test-rag?{escape(default_query)}">Malabar Paratha (Food Manufacturing)</a></li>
        </ul>
    </div>
</div>
</body>
</html>
"""

        return html

    except Exception as e:
        print(f"❌ Error in Enhanced RAG test: {str(e)}")
        import traceback
        traceback.print_exc()

        # NOTE(review): the traceback is returned to the caller, as before.
        # That is convenient for a diagnostics endpoint but discloses
        # internals; confirm this route is not exposed publicly.
        error_response = {
            "error": str(e),
            "traceback": traceback.format_exc()
        }

        if wants_json:
            return jsonify(error_response), 500

        return f"""<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><title>Enhanced RAG Test Error</title></head>
<body>
    <h1>Error Testing Enhanced RAG</h1>
    <pre>{escape(error_response['traceback'])}</pre>
</body>
</html>
""", 500
if __name__ == "__main__":
    # Print the startup banner, then serve on the local interface only with
    # Flask's debug mode enabled.
    startup_banner = (
        "🚀 Starting Enhanced Swift Check API v2.0...",
        "✅ Comprehensive RAG Integration",
        "✅ 15+ Parameter Minimum",
        "✅ Intelligent Type Selection",
        "✅ Regulatory Compliance",
        "✅ Enhanced OCR Processing",
        "✅ Professional Formatting",
    )
    for banner_line in startup_banner:
        print(banner_line)

    app.run(host="127.0.0.1", port=5000, debug=True)