diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,2787 +1,2787 @@ -import json -import re -import random -import sqlite3 as sql -from datetime import datetime -import string -import os -import tempfile -from PIL import Image -import pytesseract -import fitz -from werkzeug.utils import secure_filename -from flask import Flask, request, jsonify, redirect, url_for, render_template_string -from pathlib import Path -import requests - -# Import enhanced RAG utilities -from enhanced_rag_utils import get_comprehensive_context, format_context_for_prompt - -try: - from groq import Groq -except ImportError: - Groq = None - -app = Flask(__name__) -global_parameters = [] -global_json_template = {} - -# Enhanced system prompt with comprehensive QC requirements -ENHANCED_SYSTEM_PROMPT = """ -You are the Swift Check AI assistant, specialized in creating comprehensive Quality Control (QC) checklists and inspection documents for food products with full regulatory compliance. - -# CONTEXT: -You'll help users generate custom QC parameters for various food products following Al Kabeer Group's professional standards. The parameters will be used in quality inspection checklists that QC inspectors fill during product inspections, with full regulatory backing and clause references. - -# COMPREHENSIVE QC CHECKLIST REQUIREMENTS: - -## For Food Products, ALWAYS include these categories (MINIMUM 15+ PARAMETERS): - -### 1. Physical Parameters (4-5 parameters) -- Appearance (Image Upload + Toggle): Color, visual defects, physical state with photo evidence -- Texture (Dropdown + Remarks): Firmness, consistency, crispness with detailed observations -- Size/Dimensions (Numeric Input): Length, width, diameter with tolerance specs (e.g., "60±5mm") -- Weight (Numeric Input): Individual/batch weight with tolerance (e.g., "25±2g") -- Shape (Dropdown): Uniformity, deformation assessment - -### 2. 
Sensory Parameters (3-4 parameters) -- Flavor/Taste (Dropdown + Remarks): Characteristic flavors, off-tastes, intensity -- Aroma/Odor (Dropdown + Remarks): Normal smell, off-odors, freshness -- Mouthfeel (Dropdown): For applicable products (texture after cooking) -- Overall Sensory Assessment (Toggle): Acceptable/Not Acceptable - -### 3. Safety Parameters (4-5 parameters) -- Foreign Objects (Checklist + Image Upload): MUST include comprehensive list: stones, glass, metals, plastic, wood, insects/pests, hair, threads, paper, bones, feathers -- Microbiological Specifications (Table/Numeric Input): Total Plate Count, E.coli, Salmonella, etc. with limits -- Chemical Contaminants (Numeric Input): Heavy metals, pesticides if applicable with ppm limits -- Allergen Declaration (Checklist): All 14 major allergens verification -- Metal Detection Results (Text Input + Toggle): Fe, Non-Fe, SS readings with pass/fail - -### 4. Product-Specific Parameters (2-3 parameters) -- For filled products: Filling weight ratio, filling consistency -- For fried products: Oil absorption, crispness level -- For frozen products: Freezer burn check, ice crystals, clustering -- For baked products: Browning level, doneness, internal temperature - -### 5. Packaging Parameters (3-4 parameters) -- Packaging Integrity (Image Upload + Checklist): Sealing, tears, punctures, label accuracy with photo -- Net Weight Verification (Numeric Input): Package weight vs declared weight with tolerance -- Date Verification (Text Input): Best before date, production date accuracy -- Batch/Lot Traceability (Text Input): Batch code, lot number verification - -### 6. Process Control Parameters (2-3 parameters) -- Temperature Control (Numeric Input): Processing, storage, transport temperatures with specs -- Time Parameters (Numeric Input): Processing time, cooling time with specifications -- Equipment Calibration (Toggle + Text Input): Calibration status, last calibration date - -### 7. 
Compliance & Documentation (2-3 parameters) -- Regulatory Compliance (Checklist): HACCP, Dubai Municipality, ISO requirements -- Documentation Complete (Toggle): All required certificates present -- Inspector Assessment (Toggle + Remarks): Overall quality assessment with detailed remarks - -# PARAMETER TYPES AND INTELLIGENT SELECTION: - -## Image Upload - USE FOR: -- Visual inspections (appearance, defects, packaging condition) -- Evidence documentation (defects, foreign objects) -- Label verification and batch code photos -- Before/after cooking comparisons - -## Toggle - USE FOR: -- Pass/fail decisions (acceptable/not acceptable) -- Present/absent checks (clustering, defects) -- Compliance status (passed/failed) -- Binary quality assessments - -## Checklist - USE FOR: -- Foreign objects (comprehensive list of all possible contaminants) -- Allergens (all 14 major allergens) -- Packaging defects (multiple possible issues) -- Compliance requirements (multiple standards) -- Multi-item verification lists - -## Numeric Input - USE FOR: -- Measurements WITH specifications and units -- Weight: "25±2g", "165±5g" -- Dimensions: "60±5mm length", "7-8 inch diameter" -- Temperature: "-18°C ±2°C", "180°C ±10°C" -- Microbiological limits: "<10^4 CFU/g", "<10^2" -- Chemical limits: "<0.1ppm", "<0.10ppm" -- Time measurements: "2-3 minutes", "30±5 seconds" - -## Text Input - USE FOR: -- Alphanumeric data entry -- Batch numbers, lot codes -- Production dates, expiry dates -- Supplier codes, product codes -- Equipment serial numbers - -## Remarks - USE FOR: -- Detailed observations requiring explanation -- Corrective actions taken -- Special conditions noted -- Inspector additional comments -- Non-conformance descriptions - -# REGULATORY COMPLIANCE: -- Include specific clause references for each parameter when available -- Reference Dubai Municipality guidelines, HACCP principles, ISO standards -- Ensure traceability requirements are met -- Include metal detection and allergen 
management as per UAE regulations - -# OUTPUT FORMAT: -Provide comprehensive, actionable parameters with: -- Minimum 15+ parameters covering all categories above -- Appropriate types based on intelligent selection rules -- Realistic specifications with proper units and tolerances -- Comprehensive options for dropdowns/checklists -- Clause references where applicable (e.g., "Dubai Municipality Section 4.2.1") -- Professional formatting matching Al Kabeer Group standards - -Remember: Generate PROFESSIONAL, COMPREHENSIVE checklists that match Al Kabeer Group's quality standards with full regulatory compliance and intelligent parameter type selection. -""" - -# Enhanced default refine prompt -ENHANCED_DEFAULT_REFINE_PROMPT = """ -Create a comprehensive professional food quality control checklist for the specified product following Al Kabeer Group standards. Include a MINIMUM of 15+ parameters that cover: - -1. PHYSICAL ATTRIBUTES: Appearance (with photo), texture, dimensions, weight with precise tolerance limits -2. SENSORY EVALUATION: Flavor, aroma, taste, mouthfeel characteristics with detailed assessment -3. SAFETY PARAMETERS: Comprehensive foreign objects checklist, microbiological specifications, chemical contaminants, allergen verification -4. PRODUCT-SPECIFIC CHECKS: Based on processing method (frozen, fried, baked, filled, etc.) with specialized parameters -5. PACKAGING INTEGRITY: Visual inspection with photos, seal quality, labeling accuracy, weight verification -6. PROCESS CONTROL: Temperature monitoring, time parameters, equipment calibration status -7. COMPLIANCE VERIFICATION: HACCP principles, Dubai Municipality requirements, ISO standards, traceability -8. 
DOCUMENTATION: Batch codes, production dates, certificates, inspector assessment - -Use intelligent parameter type selection: -- Image Upload for visual inspections and evidence documentation -- Toggle for pass/fail and binary assessments -- Checklist for foreign objects, allergens, and multi-item verifications -- Numeric Input for all measurements with proper specifications and units -- Text Input for codes, dates, and identifiers -- Remarks for detailed observations and corrective actions - -Include specific regulatory clause references where applicable and ensure professional formatting that matches Al Kabeer Group's quality standards. -""" - -# Enhanced digitization system prompt -ENHANCED_DIGITIZE_SYSTEM_PROMPT = """ -You are the Swift Check AI digitization assistant. Your job is to analyze OCR-extracted text from scanned QC checklists and convert them into structured parameters for comprehensive food safety and quality control checklists. - -# YOUR TASKS: -1. Recognize and preserve table structures and section headings -2. Identify quality control parameters with their proper input types -3. Extract specifications, tolerance limits, and measurement units -4. Determine appropriate parameter types based on content analysis -5. 
Maintain professional formatting and organization - -# INTELLIGENT PARAMETER TYPE DETECTION: - -## Image Upload - DETECT FOR: -- Parameters mentioning "photo", "attach", "capture", "visual", "appearance" -- Instructions like "attach photos", "capture variations" -- Visual inspection requirements - -## Toggle - DETECT FOR: -- Binary choices: "Acceptable/Non-acceptable", "Present/Absent", "Pass/Fail" -- "Yes/No" type assessments -- Simple pass/fail criteria - -## Checklist - DETECT FOR: -- Lists of items to verify (foreign objects, allergens, defects) -- Multiple related items that can be selected simultaneously -- Categories with sub-items - -## Numeric Input - DETECT FOR: -- Measurements with units and tolerances -- Temperature readings, weights, dimensions -- Time durations, counts, percentages -- Values with specifications like "±5g", "<10^4", "2-3 minutes" - -## Text Input - DETECT FOR: -- Codes, dates, identifiers -- Batch numbers, lot codes -- Names, locations, serial numbers - -## Remarks - DETECT FOR: -- "Remarks", "Comments", "Observations", "Notes" -- Areas requiring detailed explanations -- Corrective action descriptions - -# TABLE STRUCTURE RECOGNITION: -- Preserve section headings like "ORGANOLEPTIC EVALUATION", "COOKING DETAILS", "PACKAGING & FREEZING" -- Maintain parameter groupings and logical flow -- Keep tolerance limits and specifications with their parameters -- Preserve professional formatting structure - -# OUTPUT FORMAT: -Provide a comprehensive JSON array with intelligent parameter type selection: -[ - { - "Parameter": "Actual Parameter Name from Document", - "Type": "Intelligently Selected Type", - "Spec": "Extracted specifications with units", - "DropdownOptions": "Specific options from document", - "ChecklistOptions": "Comprehensive list items", - "IncludeRemarks": "Yes/No based on parameter complexity", - "Section": "Document section/category", - "ClauseReference": "Regulatory reference if identified" - } -] - -Focus on creating 
def extract_top_level_json_array(text):
    """Return the first bracket-balanced ``[...]`` span found in *text*.

    The scan starts at the first ``[`` and tracks nesting depth until the
    matching ``]`` is reached. Brackets that appear inside double-quoted
    JSON string literals (including after backslash escapes) are ignored,
    so a value such as ``"limit ] here"`` no longer ends the array early.

    Args:
        text: Raw text (typically an LLM reply) that may embed a JSON array.

    Returns:
        The substring from the opening ``[`` through its matching ``]``,
        or ``""`` when *text* is empty/None, contains no ``[``, or the
        array is never closed. The result is an exact substring of *text*
        and is not validated as JSON.
    """
    if not text:
        return ""

    start = text.find('[')
    if start == -1:
        return ""

    depth = 0
    in_string = False
    escaped = False
    for index in range(start, len(text)):
        char = text[index]

        if in_string:
            # Inside a string literal only an unescaped quote matters.
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            continue

        if char == '"':
            in_string = True
        elif char == '[':
            depth += 1
        elif char == ']':
            depth -= 1
            if depth == 0:
                return text[start:index + 1]

    # Unterminated array: report "nothing found" rather than a stray "[",
    # which a caller doing text.replace(...) would strip from its summary.
    return ""
is_digitization=False): - """ - Enhanced Groq LLM call with comprehensive RAG support. - Retrieves context from all 3 VDBs before calling the LLM. - """ - if not Groq: - return "Groq LLM call failed: 'groq' library not found or not installed." - - GROQ_API_KEY = "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" - domain = "Food Manufacturing" - - # Get comprehensive context from all VDBs - print(f"🔍 Retrieving comprehensive context for: {product_name}") - comprehensive_context = get_comprehensive_context(product_name, domain) - - # Format context for prompt - formatted_context = format_context_for_prompt(comprehensive_context, max_length=4500) - - # Generate header and supplier info - header_text = f"{product_name} {doc_type}" - supplier_info = f"Supplier Name: {supplier_name}" - - # Check if user message contains reference document content - has_reference = "Reference document content" in user_message - - # Select appropriate system prompt - if is_digitization: - system_instructions = ENHANCED_DIGITIZE_SYSTEM_PROMPT - else: - system_instructions = ENHANCED_SYSTEM_PROMPT - - # Enhanced context with regulatory compliance focus - enhanced_context = formatted_context - - if has_reference: - enhanced_context += f""" - -**CRITICAL DIGITIZATION GUIDANCE**: The reference document content is provided to understand the STRUCTURE and PROFESSIONAL FORMAT of QC parameters. - -Use the reference to identify: -1. Section headings and table structures (preserve them) -2. Parameter types and their appropriate input methods -3. Tolerance specifications and measurement units -4. Professional formatting and organization -5. Regulatory compliance requirements - -Create parameters with values, specifications, and input types SPECIFIC to {product_name} while maintaining the professional structure and comprehensive coverage of the reference document. 
- """ - else: - enhanced_context += f""" - -For {product_name}, ensure you include MINIMUM 15+ parameters covering these MANDATORY categories: - -1. **Physical Parameters** (4-5): Appearance (Image+Toggle), Texture (Dropdown+Remarks), Dimensions (Numeric), Weight (Numeric), Shape (Dropdown) -2. **Sensory Parameters** (3-4): Flavor (Dropdown+Remarks), Aroma (Dropdown+Remarks), Mouthfeel (Dropdown), Overall Sensory (Toggle) -3. **Safety Parameters** (4-5): Foreign Objects (Checklist+Image), Microbiological (Numeric), Chemical (Numeric), Allergens (Checklist), Metal Detection (Text+Toggle) -4. **Product-Specific** (2-3): Based on product type (frozen, fried, baked, etc.) -5. **Packaging** (3-4): Integrity (Image+Checklist), Weight Verification (Numeric), Date Verification (Text), Batch Traceability (Text) -6. **Process Control** (2-3): Temperature (Numeric), Time (Numeric), Equipment (Toggle+Text) -7. **Compliance** (2-3): Regulatory (Checklist), Documentation (Toggle), Inspector Assessment (Toggle+Remarks) - -**INTELLIGENT TYPE SELECTION RULES:** -- Image Upload: Visual inspections, appearance, defects, evidence documentation -- Toggle: Pass/fail, acceptable/not acceptable, present/absent, binary assessments -- Checklist: Foreign objects (stones, glass, metals, plastic, wood, insects, hair, threads), allergens (all 14), packaging defects, compliance items -- Numeric Input: ALL measurements with specifications and units (e.g., "Weight: 25±2g", "Temperature: -18°C ±2°C") -- Text Input: Codes, dates, identifiers, batch numbers -- Remarks: Detailed observations, corrective actions, complex assessments - -**REGULATORY COMPLIANCE:** -Based on retrieved context, ensure compliance with: {', '.join([req['regulatory_body'] + ' ' + req.get('clause_reference', req.get('standard_code', '')) for req in comprehensive_context.get('regulatory_requirements', [])[:3]])} - -**PROFESSIONAL FORMATTING:** -Match Al Kabeer Group's quality standards with proper section organization, 
comprehensive coverage, and intelligent parameter type selection. - """ - - # Construct the final system prompt - final_system_prompt = f""" -{system_instructions} - -User context: -- Doc Type: {doc_type} -- Product: {product_name} -- Supplier: {supplier_name} -- Generated Header: {header_text} -- Supplier Info: {supplier_info} - -{enhanced_context} - -**VALID PARAMETER TYPES:** -Checklist, Dropdown, Image Upload, Remarks, Text Input, Numeric Input, Toggle - -**MANDATORY REQUIREMENTS:** -1. MINIMUM 15+ parameters for comprehensive coverage -2. Use intelligent type selection based on parameter purpose -3. Include specifications with units for ALL Numeric Input parameters -4. Provide comprehensive options for Checklist and Dropdown parameters -5. Add clause references where regulatory compliance is required -6. Include section organization for professional formatting -7. Add "IncludeRemarks": "Yes" for complex parameters requiring detailed observations - -**OUTPUT INSTRUCTIONS:** -1. Provide a brief summary describing the comprehensive QC parameters created. -2. Then produce a bracketed JSON array with intelligent parameter selection. 
def apply_changes_to_params(parameters, changes):
    """Apply a list of add/remove/update changes to *parameters* in place.

    Each change is a dict with an ``"action"`` key (``"add"``, ``"remove"``
    or ``"update"``, case-insensitive) and a ``"Parameter"`` name. Names are
    matched case-insensitively. Entries that are not dicts, or whose action
    is unrecognised, are skipped.

    Behaviour per action:
        add:    append a new parameter dict; missing fields get defaults and
                an unknown ``"Type"`` falls back to ``"Text Input"``.
        remove: drop every parameter whose name matches.
        update: modify the first matching parameter. Only fields actually
                present in the change are written, so a partial update
                keeps the existing Type/Spec/Section/etc. intact.

    ``"ChecklistOptions"`` is used as the options source when
    ``"DropdownOptions"`` is empty; list-valued options are joined with
    ``", "`` and stored under ``"DropdownOptions"``.

    Args:
        parameters: List of parameter dicts; mutated in place.
        changes: Iterable of change dicts, or None for "no changes".

    Returns:
        The same *parameters* list object.
    """
    valid_types = ("Checklist", "Dropdown", "Image Upload", "Remarks",
                   "Text Input", "Numeric Input", "Toggle")

    def _normalize_type(raw_type):
        # Unknown or missing types degrade to a plain text field.
        return raw_type if raw_type in valid_types else "Text Input"

    def _name_key(value):
        # Case-insensitive comparison key that tolerates None / non-strings.
        return "" if value is None else str(value).lower()

    for change in changes or []:
        if not isinstance(change, dict):
            print(f"Skipping non-dict change: {change}")
            continue

        # The LLM may emit null or non-string values; coerce before use.
        action = str(change.get("action") or "").strip().lower()
        raw_name = change.get("Parameter", "Unnamed")
        p_name = "Unnamed" if raw_name is None else str(raw_name)
        target = _name_key(p_name)

        options = change.get("DropdownOptions", "")
        checklist_options = change.get("ChecklistOptions", "")

        # Handle both DropdownOptions and ChecklistOptions
        if not options and checklist_options:
            options = checklist_options
        if isinstance(options, (list, tuple)):
            options = ", ".join(str(item) for item in options)

        if action == "add":
            parameters.append({
                "Parameter": p_name,
                "Type": _normalize_type(change.get("Type", "Text Input")),
                "Spec": change.get("Spec", ""),
                "DropdownOptions": options,
                "IncludeRemarks": change.get("IncludeRemarks", "No"),
                "Section": change.get("Section", "General"),
                "ClauseReference": change.get("ClauseReference", ""),
            })

        elif action == "remove":
            parameters[:] = [
                p for p in parameters
                if _name_key(p.get("Parameter")) != target
            ]

        elif action == "update":
            for existing in parameters:
                if _name_key(existing.get("Parameter")) != target:
                    continue
                # Write only what the change supplies; omitted fields keep
                # their current values instead of being reset to defaults.
                if "Type" in change:
                    existing["Type"] = _normalize_type(change["Type"])
                if "Spec" in change:
                    existing["Spec"] = change["Spec"]
                if "DropdownOptions" in change or "ChecklistOptions" in change:
                    existing["DropdownOptions"] = options
                for key in ("IncludeRemarks", "Section", "ClauseReference"):
                    if key in change:
                        existing[key] = change[key]
                break

    return parameters
- """ - header_text = f"{product_name} {doc_type}" - template = { - "templateId": "neY5j", - "isDrafted": False, - "pageStyle": { - "margin": { - "top": 10, - "bottom": 10, - "left": 10, - "right": 10 - }, - "showPageNumber": False, - "headerImgUrl": "", - "fotterImgUrl": "" - }, - "pageToolsDataList": [], - "workflowInfo": { - "currentState": "Draft", - "approvalStates": ["Draft", "Under Review", "Approved", "Rejected"], - "currentApprover": { - "userId": "user123", - "name": "Ashish Kumar", - "role": "QC Manager" - }, - "previousApprovers": [ - { - "userId": "user456", - "name": "Raj Singh", - "role": "QC Supervisor", - "approvalDate": "2025-05-01T10:30:00Z", - "status": "Approved", - "comments": "Looks good to me." - } - ], - "nextApprovers": [ - { - "userId": "user789", - "name": "Priya Patel", - "role": "CEO" - } - ] - } - } - - def generate_tool_id(): - return ''.join(random.choices(string.ascii_lowercase + string.digits, k=5)) - - # Add main header - title_text = header_text - heading_tool = { - "toolId": generate_tool_id(), - "toolType": "HEADING", - "textData": { - "text": title_text, - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "color": 4294967295, # White - "fontSize": 14 - }, - "boxData": { - "fillColor": 4288111521, # Blue background - "borderEnable": False, - "borderColor": 4294967295, - "borderWidth": 0.8, - "boxAlignment": "CENTER_LEFT", - "cornerRadius": { - "topLeft": 0, - "topRight": 0, - "bottomLeft": 0, - "bottomRight": 0 - }, - "padding": { - "top": 4, - "bottom": 4, - "left": 9, - "right": 4 - }, - "margin": { - "top": 0, - "bottom": 0, - "left": 0, - "right": 0 - } - }, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(heading_tool) - - # Add supplier information - supplier_text = { - "toolId": generate_tool_id(), - "toolType": "TEXT", - "textData": { - "text": f"Supplier Name: {supplier_name}", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - 
"textAliend": "LEFT", - "color": 4278190080, # Black - "fontSize": 12 - }, - "toolHeight": 30, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(supplier_text) - - # Group parameters by section for better organization - sections = {} - for param in parameters: - section = param.get("Section", "General Parameters") - if section not in sections: - sections[section] = [] - sections[section].append(param) - - # Add parameters organized by sections - for section_name, section_params in sections.items(): - # Add section header - if section_name != "General Parameters": - section_header = { - "toolId": generate_tool_id(), - "toolType": "TEXT", - "textData": { - "text": section_name.upper(), - "isBold": True, - "isItalic": False, - "isUnderlined": True, - "textAliend": "LEFT", - "color": 4283215696, # Green - "fontSize": 13 - }, - "toolHeight": 35, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(section_header) - - # Add parameters in this section - for param in section_params: - param_name = param.get("Parameter", "") - param_type = param.get("Type", "Text Input") - spec = param.get("Spec", "") - options = param.get("DropdownOptions", "") - include_remarks = param.get("IncludeRemarks", "No") - clause_ref = param.get("ClauseReference", "") - - # Create display name with clause reference - display_name = param_name - if clause_ref: - display_name += f" ({clause_ref})" - - # Split options into a list if it's a string - option_list = [] - if isinstance(options, str) and options.strip(): - option_list = [opt.strip() for opt in options.split(",") if opt.strip()] - - # ENHANCED PARAMETER TYPE HANDLING - if param_type == "Image Upload": - # Create image upload tool with toggle - image_tool = { - "toolId": generate_tool_id(), - "toolType": "IMAGE", - "imageLableData": { - "text": display_name + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - 
"lablePositioned": "LEFT", - "spacing": 10, - "txtColor": 4278190080, # Black - "showLable": True - }, - "imageData": { - "showImageUploadArea": True, - "width": 200, - "height": 150 - }, - "iconData": 57344, - "showIcon": False, - "iconCodePoint": 59729, - "iconSize": 30, - "iconColor": 4278190080, # Black - "toolHeight": 160, - "toolWidth": 1.7976931348623157e+308, - "showToggle": True, - "imageToggleData": { - "label": "Assessment", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "fontSize": 14, - "showLabel": True, - "enabledText": "Acceptable", - "disabledText": "Not Acceptable", - "enabledColor": 4283215696, # Green - "disabledColor": 4294198070, # Red - "isSelected": True - } - } - template["pageToolsDataList"].append(image_tool) - - elif param_type == "Toggle": - # Create toggle tool - toggle_tool = { - "toolId": generate_tool_id(), - "toolType": "TOGGLE", - "toggleData": { - "disabledColor": 4294198070, # Red - "disabledText": "Not Acceptable" if not option_list else option_list[1] if len(option_list) > 1 else "No", - "enabledColor": 4283215696, # Green - "enabledText": "Acceptable" if not option_list else option_list[0] if option_list else "Yes", - "showLabel": True, - "label": display_name, - "labelFontSize": 14, - "labelTextColor": 4278190080, # Black - "isBold": True, - "isItalic": False, - "isSelected": True, - "toggleTextFontSize": 12, - "toggleTextIsBold": False - }, - "toolWidth": 1.7976931348623157e+308, - "toolHeight": 80 - } - template["pageToolsDataList"].append(toggle_tool) - - elif param_type == "Dropdown": - # Create dropdown tool - dropdown_tool = { - "toolId": generate_tool_id(), - "toolType": "DROPDOWN", - "dropdownData": { - "hintText": f"Select {param_name.lower()}", - "hintTextColor": 4288585374, # Gray - "hintFontSize": 14, - "dropdownWidth": 350, - "spacingBetweeenLableAndDropdownWidth": 10, - "showLable": True, - "labelText": display_name, - "isBold": True, - "isItalic": False, - "isUnderlined": False, - 
"textAliend": "LEFT", - "lablePositioned": "TOP", - "labelFontSize": 14, - "lableTextColor": 4278190080, # Black - "numberOfOptions": len(option_list) if option_list else 3, - "optionFontSize": 14, - "optionTextColor": 4278190080, # Black - "optionLst": option_list if option_list else ["Acceptable", "Marginal", "Not Acceptable"], - "selectedOptionIndex": -1 - }, - "toolHeight": 90, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(dropdown_tool) - - elif param_type == "Checklist": - # Create checkbox tool for checklists - if not option_list: - option_list = ["Item 1", "Item 2", "Item 3"] - - checkbox_tool = { - "toolId": generate_tool_id(), - "toolType": "CHECKBOX", - "checkboxData": { - "numberOfCheckboxes": len(option_list), - "checkboxBgColor": 4294967295, # White - "spacing": 8, - "runSpacing": 8, - "checkboxTileWidth": 140, - "checkBoxAlignmentEnum": "HORIZONTAL", - "checkBoxButtonStyleEnum": "CHECKBOX", - "checkBoxPositionedEnum": "START", - "checkBoxSelectionModeEnum": "MULTIPLE", - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 13, - "lablePositioned": "LEFT", - "txtColor": 4278190080, # Black - "labelLst": option_list, - "showLable": True, - "selectedIndexLstForMultiSelect": [], - "selectedIndexForSingleSelect": 0 - }, - "toolWidth": 1.7976931348623157e+308, - "toolHeight": max(100, len(option_list) * 15 + 40) # Dynamic height based on items - } - - # Add section label for checklist - checklist_label = { - "toolId": generate_tool_id(), - "toolType": "TEXT", - "textData": { - "text": display_name + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "color": 4278190080, # Black - "fontSize": 14 - }, - "toolHeight": 25, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(checklist_label) - template["pageToolsDataList"].append(checkbox_tool) - - elif param_type == "Numeric Input": - # Create numeric 
input with specification - label_text = display_name - if spec: - label_text += f" (Spec: {spec})" - - numeric_tool = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": label_text + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Enter numeric value" + (f" ({spec})" if spec else ""), - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "txtColor": 4288585374 # Gray - }, - "toolHeight": 75, - "toolWidth": 1.7976931348623157e+308, - "toggleData": { - "label": "Status", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "showLabel": True, - "enabledText": "Within Spec", - "disabledText": "Out of Spec", - "enabledColor": 4283215696, # Green - "disabledColor": 4294198070, # Red - "isSelected": True - }, - "showToggle": True # Show toggle for spec compliance - } - template["pageToolsDataList"].append(numeric_tool) - - elif param_type == "Text Input": - # Create text input - text_tool = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": display_name + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Enter " + param_name.lower(), - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "txtColor": 4288585374 # 
Gray - }, - "toolHeight": 65, - "toolWidth": 1.7976931348623157e+308, - "showToggle": False - } - template["pageToolsDataList"].append(text_tool) - - elif param_type == "Remarks": - # Create remarks/textarea - remarks_tool = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": display_name + ":", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Enter detailed observations and remarks", - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "txtColor": 4288585374 # Gray - }, - "toolHeight": 100, # Larger height for remarks - "toolWidth": 1.7976931348623157e+308, - "showToggle": False - } - template["pageToolsDataList"].append(remarks_tool) - - # Add additional remarks field if requested and not already a remarks parameter - if include_remarks == "Yes" and param_type != "Remarks": - additional_remarks = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": f"{param_name} - Additional Remarks:", - "isBold": False, - "isItalic": True, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 12, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Additional observations or corrective actions", - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 11, - "txtColor": 4288585374 # Gray - }, - "toolHeight": 60, - "toolWidth": 1.7976931348623157e+308, - "showToggle": 
False - } - template["pageToolsDataList"].append(additional_remarks) - - # Add final overall assessment section - final_assessment_header = { - "toolId": generate_tool_id(), - "toolType": "TEXT", - "textData": { - "text": "FINAL ASSESSMENT", - "isBold": True, - "isItalic": False, - "isUnderlined": True, - "textAliend": "CENTER", - "color": 4283215696, # Green - "fontSize": 14 - }, - "toolHeight": 35, - "toolWidth": 1.7976931348623157e+308 - } - template["pageToolsDataList"].append(final_assessment_header) - - # Overall quality assessment toggle - overall_toggle = { - "toolId": generate_tool_id(), - "toolType": "TOGGLE", - "toggleData": { - "disabledColor": 4294198070, # Red - "disabledText": "REJECTED", - "enabledColor": 4283215696, # Green - "enabledText": "APPROVED", - "showLabel": True, - "label": "Overall Quality Assessment", - "labelFontSize": 15, - "labelTextColor": 4278190080, # Black - "isBold": True, - "isItalic": False, - "isSelected": True, - "toggleTextFontSize": 14, - "toggleTextIsBold": True - }, - "toolWidth": 1.7976931348623157e+308, - "toolHeight": 100 - } - template["pageToolsDataList"].append(overall_toggle) - - # Inspector signature and date - inspector_info = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": "Inspector Name & Signature:", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Inspector name and signature", - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "txtColor": 4288585374 # Gray - }, - "toolHeight": 80, - "toolWidth": 1.7976931348623157e+308, - "showToggle": False - } - 
template["pageToolsDataList"].append(inspector_info) - - # Final comprehensive remarks - final_remarks = { - "toolId": generate_tool_id(), - "toolType": "TEXTAREA", - "lableData": { - "text": "Final Comprehensive Remarks:", - "isBold": True, - "isItalic": False, - "isUnderlined": False, - "textAliend": "LEFT", - "fontSize": 14, - "lablePositioned": "TOP_LEFT", - "spacing": 5, - "txtColor": 4278190080, # Black - "showLable": True - }, - "textAreaData": { - "isFilled": True, - "fillColor": 4292927712, # Light gray - "borderType": "UNDERLINED", - "storkStyle": "LINE", - "dummyTxt": "Overall assessment, corrective actions, and additional observations", - "borderColor": 4278190080, # Black - "isBold": False, - "isItalic": False, - "isUnderlined": False, - "fontSize": 12, - "txtColor": 4288585374 # Gray - }, - "toolHeight": 120, - "toolWidth": 1.7976931348623157e+308, - "showToggle": False - } - template["pageToolsDataList"].append(final_remarks) - - return template - -# Enhanced OCR and text extraction functions -def enhanced_extract_text_from_document(filepath, file_ext): - """Enhanced text extraction with better table structure recognition""" - try: - extracted_text = "" - - if file_ext == 'pdf': - # Use PyMuPDF with enhanced table detection - pdf_document = fitz.open(filepath) - - for page_num in range(pdf_document.page_count): - page = pdf_document[page_num] - - # Try to extract text directly first - text = page.get_text() - - # If minimal text found, use OCR - if len(text.strip()) < 100: - # Convert page to high-quality image for OCR - mat = fitz.Matrix(3, 3) # Higher zoom for better OCR - pix = page.get_pixmap(matrix=mat) - img_data = pix.pil_tobytes(format="PNG") - - # Enhanced OCR with better table handling - from io import BytesIO - image = Image.open(BytesIO(img_data)) - - # Use OCR configuration optimized for tables - custom_config = r'--oem 3 --psm 6 -c preserve_interword_spaces=1' - text = pytesseract.image_to_string(image, config=custom_config) - - # 
Enhanced text processing to preserve table structure - processed_text = enhance_table_structure(text) - extracted_text += f"\n=== PAGE {page_num + 1} ===\n{processed_text}\n" - - pdf_document.close() - - else: # Image files - image = Image.open(filepath) - # Enhanced OCR for images with table preservation - custom_config = r'--oem 3 --psm 6 -c preserve_interword_spaces=1' - text = pytesseract.image_to_string(image, config=custom_config) - extracted_text = enhance_table_structure(text) - - return extracted_text.strip() - - except Exception as e: - print(f"Error during enhanced document processing: {str(e)}") - return None - -def enhance_table_structure(text): - """Enhance text to better preserve table structures and headings""" - if not text: - return text - - # Preserve important section headings - section_patterns = [ - (r'(ORGANOLEPTIC\s+EVALUATION)', r'\n## \1\n'), - (r'(COOKING\s+DETAILS)', r'\n## \1\n'), - (r'(PACKAGING\s*&\s*FREEZING)', r'\n## \1\n'), - (r'(FREEZING\s+DETAILS)', r'\n## \1\n'), - (r'(METAL\s+SCREENING)', r'\n## \1\n'), - (r'(SIZE\s+VARIATIONS)', r'\n## \1\n'), - (r'(COLOUR\s+VARIATIONS)', r'\n## \1\n'), - (r'(EVALUATION\s+OF\s+PASTRY)', r'\n## \1\n'), - (r'(FINAL\s+ASSESSMENT)', r'\n## \1\n'), - ] - - processed_text = text - for pattern, replacement in section_patterns: - processed_text = re.sub(pattern, replacement, processed_text, flags=re.IGNORECASE) - - # Preserve parameter-value pairs - param_patterns = [ - (r'([A-Za-z\s]+):\s*(Acceptable|Non-acceptable|Present|Absent|To be mentioned)', r'**\1**: \2'), - (r'([A-Za-z\s]+)\s+(Sam\s+\d+)', r'**\1** - \2'), - (r'(Temperature|Weight|Time|Dimension[s]?)[:\s]+([0-9\-\+\±°C\s\w]+)', r'**\1**: \2'), - ] - - for pattern, replacement in param_patterns: - processed_text = re.sub(pattern, replacement, processed_text, flags=re.IGNORECASE) - - # Clean up excessive whitespace while preserving structure - processed_text = re.sub(r'\n\s*\n\s*\n', '\n\n', processed_text) - processed_text = re.sub(r'[ \t]+', 
' ', processed_text) - - return processed_text - -def enhanced_extract_metadata_from_ocr(ocr_text): - """Enhanced metadata extraction with better pattern recognition""" - # Enhanced document type detection - doc_type_patterns = { - r'(?i)(MALABAR\s*PARATHA.*INSPECTION)': "Malabar Paratha Inspection Record", - r'(?i)(GREEN\s*PEAS.*INSPECTION)': "Green Peas Inspection Record", - r'(?i)(VEGETABLE\s*SAMOSA.*INSPECTION)': "Vegetable Samosa Inspection Record", - r'(?i)(CONTAINER.*INSPECTION.*REPORT)': "Container Inspection Report", - r'(?i)quality\s*(?:control)?\s*checklist': "Quality Control Checklist", - r'(?i)inspection\s*(?:record|checklist)': "Inspection Checklist", - r'(?i)pre[\-\s]shipment.*inspection': "Pre-Shipment Inspection", - } - - detected_doc_type = "Quality Control Checklist" # Default - for pattern, doc_type in doc_type_patterns.items(): - if re.search(pattern, ocr_text): - detected_doc_type = doc_type - break - - # Enhanced product name extraction - product_patterns = [ - r'(?i)product\s*(?:name|description)[:\-\s]*([^\n]{1,50})', - r'(?i)(MALABAR\s*PARATHA)', - r'(?i)(GREEN\s*PEAS)', - r'(?i)(VEGETABLE\s*SAMOSA[S]?)', - r'(?i)(SWEET\s*CORN)', - ] - - detected_product = "Food Product" # Default - for pattern in product_patterns: - match = re.search(pattern, ocr_text) - if match: - detected_product = match.group(1).strip() - break - - # Enhanced supplier name extraction - supplier_patterns = [ - r'(?i)supplier\s*(?:name)?[:\-\s]*([^\n]{1,40})', - r'(?i)manufacturing\s*unit[:\-\s]*([^\n]{1,40})', - r'(?i)(AL\s*KABEER)', - r'(?i)(CASCADE\s*MARINE)', - r'(?i)(SAHAR\s*FOOD)', - ] - - detected_supplier = "" # Default empty - for pattern in supplier_patterns: - match = re.search(pattern, ocr_text) - if match: - detected_supplier = match.group(1).strip() - break - - return detected_doc_type, detected_product, detected_supplier - -# File upload handling -ALLOWED_EXTENSIONS = {'pdf', 'png', 'jpg', 'jpeg'} - -def allowed_file(filename): - """Check if file 
extension is allowed""" - return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS - -def fetch_json_from_firebase(firebase_json_url): - """Fetch JSON template from Firebase Storage URL""" - try: - response = requests.get(firebase_json_url) - if response.status_code == 200: - return response.json() - else: - return None - except Exception as e: - print(f"Error fetching JSON from Firebase: {str(e)}") - return None - -# API Routes -@app.route("/") -def index(): - return """ - -
-15+ parameter minimum, smart type detection, comprehensive coverage
-Dubai Municipality, HACCP, ISO standards with clause references
-Al Kabeer Group standards, section organization, comprehensive structure
-Table structure recognition, header preservation, intelligent parameter extraction
-/refine - Create Enhanced QC TemplateCreates comprehensive quality control templates with 15+ parameters, regulatory compliance, and intelligent type selection.
- -doc_type (required) - Document typeproduct_name (required) - Product namesupplier_name (required) - Supplier nameuser_message (optional) - Additional instructionscontext_file (optional) - Reference document/edit - Edit with Enhanced ContextModifies existing templates using comprehensive context and intelligent parameter optimization.
- -/digitize - Advanced Document DigitizationEnhanced OCR processing with table structure recognition and intelligent parameter extraction.
- -checklist_file (required) - Scanned documentdoc_type (optional) - Document typeproduct_name (optional) - Product namesupplier_name (optional) - Supplier name/template/{request_id} - Get Enhanced Template JSONReturns professionally formatted JSON templates with intelligent parameter types.
-/history - View Request HistoryBrowse all QC requests with enhanced preview and download options.
-/test-rag - Test Enhanced RAG SystemTest the comprehensive RAG system with all 3 vector databases.
-| ID | -Product | -Doc Type | -Supplier | -Parameters | -Created | -Actions | -
|---|---|---|---|---|---|---|
| {row[0]} | -{row[2]} | -{row[1]} | -{row[3]} | -{param_badge} {row[5]} params | -{row[4]} | -- Preview - JSON - | -
{str(e)}
", 500 - -@app.route("/template/No template exists for request ID {request_id}
- View History - - - """, 404 - - json_template = json.loads(template_result[0]) - - # Generate enhanced ASCII preview with sections - ascii_preview = "╔══════════════════════════════════════════════════════════════════════╗\n" - - if request_details: - header = f"{request_details[1]} {request_details[0]}" - else: - header = "Enhanced QC Template" - - header_padding = (70 - len(header)) // 2 - ascii_preview += f"║{' ' * header_padding}{header}{' ' * (70 - header_padding - len(header))}║\n" - - if request_details and request_details[2]: - supplier = f"Supplier: {request_details[2]}" - supplier_padding = (70 - len(supplier)) // 2 - ascii_preview += f"║{' ' * supplier_padding}{supplier}{' ' * (70 - supplier_padding - len(supplier))}║\n" - - ascii_preview += "╚══════════════════════════════════════════════════════════════════════╝\n\n" - - # Group parameters by section - sections = {} - for param in parameters: - param_name, param_type, spec, options, include_remarks, section, clause_ref = param - section = section or "General Parameters" - if section not in sections: - sections[section] = [] - sections[section].append(param) - - # Add parameters organized by sections - for section_name, section_params in sections.items(): - ascii_preview += f"\n🔹 {section_name.upper()}\n" - ascii_preview += "─" * 60 + "\n" - - for param in section_params: - param_name, param_type, spec, options, include_remarks, section, clause_ref = param - - # Add clause reference if available - display_name = param_name - if clause_ref: - display_name += f" ({clause_ref})" - - if param_type == "Image Upload": - ascii_preview += f"[📷] {display_name}: [ Upload Photo ] + Toggle Assessment\n" - elif param_type == "Toggle": - ascii_preview += f"[◐] {display_name}: ● Acceptable ○ Not Acceptable\n" - elif param_type == "Dropdown": - ascii_preview += f"[▼] {display_name}: _________________ " - if options: - option_list = [opt.strip() for opt in options.split(",")[:3]] - ascii_preview += f"({', 
'.join(option_list)}{'...' if len(options.split(',')) > 3 else ''})\n" - else: - ascii_preview += "\n" - elif param_type == "Checklist": - ascii_preview += f" {display_name}:\n" - if options: - option_list = [opt.strip() for opt in options.split(",")] - for opt in option_list[:5]: - ascii_preview += f" ☐ {opt}\n" - if len(option_list) > 5: - ascii_preview += f" ... and {len(option_list) - 5} more items\n" - else: - ascii_preview += " ☐ Item 1\n" - elif param_type == "Numeric Input": - ascii_preview += f"[#️⃣] {display_name}: _____________" - if spec: - ascii_preview += f" (Spec: {spec})\n" - else: - ascii_preview += "\n" - elif param_type == "Text Input": - ascii_preview += f"[✏️] {display_name}: _____________________________\n" - elif param_type == "Remarks": - ascii_preview += f"[📝] {display_name}:\n" - ascii_preview += " ┌─────────────────────────────────────┐\n" - ascii_preview += " │ │\n" - ascii_preview += " │ │\n" - ascii_preview += " └─────────────────────────────────────┘\n" - - if include_remarks == "Yes" and param_type != "Remarks": - ascii_preview += f" └─ Additional Remarks: _______________________\n" - - ascii_preview += "\n" - - # Add enhanced final assessment - ascii_preview += "═" * 70 + "\n" - ascii_preview += "🎯 FINAL ASSESSMENT\n" - ascii_preview += "═" * 70 + "\n" - ascii_preview += "[✅] Overall Quality Assessment: ● APPROVED ○ REJECTED\n\n" - ascii_preview += "[👤] Inspector Name & Signature: _________________________________\n\n" - ascii_preview += "[📝] Final Comprehensive Remarks:\n" - ascii_preview += " ┌─────────────────────────────────────────────────────────────┐\n" - ascii_preview += " │ Overall assessment, corrective actions, and observations │\n" - ascii_preview += " │ │\n" - ascii_preview += " │ │\n" - ascii_preview += " └─��───────────────────────────────────────────────────────────┘\n" - - # Enhanced statistics - total_params = len(parameters) - param_types = {} - sections_count = len(sections) - regulatory_refs = sum(1 for param in 
parameters if param[6]) # clause references - - for param in parameters: - param_type = param[1] - param_types[param_type] = param_types.get(param_type, 0) + 1 - - stats_html = f""" -{str(e)}
", 500 - -@app.route("/test-rag", methods=["GET"]) -def test_enhanced_rag(): - """Test enhanced RAG functionality with comprehensive context""" - try: - test_product = request.args.get('product', 'Malabar Paratha') - test_domain = request.args.get('domain', 'Food Manufacturing') - - print(f"\n🧪 Testing Enhanced RAG for {test_product} in {test_domain}") - - # Get comprehensive context from all VDBs - comprehensive_context = get_comprehensive_context(test_product, test_domain) - - # Format context for display - formatted_context = format_context_for_prompt(comprehensive_context, max_length=6000) - - results = { - "test_parameters": { - "product": test_product, - "domain": test_domain - }, - "comprehensive_context": { - "regulatory_requirements": len(comprehensive_context.get("regulatory_requirements", [])), - "product_specifications": len(comprehensive_context.get("product_specifications", [])), - "checklist_examples": len(comprehensive_context.get("checklist_examples", [])), - "parameter_patterns": len(comprehensive_context.get("parameter_patterns", [])), - }, - "context_summary": comprehensive_context.get("context_summary", {}), - "rag_quality": { - "total_sources": ( - len(comprehensive_context.get("regulatory_requirements", [])) + - len(comprehensive_context.get("product_specifications", [])) + - len(comprehensive_context.get("checklist_examples", [])) - ), - "regulatory_compliance": len(comprehensive_context.get("regulatory_requirements", [])) > 0, - "product_depth_reference": len(comprehensive_context.get("product_specifications", [])) > 0, - "professional_examples": len(comprehensive_context.get("checklist_examples", [])) > 0, - "parameter_intelligence": len(comprehensive_context.get("parameter_patterns", [])) > 0 - } - } - - print(f"✅ Enhanced RAG Test Complete: {results['rag_quality']['total_sources']} sources retrieved") - - if request.headers.get('Accept') == 'application/json': - return jsonify(results) - else: - # Enhanced HTML view - html = f""" - - 
-{formatted_context[:2000]}{'...' if len(formatted_context) > 2000 else ''}
- Try these Enhanced RAG tests:
-{error_response['traceback']}
-
-
- """, 500
-
-if __name__ == "__main__":
- print("🚀 Starting Enhanced Swift Check API v2.0...")
- print("✅ Comprehensive RAG Integration")
- print("✅ 15+ Parameter Minimum")
- print("✅ Intelligent Type Selection")
- print("✅ Regulatory Compliance")
- print("✅ Enhanced OCR Processing")
- print("✅ Professional Formatting")
+import json
+import re
+import random
+import sqlite3 as sql
+from datetime import datetime
+import string
+import os
+import tempfile
+from PIL import Image
+import pytesseract
+import fitz
+from werkzeug.utils import secure_filename
+from flask import Flask, request, jsonify, redirect, url_for, render_template_string
+from pathlib import Path
+import requests
+
+# Import enhanced RAG utilities
+from rag_utils import get_comprehensive_context, format_context_for_prompt
+
+try:
+ from groq import Groq
+except ImportError:
+ Groq = None
+
+# Flask application object for this module.
+app = Flask(__name__)
+# Module-level mutable containers, created empty at import time.
+# NOTE(review): presumably they cache the most recent parameter list and JSON
+# template between requests; the code that fills them is outside this section.
+global_parameters = []
+global_json_template = {}
+
+# System prompt for checklist generation: enhanced_call_groq_llm() uses it as
+# the base instructions whenever is_digitization is false and embeds it
+# verbatim at the top of the system message. Plain (non-f) string.
+ENHANCED_SYSTEM_PROMPT = """
+You are the Swift Check AI assistant, specialized in creating comprehensive Quality Control (QC) checklists and inspection documents for food products with full regulatory compliance.
+
+# CONTEXT:
+You'll help users generate custom QC parameters for various food products following Al Kabeer Group's professional standards. The parameters will be used in quality inspection checklists that QC inspectors fill during product inspections, with full regulatory backing and clause references.
+
+# COMPREHENSIVE QC CHECKLIST REQUIREMENTS:
+
+## For Food Products, ALWAYS include these categories (MINIMUM 15+ PARAMETERS):
+
+### 1. Physical Parameters (4-5 parameters)
+- Appearance (Image Upload + Toggle): Color, visual defects, physical state with photo evidence
+- Texture (Dropdown + Remarks): Firmness, consistency, crispness with detailed observations
+- Size/Dimensions (Numeric Input): Length, width, diameter with tolerance specs (e.g., "60±5mm")
+- Weight (Numeric Input): Individual/batch weight with tolerance (e.g., "25±2g")
+- Shape (Dropdown): Uniformity, deformation assessment
+
+### 2. Sensory Parameters (3-4 parameters)
+- Flavor/Taste (Dropdown + Remarks): Characteristic flavors, off-tastes, intensity
+- Aroma/Odor (Dropdown + Remarks): Normal smell, off-odors, freshness
+- Mouthfeel (Dropdown): For applicable products (texture after cooking)
+- Overall Sensory Assessment (Toggle): Acceptable/Not Acceptable
+
+### 3. Safety Parameters (4-5 parameters)
+- Foreign Objects (Checklist + Image Upload): MUST include comprehensive list: stones, glass, metals, plastic, wood, insects/pests, hair, threads, paper, bones, feathers
+- Microbiological Specifications (Table/Numeric Input): Total Plate Count, E.coli, Salmonella, etc. with limits
+- Chemical Contaminants (Numeric Input): Heavy metals, pesticides if applicable with ppm limits
+- Allergen Declaration (Checklist): All 14 major allergens verification
+- Metal Detection Results (Text Input + Toggle): Fe, Non-Fe, SS readings with pass/fail
+
+### 4. Product-Specific Parameters (2-3 parameters)
+- For filled products: Filling weight ratio, filling consistency
+- For fried products: Oil absorption, crispness level
+- For frozen products: Freezer burn check, ice crystals, clustering
+- For baked products: Browning level, doneness, internal temperature
+
+### 5. Packaging Parameters (3-4 parameters)
+- Packaging Integrity (Image Upload + Checklist): Sealing, tears, punctures, label accuracy with photo
+- Net Weight Verification (Numeric Input): Package weight vs declared weight with tolerance
+- Date Verification (Text Input): Best before date, production date accuracy
+- Batch/Lot Traceability (Text Input): Batch code, lot number verification
+
+### 6. Process Control Parameters (2-3 parameters)
+- Temperature Control (Numeric Input): Processing, storage, transport temperatures with specs
+- Time Parameters (Numeric Input): Processing time, cooling time with specifications
+- Equipment Calibration (Toggle + Text Input): Calibration status, last calibration date
+
+### 7. Compliance & Documentation (2-3 parameters)
+- Regulatory Compliance (Checklist): HACCP, Dubai Municipality, ISO requirements
+- Documentation Complete (Toggle): All required certificates present
+- Inspector Assessment (Toggle + Remarks): Overall quality assessment with detailed remarks
+
+# PARAMETER TYPES AND INTELLIGENT SELECTION:
+
+## Image Upload - USE FOR:
+- Visual inspections (appearance, defects, packaging condition)
+- Evidence documentation (defects, foreign objects)
+- Label verification and batch code photos
+- Before/after cooking comparisons
+
+## Toggle - USE FOR:
+- Pass/fail decisions (acceptable/not acceptable)
+- Present/absent checks (clustering, defects)
+- Compliance status (passed/failed)
+- Binary quality assessments
+
+## Checklist - USE FOR:
+- Foreign objects (comprehensive list of all possible contaminants)
+- Allergens (all 14 major allergens)
+- Packaging defects (multiple possible issues)
+- Compliance requirements (multiple standards)
+- Multi-item verification lists
+
+## Numeric Input - USE FOR:
+- Measurements WITH specifications and units
+- Weight: "25±2g", "165±5g"
+- Dimensions: "60±5mm length", "7-8 inch diameter"
+- Temperature: "-18°C ±2°C", "180°C ±10°C"
+- Microbiological limits: "<10^4 CFU/g", "<10^2"
+- Chemical limits: "<0.1ppm", "<0.10ppm"
+- Time measurements: "2-3 minutes", "30±5 seconds"
+
+## Text Input - USE FOR:
+- Alphanumeric data entry
+- Batch numbers, lot codes
+- Production dates, expiry dates
+- Supplier codes, product codes
+- Equipment serial numbers
+
+## Remarks - USE FOR:
+- Detailed observations requiring explanation
+- Corrective actions taken
+- Special conditions noted
+- Inspector additional comments
+- Non-conformance descriptions
+
+# REGULATORY COMPLIANCE:
+- Include specific clause references for each parameter when available
+- Reference Dubai Municipality guidelines, HACCP principles, ISO standards
+- Ensure traceability requirements are met
+- Include metal detection and allergen management as per UAE regulations
+
+# OUTPUT FORMAT:
+Provide comprehensive, actionable parameters with:
+- Minimum 15+ parameters covering all categories above
+- Appropriate types based on intelligent selection rules
+- Realistic specifications with proper units and tolerances
+- Comprehensive options for dropdowns/checklists
+- Clause references where applicable (e.g., "Dubai Municipality Section 4.2.1")
+- Professional formatting matching Al Kabeer Group standards
+
+Remember: Generate PROFESSIONAL, COMPREHENSIVE checklists that match Al Kabeer Group's quality standards with full regulatory compliance and intelligent parameter type selection.
+"""
+
+# Default instruction text for the refine flow (plain, non-f string).
+# NOTE(review): no use of this constant is visible in this part of the file;
+# presumably it is the fallback user message when the caller supplies none.
+# Confirm against the /refine handler.
+ENHANCED_DEFAULT_REFINE_PROMPT = """
+Create a comprehensive professional food quality control checklist for the specified product following Al Kabeer Group standards. Include a MINIMUM of 15+ parameters that cover:
+
+1. PHYSICAL ATTRIBUTES: Appearance (with photo), texture, dimensions, weight with precise tolerance limits
+2. SENSORY EVALUATION: Flavor, aroma, taste, mouthfeel characteristics with detailed assessment
+3. SAFETY PARAMETERS: Comprehensive foreign objects checklist, microbiological specifications, chemical contaminants, allergen verification
+4. PRODUCT-SPECIFIC CHECKS: Based on processing method (frozen, fried, baked, filled, etc.) with specialized parameters
+5. PACKAGING INTEGRITY: Visual inspection with photos, seal quality, labeling accuracy, weight verification
+6. PROCESS CONTROL: Temperature monitoring, time parameters, equipment calibration status
+7. COMPLIANCE VERIFICATION: HACCP principles, Dubai Municipality requirements, ISO standards, traceability
+8. DOCUMENTATION: Batch codes, production dates, certificates, inspector assessment
+
+Use intelligent parameter type selection:
+- Image Upload for visual inspections and evidence documentation
+- Toggle for pass/fail and binary assessments
+- Checklist for foreign objects, allergens, and multi-item verifications
+- Numeric Input for all measurements with proper specifications and units
+- Text Input for codes, dates, and identifiers
+- Remarks for detailed observations and corrective actions
+
+Include specific regulatory clause references where applicable and ensure professional formatting that matches Al Kabeer Group's quality standards.
+"""
+
+# System prompt for digitizing OCR text of scanned checklists:
+# enhanced_call_groq_llm() selects it when is_digitization is true. Plain
+# (non-f) string, so the braces in the JSON example below are literal.
+ENHANCED_DIGITIZE_SYSTEM_PROMPT = """
+You are the Swift Check AI digitization assistant. Your job is to analyze OCR-extracted text from scanned QC checklists and convert them into structured parameters for comprehensive food safety and quality control checklists.
+
+# YOUR TASKS:
+1. Recognize and preserve table structures and section headings
+2. Identify quality control parameters with their proper input types
+3. Extract specifications, tolerance limits, and measurement units
+4. Determine appropriate parameter types based on content analysis
+5. Maintain professional formatting and organization
+
+# INTELLIGENT PARAMETER TYPE DETECTION:
+
+## Image Upload - DETECT FOR:
+- Parameters mentioning "photo", "attach", "capture", "visual", "appearance"
+- Instructions like "attach photos", "capture variations"
+- Visual inspection requirements
+
+## Toggle - DETECT FOR:
+- Binary choices: "Acceptable/Non-acceptable", "Present/Absent", "Pass/Fail"
+- "Yes/No" type assessments
+- Simple pass/fail criteria
+
+## Checklist - DETECT FOR:
+- Lists of items to verify (foreign objects, allergens, defects)
+- Multiple related items that can be selected simultaneously
+- Categories with sub-items
+
+## Numeric Input - DETECT FOR:
+- Measurements with units and tolerances
+- Temperature readings, weights, dimensions
+- Time durations, counts, percentages
+- Values with specifications like "±5g", "<10^4", "2-3 minutes"
+
+## Text Input - DETECT FOR:
+- Codes, dates, identifiers
+- Batch numbers, lot codes
+- Names, locations, serial numbers
+
+## Remarks - DETECT FOR:
+- "Remarks", "Comments", "Observations", "Notes"
+- Areas requiring detailed explanations
+- Corrective action descriptions
+
+# TABLE STRUCTURE RECOGNITION:
+- Preserve section headings like "ORGANOLEPTIC EVALUATION", "COOKING DETAILS", "PACKAGING & FREEZING"
+- Maintain parameter groupings and logical flow
+- Keep tolerance limits and specifications with their parameters
+- Preserve professional formatting structure
+
+# OUTPUT FORMAT:
+Provide a comprehensive JSON array with intelligent parameter type selection:
+[
+ {
+ "Parameter": "Actual Parameter Name from Document",
+ "Type": "Intelligently Selected Type",
+ "Spec": "Extracted specifications with units",
+ "DropdownOptions": "Specific options from document",
+ "ChecklistOptions": "Comprehensive list items",
+ "IncludeRemarks": "Yes/No based on parameter complexity",
+ "Section": "Document section/category",
+ "ClauseReference": "Regulatory reference if identified"
+ }
+]
+
+Focus on creating comprehensive, professional parameters that maintain the structure and intelligence of the original document while using appropriate modern input types.
+"""
+
+def init_db():
+    """Create the SQLite tables used by the app if they do not exist yet.
+
+    Opens ``swift_check.db`` and issues ``CREATE TABLE IF NOT EXISTS`` for
+    ``qc_requests``, ``llm_responses``, ``parameters`` and ``json_templates``,
+    so calling it again on an existing database leaves the tables untouched.
+    The connection is closed even when one of the statements raises.
+    """
+    con = sql.connect("swift_check.db")
+    try:
+        cur = con.cursor()
+        cur.execute("""
+            CREATE TABLE IF NOT EXISTS qc_requests (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                doc_type TEXT NOT NULL,
+                product_name TEXT NOT NULL,
+                supplier_name TEXT NOT NULL,
+                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                user_message TEXT
+            )""")
+
+        cur.execute("""
+            CREATE TABLE IF NOT EXISTS llm_responses (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                request_id INTEGER,
+                llm_response TEXT,
+                summary_text TEXT,
+                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                FOREIGN KEY (request_id) REFERENCES qc_requests(id)
+            )""")
+
+        cur.execute("""
+            CREATE TABLE IF NOT EXISTS parameters (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                request_id INTEGER,
+                parameter_name TEXT,
+                type TEXT,
+                spec TEXT,
+                dropdown_options TEXT,
+                include_remarks TEXT,
+                section TEXT,
+                clause_reference TEXT,
+                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                FOREIGN KEY (request_id) REFERENCES qc_requests(id)
+            )""")
+
+        cur.execute("""
+            CREATE TABLE IF NOT EXISTS json_templates (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                request_id INTEGER,
+                template_json TEXT,
+                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                FOREIGN KEY (request_id) REFERENCES qc_requests(id)
+            )""")
+
+        con.commit()
+    finally:
+        # Release the database handle on every path, including a failed CREATE.
+        con.close()
+
+# Executed at import time, so the tables are created as soon as this module is loaded.
+init_db()
+
+def extract_top_level_json_array(text):
+    """Return the first balanced top-level JSON array found in *text*.
+
+    Scanning starts at the first ``[``. Brackets that appear inside
+    double-quoted JSON strings (escaped quotes included) are not counted, so
+    a value such as ``"range [0-5]"`` cannot end or extend the array early.
+
+    Args:
+        text: Arbitrary text, e.g. an LLM reply with prose around a JSON array.
+
+    Returns:
+        The substring from the first ``[`` through its matching ``]``, or
+        ``""`` when *text* is empty, contains no ``[``, or the array is
+        never closed.
+    """
+    if not text:
+        return ""
+    start = text.find('[')
+    if start == -1:
+        return ""
+
+    depth = 0
+    in_string = False
+    escaped = False
+    for i in range(start, len(text)):
+        char = text[i]
+        if in_string:
+            # Inside a JSON string only an unescaped quote is significant.
+            if escaped:
+                escaped = False
+            elif char == '\\':
+                escaped = True
+            elif char == '"':
+                in_string = False
+            continue
+        if char == '"':
+            in_string = True
+        elif char == '[':
+            depth += 1
+        elif char == ']':
+            depth -= 1
+            if depth == 0:
+                return text[start:i + 1]
+
+    # The array never closed (e.g. a truncated reply): report "nothing found"
+    # rather than handing back a lone "[".
+    return ""
+
+def enhanced_call_groq_llm(user_message, doc_type, product_name, supplier_name, existing_parameters=None, is_digitization=False):
+    """Call the Groq chat model with a context-enriched system prompt.
+
+    The system prompt is assembled from the generation or digitization
+    instructions, the request details, and the context obtained through
+    get_comprehensive_context() / format_context_for_prompt(). It is sent
+    together with *user_message* as a two-message chat.
+
+    Args:
+        user_message: User-side message. When it contains the marker
+            "Reference document content", reference-document guidance is
+            appended to the context; otherwise the mandatory-category
+            guidance is appended. ``None`` is treated as an empty string.
+        doc_type: Document type, echoed into the prompt and the header text.
+        product_name: Product name; also passed to the context lookup.
+        supplier_name: Supplier name, echoed into the prompt.
+        existing_parameters: Accepted for interface compatibility; this
+            function does not read it.
+        is_digitization: When true, ENHANCED_DIGITIZE_SYSTEM_PROMPT is used
+            instead of ENHANCED_SYSTEM_PROMPT.
+
+    Returns:
+        The stripped model reply, or a string starting with
+        "Groq LLM call failed:" when the groq library is missing, the
+        GROQ_API_KEY environment variable is unset, or the API call raises.
+    """
+    if not Groq:
+        return "Groq LLM call failed: 'groq' library not found or not installed."
+
+    # The key comes from the environment so that no credential lives in the
+    # source tree; any key that was previously committed should be revoked.
+    groq_api_key = os.environ.get("GROQ_API_KEY")
+    if not groq_api_key:
+        return "Groq LLM call failed: GROQ_API_KEY environment variable is not set."
+
+    # None would break both the marker check and the chat payload below.
+    user_message = user_message or ""
+    domain = "Food Manufacturing"
+
+    # Retrieve and format the supporting context for the prompt
+    print(f"🔍 Retrieving comprehensive context for: {product_name}")
+    comprehensive_context = get_comprehensive_context(product_name, domain)
+    formatted_context = format_context_for_prompt(comprehensive_context, max_length=4500)
+
+    # Generate header and supplier info
+    header_text = f"{product_name} {doc_type}"
+    supplier_info = f"Supplier Name: {supplier_name}"
+
+    # Check if user message contains reference document content
+    has_reference = "Reference document content" in user_message
+
+    # Select appropriate system prompt
+    if is_digitization:
+        system_instructions = ENHANCED_DIGITIZE_SYSTEM_PROMPT
+    else:
+        system_instructions = ENHANCED_SYSTEM_PROMPT
+
+    # Enhanced context with regulatory compliance focus
+    enhanced_context = formatted_context
+
+    if has_reference:
+        enhanced_context += f"""
+
+**CRITICAL DIGITIZATION GUIDANCE**: The reference document content is provided to understand the STRUCTURE and PROFESSIONAL FORMAT of QC parameters.
+
+Use the reference to identify:
+1. Section headings and table structures (preserve them)
+2. Parameter types and their appropriate input methods
+3. Tolerance specifications and measurement units
+4. Professional formatting and organization
+5. Regulatory compliance requirements
+
+Create parameters with values, specifications, and input types SPECIFIC to {product_name} while maintaining the professional structure and comprehensive coverage of the reference document.
+ """
+    else:
+        # Built outside the f-string; .get() keeps an entry without a
+        # 'regulatory_body' key from aborting prompt construction.
+        regulatory_refs = ', '.join(
+            req.get('regulatory_body', '') + ' ' + req.get('clause_reference', req.get('standard_code', ''))
+            for req in comprehensive_context.get('regulatory_requirements', [])[:3]
+        )
+        enhanced_context += f"""
+
+For {product_name}, ensure you include MINIMUM 15+ parameters covering these MANDATORY categories:
+
+1. **Physical Parameters** (4-5): Appearance (Image+Toggle), Texture (Dropdown+Remarks), Dimensions (Numeric), Weight (Numeric), Shape (Dropdown)
+2. **Sensory Parameters** (3-4): Flavor (Dropdown+Remarks), Aroma (Dropdown+Remarks), Mouthfeel (Dropdown), Overall Sensory (Toggle)
+3. **Safety Parameters** (4-5): Foreign Objects (Checklist+Image), Microbiological (Numeric), Chemical (Numeric), Allergens (Checklist), Metal Detection (Text+Toggle)
+4. **Product-Specific** (2-3): Based on product type (frozen, fried, baked, etc.)
+5. **Packaging** (3-4): Integrity (Image+Checklist), Weight Verification (Numeric), Date Verification (Text), Batch Traceability (Text)
+6. **Process Control** (2-3): Temperature (Numeric), Time (Numeric), Equipment (Toggle+Text)
+7. **Compliance** (2-3): Regulatory (Checklist), Documentation (Toggle), Inspector Assessment (Toggle+Remarks)
+
+**INTELLIGENT TYPE SELECTION RULES:**
+- Image Upload: Visual inspections, appearance, defects, evidence documentation
+- Toggle: Pass/fail, acceptable/not acceptable, present/absent, binary assessments
+- Checklist: Foreign objects (stones, glass, metals, plastic, wood, insects, hair, threads), allergens (all 14), packaging defects, compliance items
+- Numeric Input: ALL measurements with specifications and units (e.g., "Weight: 25±2g", "Temperature: -18°C ±2°C")
+- Text Input: Codes, dates, identifiers, batch numbers
+- Remarks: Detailed observations, corrective actions, complex assessments
+
+**REGULATORY COMPLIANCE:**
+Based on retrieved context, ensure compliance with: {regulatory_refs}
+
+**PROFESSIONAL FORMATTING:**
+Match Al Kabeer Group's quality standards with proper section organization, comprehensive coverage, and intelligent parameter type selection.
+ """
+
+    # Construct the final system prompt ({{ and }} are literal braces in the example)
+    final_system_prompt = f"""
+{system_instructions}
+
+User context:
+- Doc Type: {doc_type}
+- Product: {product_name}
+- Supplier: {supplier_name}
+- Generated Header: {header_text}
+- Supplier Info: {supplier_info}
+
+{enhanced_context}
+
+**VALID PARAMETER TYPES:**
+Checklist, Dropdown, Image Upload, Remarks, Text Input, Numeric Input, Toggle
+
+**MANDATORY REQUIREMENTS:**
+1. MINIMUM 15+ parameters for comprehensive coverage
+2. Use intelligent type selection based on parameter purpose
+3. Include specifications with units for ALL Numeric Input parameters
+4. Provide comprehensive options for Checklist and Dropdown parameters
+5. Add clause references where regulatory compliance is required
+6. Include section organization for professional formatting
+7. Add "IncludeRemarks": "Yes" for complex parameters requiring detailed observations
+
+**OUTPUT INSTRUCTIONS:**
+1. Provide a brief summary describing the comprehensive QC parameters created.
+2. Then produce a bracketed JSON array with intelligent parameter selection.
+ Example:
+ [
+ {{
+ "action": "add",
+ "Parameter": "Product Appearance",
+ "Type": "Image Upload",
+ "Spec": "Visual inspection with photo evidence",
+ "DropdownOptions": "",
+ "ChecklistOptions": "",
+ "IncludeRemarks": "Yes",
+ "Section": "Physical Parameters",
+ "ClauseReference": "Dubai Municipality Section 4.1.2"
+ }},
+ {{
+ "action": "add",
+ "Parameter": "Foreign Objects Detection",
+ "Type": "Checklist",
+ "Spec": "Zero tolerance for all foreign materials",
+ "ChecklistOptions": "Stones, Glass, Metals, Plastic, Wood, Insects/Pests, Hair, Threads, Paper, Bones, Feathers",
+ "IncludeRemarks": "Yes",
+ "Section": "Safety Parameters",
+ "ClauseReference": "HACCP Principle 2"
+ }},
+ {{
+ "action": "add",
+ "Parameter": "Net Weight",
+ "Type": "Numeric Input",
+ "Spec": "25±2g per piece",
+ "DropdownOptions": "",
+ "IncludeRemarks": "No",
+ "Section": "Physical Parameters"
+ }}
+ ]
+"""
+
+    messages = [
+        {"role": "system", "content": final_system_prompt},
+        {"role": "user", "content": user_message},
+    ]
+
+    try:
+        # Client construction sits inside the try so that its errors are
+        # reported through the same failure string as request errors.
+        client = Groq(api_key=groq_api_key)
+        response = client.chat.completions.create(
+            messages=messages,
+            model="llama-3.3-70b-versatile",
+            stream=False,
+            temperature=0.2,  # Lower temperature for more consistent professional output
+        )
+        return response.choices[0].message.content.strip()
+    except Exception as e:
+        return f"Groq LLM call failed: {str(e)}"
+
def parse_llm_changes(llm_text):
    """Separate an LLM reply into its prose summary and its change list.

    The bracketed JSON array is located with ``extract_top_level_json_array``.
    Returns ``(summary_text, changes)``: the reply with the array text removed
    and stripped, plus the decoded array. If no array is found, or it fails to
    decode, ``changes`` is an empty list.
    """
    array_text = extract_top_level_json_array(llm_text)
    if not array_text:
        # Nothing bracketed was found: the whole reply is the summary.
        return llm_text.strip(), []

    parsed_changes = []
    try:
        parsed_changes = json.loads(array_text)
    except Exception as e:
        # Best effort: report the problem and carry on with no changes.
        print("JSON parse error:", e)

    remaining_text = llm_text.replace(array_text, "")
    return remaining_text.strip(), parsed_changes
+
def apply_changes_to_params(parameters, changes):
    """Apply LLM-proposed add/remove/update changes to a parameter list.

    Args:
        parameters: list of parameter dicts; it is modified in place.
        changes: iterable of change dicts. Each carries an ``action``
            ("add", "remove" or "update", case-insensitive), a ``Parameter``
            name and optional ``Type``, ``Spec``, ``DropdownOptions``,
            ``ChecklistOptions``, ``IncludeRemarks``, ``Section`` and
            ``ClauseReference`` keys. Non-dict entries are skipped.

    Returns:
        The same ``parameters`` list, for convenience.

    An "update" only overwrites the fields present in the change; fields the
    change omits keep their current value instead of being reset to defaults.
    """
    valid_types = ("Checklist", "Dropdown", "Image Upload", "Remarks",
                   "Text Input", "Numeric Input", "Toggle")
    # Defaults used when neither the change nor the existing entry has a value.
    field_defaults = (
        ("Spec", ""),
        ("IncludeRemarks", "No"),
        ("Section", "General"),
        ("ClauseReference", ""),
    )

    def _checked_type(raw_type):
        # Unknown or malformed types fall back to a plain text field.
        return raw_type if raw_type in valid_types else "Text Input"

    def _has_name(param, wanted_lower):
        # str() keeps the comparison safe for None / non-string names.
        return str(param.get("Parameter", "")).lower() == wanted_lower

    for change in changes or ():
        if not isinstance(change, dict):
            print(f"Skipping non-dict change: {change}")
            continue

        # The action may arrive as null or with stray whitespace.
        action = str(change.get("action") or "").strip().lower()
        p_name = change.get("Parameter", "Unnamed")
        wanted_lower = str(p_name).lower()

        # Dropdown and checklist options share one storage field.
        options = change.get("DropdownOptions", "")
        checklist_options = change.get("ChecklistOptions", "")
        if not options and checklist_options:
            options = checklist_options
        if isinstance(options, (list, tuple)):
            # Items may be non-strings (numbers, null); stringify before joining.
            options = ", ".join(str(item) for item in options)
        elif options is None:
            options = ""

        if action == "add":
            parameters.append({
                "Parameter": p_name,
                "Type": _checked_type(change.get("Type", "Text Input")),
                "Spec": change.get("Spec", ""),
                "DropdownOptions": options,
                "IncludeRemarks": change.get("IncludeRemarks", "No"),
                "Section": change.get("Section", "General"),
                "ClauseReference": change.get("ClauseReference", ""),
            })

        elif action == "remove":
            parameters[:] = [p for p in parameters if not _has_name(p, wanted_lower)]

        elif action == "update":
            for p in parameters:
                if not _has_name(p, wanted_lower):
                    continue
                p["Type"] = _checked_type(
                    change.get("Type", p.get("Type", "Text Input"))
                )
                if "DropdownOptions" in change or "ChecklistOptions" in change:
                    p["DropdownOptions"] = options
                else:
                    p.setdefault("DropdownOptions", "")
                for field, default in field_defaults:
                    p[field] = change[field] if field in change else p.get(field, default)
                break  # only the first matching parameter is updated

    return parameters
+
# Colour values used throughout the template (they appear to be 32-bit ARGB ints).
_QC_BLACK = 4278190080
_QC_WHITE = 4294967295
_QC_GREEN = 4283215696
_QC_RED = 4294198070
_QC_GRAY = 4288585374
_QC_LIGHT_GRAY = 4292927712
_QC_BLUE = 4288111521
# Largest finite float; presumably read by the renderer as "full width" - confirm.
_QC_FULL_WIDTH = 1.7976931348623157e+308


def _qc_option_list(options):
    """Return the cleaned option labels from a comma string or a list/tuple."""
    if isinstance(options, str):
        pieces = options.split(",")
    elif isinstance(options, (list, tuple)):
        pieces = [str(piece) for piece in options]
    else:
        return []
    return [piece.strip() for piece in pieces if piece.strip()]


def _qc_text_tool(tool_id, text, *, underlined, align, color, font_size, height):
    """Build a bold TEXT tool (supplier line, section header, checklist label)."""
    return {
        "toolId": tool_id,
        "toolType": "TEXT",
        "textData": {
            "text": text,
            "isBold": True,
            "isItalic": False,
            "isUnderlined": underlined,
            "textAliend": align,
            "color": color,
            "fontSize": font_size
        },
        "toolHeight": height,
        "toolWidth": _QC_FULL_WIDTH
    }


def _qc_inline_toggle(label, font_size, enabled_text, disabled_text):
    """Build the toggle sub-dict embedded in IMAGE and numeric TEXTAREA tools."""
    return {
        "label": label,
        "isBold": True,
        "isItalic": False,
        "isUnderlined": False,
        "fontSize": font_size,
        "showLabel": True,
        "enabledText": enabled_text,
        "disabledText": disabled_text,
        "enabledColor": _QC_GREEN,
        "disabledColor": _QC_RED,
        "isSelected": True
    }


def _qc_textarea_tool(tool_id, label, placeholder, *, height, label_bold=True,
                      label_italic=False, label_font_size=14, input_font_size=12,
                      status_toggle=None):
    """Build a labelled TEXTAREA tool; ``status_toggle`` adds an inline toggle."""
    tool = {
        "toolId": tool_id,
        "toolType": "TEXTAREA",
        "lableData": {
            "text": label,
            "isBold": label_bold,
            "isItalic": label_italic,
            "isUnderlined": False,
            "textAliend": "LEFT",
            "fontSize": label_font_size,
            "lablePositioned": "TOP_LEFT",
            "spacing": 5,
            "txtColor": _QC_BLACK,
            "showLable": True
        },
        "textAreaData": {
            "isFilled": True,
            "fillColor": _QC_LIGHT_GRAY,
            "borderType": "UNDERLINED",
            "storkStyle": "LINE",
            "dummyTxt": placeholder,
            "borderColor": _QC_BLACK,
            "isBold": False,
            "isItalic": False,
            "isUnderlined": False,
            "fontSize": input_font_size,
            "txtColor": _QC_GRAY
        },
        "toolHeight": height,
        "toolWidth": _QC_FULL_WIDTH
    }
    if status_toggle is not None:
        tool["toggleData"] = status_toggle
        tool["showToggle"] = True
    else:
        tool["showToggle"] = False
    return tool


def _qc_toggle_tool(tool_id, label, enabled_text, disabled_text, *,
                    label_font_size, text_font_size, text_bold, height):
    """Build a stand-alone TOGGLE tool."""
    return {
        "toolId": tool_id,
        "toolType": "TOGGLE",
        "toggleData": {
            "disabledColor": _QC_RED,
            "disabledText": disabled_text,
            "enabledColor": _QC_GREEN,
            "enabledText": enabled_text,
            "showLabel": True,
            "label": label,
            "labelFontSize": label_font_size,
            "labelTextColor": _QC_BLACK,
            "isBold": True,
            "isItalic": False,
            "isSelected": True,
            "toggleTextFontSize": text_font_size,
            "toggleTextIsBold": text_bold
        },
        "toolWidth": _QC_FULL_WIDTH,
        "toolHeight": height
    }


def generate_enhanced_json_template(doc_type, product_name, supplier_name, parameters):
    """
    Enhanced JSON template generation with intelligent parameter type handling.

    Args:
        doc_type: document type, used as the second half of the heading.
        product_name: product name, used as the first half of the heading.
        supplier_name: shown on the "Supplier Name:" line.
        parameters: list of parameter dicts with ``Parameter``, ``Type``,
            ``Spec``, ``DropdownOptions`` (comma string or list),
            ``IncludeRemarks``, ``Section`` and ``ClauseReference`` keys.

    Returns:
        A template dict whose ``pageToolsDataList`` holds, in order: the
        heading, the supplier line, the parameters grouped by section (each
        section other than "General Parameters" gets a header), and a final
        assessment block (header, overall toggle, inspector field, remarks).
        Parameters with an unrecognised ``Type`` produce no main tool.
    """
    header_text = f"{product_name} {doc_type}"
    template = {
        "templateId": "neY5j",
        "isDrafted": False,
        "pageStyle": {
            "margin": {
                "top": 10,
                "bottom": 10,
                "left": 10,
                "right": 10
            },
            "showPageNumber": False,
            "headerImgUrl": "",
            "fotterImgUrl": ""
        },
        "pageToolsDataList": [],
        # NOTE(review): the approver names/ids below are hard-coded and look
        # like sample data - confirm they are intended for every template.
        "workflowInfo": {
            "currentState": "Draft",
            "approvalStates": ["Draft", "Under Review", "Approved", "Rejected"],
            "currentApprover": {
                "userId": "user123",
                "name": "Ashish Kumar",
                "role": "QC Manager"
            },
            "previousApprovers": [
                {
                    "userId": "user456",
                    "name": "Raj Singh",
                    "role": "QC Supervisor",
                    "approvalDate": "2025-05-01T10:30:00Z",
                    "status": "Approved",
                    "comments": "Looks good to me."
                }
            ],
            "nextApprovers": [
                {
                    "userId": "user789",
                    "name": "Priya Patel",
                    "role": "CEO"
                }
            ]
        }
    }
    tools = template["pageToolsDataList"]
    issued_ids = set()

    def generate_tool_id():
        # Five random characters can collide; retry so ids stay unique per template.
        while True:
            candidate = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
            if candidate not in issued_ids:
                issued_ids.add(candidate)
                return candidate

    # Main header: white bold text on a blue box.
    tools.append({
        "toolId": generate_tool_id(),
        "toolType": "HEADING",
        "textData": {
            "text": header_text,
            "isBold": True,
            "isItalic": False,
            "isUnderlined": False,
            "textAliend": "LEFT",
            "color": _QC_WHITE,
            "fontSize": 14
        },
        "boxData": {
            "fillColor": _QC_BLUE,
            "borderEnable": False,
            "borderColor": _QC_WHITE,
            "borderWidth": 0.8,
            "boxAlignment": "CENTER_LEFT",
            "cornerRadius": {
                "topLeft": 0,
                "topRight": 0,
                "bottomLeft": 0,
                "bottomRight": 0
            },
            "padding": {
                "top": 4,
                "bottom": 4,
                "left": 9,
                "right": 4
            },
            "margin": {
                "top": 0,
                "bottom": 0,
                "left": 0,
                "right": 0
            }
        },
        "toolWidth": _QC_FULL_WIDTH
    })

    # Supplier information
    tools.append(_qc_text_tool(
        generate_tool_id(), f"Supplier Name: {supplier_name}",
        underlined=False, align="LEFT", color=_QC_BLACK, font_size=12, height=30,
    ))

    # Group parameters by section, keeping first-seen section order. A missing,
    # None or empty section is filed under "General Parameters".
    sections = {}
    for param in parameters or ():
        section_key = param.get("Section") or "General Parameters"
        sections.setdefault(section_key, []).append(param)

    for section_name, section_params in sections.items():
        # The catch-all section is rendered without a header.
        if section_name != "General Parameters":
            tools.append(_qc_text_tool(
                generate_tool_id(), str(section_name).upper(),
                underlined=True, align="LEFT", color=_QC_GREEN, font_size=13, height=35,
            ))

        for param in section_params:
            param_name = str(param.get("Parameter") or "")
            param_type = param.get("Type", "Text Input")
            spec = param.get("Spec", "")
            include_remarks = param.get("IncludeRemarks", "No")
            clause_ref = param.get("ClauseReference", "")
            option_list = _qc_option_list(param.get("DropdownOptions", ""))

            # Label shown to the inspector, with the clause reference appended.
            display_name = param_name
            if clause_ref:
                display_name += f" ({clause_ref})"

            if param_type == "Image Upload":
                # Image upload area with an inline acceptable/not-acceptable toggle.
                tools.append({
                    "toolId": generate_tool_id(),
                    "toolType": "IMAGE",
                    "imageLableData": {
                        "text": display_name + ":",
                        "isBold": True,
                        "isItalic": False,
                        "isUnderlined": False,
                        "textAliend": "LEFT",
                        "fontSize": 14,
                        "lablePositioned": "LEFT",
                        "spacing": 10,
                        "txtColor": _QC_BLACK,
                        "showLable": True
                    },
                    "imageData": {
                        "showImageUploadArea": True,
                        "width": 200,
                        "height": 150
                    },
                    "iconData": 57344,
                    "showIcon": False,
                    "iconCodePoint": 59729,
                    "iconSize": 30,
                    "iconColor": _QC_BLACK,
                    "toolHeight": 160,
                    "toolWidth": _QC_FULL_WIDTH,
                    "showToggle": True,
                    "imageToggleData": _qc_inline_toggle(
                        "Assessment", 14, "Acceptable", "Not Acceptable"
                    )
                })

            elif param_type == "Toggle":
                # The first two options, when given, override the default texts.
                if option_list:
                    enabled_text = option_list[0]
                    disabled_text = option_list[1] if len(option_list) > 1 else "No"
                else:
                    enabled_text, disabled_text = "Acceptable", "Not Acceptable"
                tools.append(_qc_toggle_tool(
                    generate_tool_id(), display_name, enabled_text, disabled_text,
                    label_font_size=14, text_font_size=12, text_bold=False, height=80,
                ))

            elif param_type == "Dropdown":
                dropdown_options = option_list or ["Acceptable", "Marginal", "Not Acceptable"]
                tools.append({
                    "toolId": generate_tool_id(),
                    "toolType": "DROPDOWN",
                    "dropdownData": {
                        "hintText": f"Select {param_name.lower()}",
                        "hintTextColor": _QC_GRAY,
                        "hintFontSize": 14,
                        "dropdownWidth": 350,
                        "spacingBetweeenLableAndDropdownWidth": 10,
                        "showLable": True,
                        "labelText": display_name,
                        "isBold": True,
                        "isItalic": False,
                        "isUnderlined": False,
                        "textAliend": "LEFT",
                        "lablePositioned": "TOP",
                        "labelFontSize": 14,
                        "lableTextColor": _QC_BLACK,
                        "numberOfOptions": len(dropdown_options),
                        "optionFontSize": 14,
                        "optionTextColor": _QC_BLACK,
                        "optionLst": dropdown_options,
                        "selectedOptionIndex": -1
                    },
                    "toolHeight": 90,
                    "toolWidth": _QC_FULL_WIDTH
                })

            elif param_type == "Checklist":
                checklist_items = option_list or ["Item 1", "Item 2", "Item 3"]
                # A text label precedes the checkbox group.
                tools.append(_qc_text_tool(
                    generate_tool_id(), display_name + ":",
                    underlined=False, align="LEFT", color=_QC_BLACK, font_size=14, height=25,
                ))
                tools.append({
                    "toolId": generate_tool_id(),
                    "toolType": "CHECKBOX",
                    "checkboxData": {
                        "numberOfCheckboxes": len(checklist_items),
                        "checkboxBgColor": _QC_WHITE,
                        "spacing": 8,
                        "runSpacing": 8,
                        "checkboxTileWidth": 140,
                        "checkBoxAlignmentEnum": "HORIZONTAL",
                        "checkBoxButtonStyleEnum": "CHECKBOX",
                        "checkBoxPositionedEnum": "START",
                        "checkBoxSelectionModeEnum": "MULTIPLE",
                        "isBold": False,
                        "isItalic": False,
                        "isUnderlined": False,
                        "textAliend": "LEFT",
                        "fontSize": 13,
                        "lablePositioned": "LEFT",
                        "txtColor": _QC_BLACK,
                        "labelLst": checklist_items,
                        "showLable": True,
                        "selectedIndexLstForMultiSelect": [],
                        "selectedIndexForSingleSelect": 0
                    },
                    "toolWidth": _QC_FULL_WIDTH,
                    # Height grows with the number of items, never below 100.
                    "toolHeight": max(100, len(checklist_items) * 15 + 40)
                })

            elif param_type == "Numeric Input":
                # The specification is shown in the label and the placeholder.
                label_text = display_name
                if spec:
                    label_text += f" (Spec: {spec})"
                tools.append(_qc_textarea_tool(
                    generate_tool_id(), label_text + ":",
                    "Enter numeric value" + (f" ({spec})" if spec else ""),
                    height=75,
                    status_toggle=_qc_inline_toggle("Status", 12, "Within Spec", "Out of Spec"),
                ))

            elif param_type == "Text Input":
                tools.append(_qc_textarea_tool(
                    generate_tool_id(), display_name + ":",
                    "Enter " + param_name.lower(), height=65,
                ))

            elif param_type == "Remarks":
                tools.append(_qc_textarea_tool(
                    generate_tool_id(), display_name + ":",
                    "Enter detailed observations and remarks", height=100,
                ))

            # Optional follow-up remarks field (a Remarks parameter already is one).
            if include_remarks == "Yes" and param_type != "Remarks":
                tools.append(_qc_textarea_tool(
                    generate_tool_id(), f"{param_name} - Additional Remarks:",
                    "Additional observations or corrective actions",
                    height=60, label_bold=False, label_italic=True,
                    label_font_size=12, input_font_size=11,
                ))

    # Final overall assessment section
    tools.append(_qc_text_tool(
        generate_tool_id(), "FINAL ASSESSMENT",
        underlined=True, align="CENTER", color=_QC_GREEN, font_size=14, height=35,
    ))
    tools.append(_qc_toggle_tool(
        generate_tool_id(), "Overall Quality Assessment", "APPROVED", "REJECTED",
        label_font_size=15, text_font_size=14, text_bold=True, height=100,
    ))
    tools.append(_qc_textarea_tool(
        generate_tool_id(), "Inspector Name & Signature:",
        "Inspector name and signature", height=80,
    ))
    tools.append(_qc_textarea_tool(
        generate_tool_id(), "Final Comprehensive Remarks:",
        "Overall assessment, corrective actions, and additional observations",
        height=120,
    ))

    return template
+
+# Enhanced OCR and text extraction functions
def enhanced_extract_text_from_document(filepath, file_ext):
    """Enhanced text extraction with better table structure recognition.

    Args:
        filepath: path of the uploaded document.
        file_ext: file extension; "pdf" (any case, leading dot tolerated)
            selects the PDF path, anything else is opened as an image.

    Returns:
        The extracted text, stripped, after ``enhance_table_structure`` has
        been applied. For PDFs each page is prefixed with a
        ``=== PAGE n ===`` marker. Returns ``None`` if anything fails; the
        error is printed.
    """
    from io import BytesIO

    # Tesseract options shared by both paths.
    ocr_config = r'--oem 3 --psm 6 -c preserve_interword_spaces=1'

    try:
        if str(file_ext).lower().lstrip('.') == 'pdf':
            page_chunks = []
            pdf_document = fitz.open(filepath)
            try:
                for page_num in range(pdf_document.page_count):
                    page = pdf_document[page_num]

                    # Prefer the embedded text layer when the page has one.
                    text = page.get_text()

                    # Fewer than 100 characters: treat the page as scanned and OCR it.
                    if len(text.strip()) < 100:
                        # 3x zoom gives a higher-resolution image for OCR.
                        pix = page.get_pixmap(matrix=fitz.Matrix(3, 3))
                        img_data = pix.pil_tobytes(format="PNG")
                        with Image.open(BytesIO(img_data)) as image:
                            text = pytesseract.image_to_string(image, config=ocr_config)

                    processed_text = enhance_table_structure(text)
                    page_chunks.append(f"\n=== PAGE {page_num + 1} ===\n{processed_text}\n")
            finally:
                # Release the document even when a page fails to process.
                pdf_document.close()
            extracted_text = "".join(page_chunks)

        else:  # Image files
            with Image.open(filepath) as image:
                text = pytesseract.image_to_string(image, config=ocr_config)
            extracted_text = enhance_table_structure(text)

        return extracted_text.strip()

    except Exception as e:
        # Deliberate best effort: callers receive None on any failure.
        print(f"Error during enhanced document processing: {str(e)}")
        return None
+
def enhance_table_structure(text):
    """Mark up OCR text so section headings and parameter/value pairs stand out.

    Known section headings are placed on their own line behind ``## ``,
    recognised parameter/value pairs get the parameter wrapped in ``**``, and
    runs of blank lines and of spaces/tabs are collapsed. Falsy input is
    returned unchanged.
    """
    if not text:
        return text

    heading_markup = r'\n## \1\n'
    heading_patterns = (
        r'(ORGANOLEPTIC\s+EVALUATION)',
        r'(COOKING\s+DETAILS)',
        r'(PACKAGING\s*&\s*FREEZING)',
        r'(FREEZING\s+DETAILS)',
        r'(METAL\s+SCREENING)',
        r'(SIZE\s+VARIATIONS)',
        r'(COLOUR\s+VARIATIONS)',
        r'(EVALUATION\s+OF\s+PASTRY)',
        r'(FINAL\s+ASSESSMENT)',
    )
    pair_rules = (
        (r'([A-Za-z\s]+):\s*(Acceptable|Non-acceptable|Present|Absent|To be mentioned)', r'**\1**: \2'),
        (r'([A-Za-z\s]+)\s+(Sam\s+\d+)', r'**\1** - \2'),
        (r'(Temperature|Weight|Time|Dimension[s]?)[:\s]+([0-9\-\+\±°C\s\w]+)', r'**\1**: \2'),
    )

    # Headings are rewritten first, then the parameter/value pairs.
    rules = [(heading, heading_markup) for heading in heading_patterns]
    rules.extend(pair_rules)

    result = text
    for regex, substitute in rules:
        result = re.sub(regex, substitute, result, flags=re.IGNORECASE)

    # Collapse three or more line breaks into two, then squeeze spaces and tabs.
    result = re.sub(r'\n\s*\n\s*\n', '\n\n', result)
    return re.sub(r'[ \t]+', ' ', result)
+
def enhanced_extract_metadata_from_ocr(ocr_text):
    """Enhanced metadata extraction with better pattern recognition.

    Args:
        ocr_text: OCR output to scan. ``None`` or an empty string is accepted
            and yields the defaults.

    Returns:
        ``(doc_type, product, supplier)``. Each value comes from the first
        pattern, in listed order, that matches with a non-blank capture.
        Defaults are "Quality Control Checklist", "Food Product" and "".
    """
    # A failed OCR step can hand over None; treat it as "no text".
    ocr_text = ocr_text or ""

    # Ordered from most specific to most generic; the first hit wins.
    doc_type_patterns = {
        r'(?i)(MALABAR\s*PARATHA.*INSPECTION)': "Malabar Paratha Inspection Record",
        r'(?i)(GREEN\s*PEAS.*INSPECTION)': "Green Peas Inspection Record",
        r'(?i)(VEGETABLE\s*SAMOSA.*INSPECTION)': "Vegetable Samosa Inspection Record",
        r'(?i)(CONTAINER.*INSPECTION.*REPORT)': "Container Inspection Report",
        r'(?i)quality\s*(?:control)?\s*checklist': "Quality Control Checklist",
        r'(?i)inspection\s*(?:record|checklist)': "Inspection Checklist",
        r'(?i)pre[\-\s]shipment.*inspection': "Pre-Shipment Inspection",
    }
    product_patterns = [
        r'(?i)product\s*(?:name|description)[:\-\s]*([^\n]{1,50})',
        r'(?i)(MALABAR\s*PARATHA)',
        r'(?i)(GREEN\s*PEAS)',
        r'(?i)(VEGETABLE\s*SAMOSA[S]?)',
        r'(?i)(SWEET\s*CORN)',
    ]
    supplier_patterns = [
        r'(?i)supplier\s*(?:name)?[:\-\s]*([^\n]{1,40})',
        r'(?i)manufacturing\s*unit[:\-\s]*([^\n]{1,40})',
        r'(?i)(AL\s*KABEER)',
        r'(?i)(CASCADE\s*MARINE)',
        r'(?i)(SAHAR\s*FOOD)',
    ]

    def _first_capture(patterns, default):
        # A label with nothing after it can capture only whitespace; skip such
        # hits so a later pattern or the default is used instead.
        for pattern in patterns:
            match = re.search(pattern, ocr_text)
            if match:
                value = match.group(1).strip()
                if value:
                    return value
        return default

    detected_doc_type = "Quality Control Checklist"  # Default
    for pattern, doc_type in doc_type_patterns.items():
        if re.search(pattern, ocr_text):
            detected_doc_type = doc_type
            break

    detected_product = _first_capture(product_patterns, "Food Product")
    detected_supplier = _first_capture(supplier_patterns, "")

    return detected_doc_type, detected_product, detected_supplier
+
+# File upload handling
ALLOWED_EXTENSIONS = {'pdf', 'png', 'jpg', 'jpeg'}


def allowed_file(filename):
    """Return True when the text after the last dot is an allowed extension.

    The comparison ignores case; names without a dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
+
def fetch_json_from_firebase(firebase_json_url, timeout=10):
    """Fetch JSON template from Firebase Storage URL.

    Args:
        firebase_json_url: URL to download.
        timeout: seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body when the response status is 200. Returns
        ``None`` on any other status, on a network error or timeout, or when
        the body is not valid JSON.
    """
    try:
        response = requests.get(firebase_json_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Error fetching JSON from Firebase: HTTP {response.status_code}")
            return None
        return response.json()
    except Exception as e:
        # Best effort: callers treat None as "template unavailable".
        print(f"Error fetching JSON from Firebase: {str(e)}")
        return None
+
+# API Routes
+@app.route("/")
+def index():
+ return """
+
+
+ 15+ parameter minimum, smart type detection, comprehensive coverage
+Dubai Municipality, HACCP, ISO standards with clause references
+Al Kabeer Group standards, section organization, comprehensive structure
+Table structure recognition, header preservation, intelligent parameter extraction
+/refine - Create Enhanced QC TemplateCreates comprehensive quality control templates with 15+ parameters, regulatory compliance, and intelligent type selection.
+ +doc_type (required) - Document typeproduct_name (required) - Product namesupplier_name (required) - Supplier nameuser_message (optional) - Additional instructionscontext_file (optional) - Reference document/edit - Edit with Enhanced ContextModifies existing templates using comprehensive context and intelligent parameter optimization.
+ +/digitize - Advanced Document DigitizationEnhanced OCR processing with table structure recognition and intelligent parameter extraction.
+ +checklist_file (required) - Scanned documentdoc_type (optional) - Document typeproduct_name (optional) - Product namesupplier_name (optional) - Supplier name/template/{request_id} - Get Enhanced Template JSONReturns professionally formatted JSON templates with intelligent parameter types.
+/history - View Request HistoryBrowse all QC requests with enhanced preview and download options.
+/test-rag - Test Enhanced RAG SystemTest the comprehensive RAG system with all 3 vector databases.
+| ID | +Product | +Doc Type | +Supplier | +Parameters | +Created | +Actions | +
|---|---|---|---|---|---|---|
| {row[0]} | +{row[2]} | +{row[1]} | +{row[3]} | +{param_badge} {row[5]} params | +{row[4]} | ++ Preview + JSON + | +
{str(e)}
", 500 + +@app.route("/template/No template exists for request ID {request_id}
+ View History + + + """, 404 + + json_template = json.loads(template_result[0]) + + # Generate enhanced ASCII preview with sections + ascii_preview = "╔══════════════════════════════════════════════════════════════════════╗\n" + + if request_details: + header = f"{request_details[1]} {request_details[0]}" + else: + header = "Enhanced QC Template" + + header_padding = (70 - len(header)) // 2 + ascii_preview += f"║{' ' * header_padding}{header}{' ' * (70 - header_padding - len(header))}║\n" + + if request_details and request_details[2]: + supplier = f"Supplier: {request_details[2]}" + supplier_padding = (70 - len(supplier)) // 2 + ascii_preview += f"║{' ' * supplier_padding}{supplier}{' ' * (70 - supplier_padding - len(supplier))}║\n" + + ascii_preview += "╚══════════════════════════════════════════════════════════════════════╝\n\n" + + # Group parameters by section + sections = {} + for param in parameters: + param_name, param_type, spec, options, include_remarks, section, clause_ref = param + section = section or "General Parameters" + if section not in sections: + sections[section] = [] + sections[section].append(param) + + # Add parameters organized by sections + for section_name, section_params in sections.items(): + ascii_preview += f"\n🔹 {section_name.upper()}\n" + ascii_preview += "─" * 60 + "\n" + + for param in section_params: + param_name, param_type, spec, options, include_remarks, section, clause_ref = param + + # Add clause reference if available + display_name = param_name + if clause_ref: + display_name += f" ({clause_ref})" + + if param_type == "Image Upload": + ascii_preview += f"[📷] {display_name}: [ Upload Photo ] + Toggle Assessment\n" + elif param_type == "Toggle": + ascii_preview += f"[◐] {display_name}: ● Acceptable ○ Not Acceptable\n" + elif param_type == "Dropdown": + ascii_preview += f"[▼] {display_name}: _________________ " + if options: + option_list = [opt.strip() for opt in options.split(",")[:3]] + ascii_preview += f"({', 
'.join(option_list)}{'...' if len(options.split(',')) > 3 else ''})\n" + else: + ascii_preview += "\n" + elif param_type == "Checklist": + ascii_preview += f" {display_name}:\n" + if options: + option_list = [opt.strip() for opt in options.split(",")] + for opt in option_list[:5]: + ascii_preview += f" ☐ {opt}\n" + if len(option_list) > 5: + ascii_preview += f" ... and {len(option_list) - 5} more items\n" + else: + ascii_preview += " ☐ Item 1\n" + elif param_type == "Numeric Input": + ascii_preview += f"[#️⃣] {display_name}: _____________" + if spec: + ascii_preview += f" (Spec: {spec})\n" + else: + ascii_preview += "\n" + elif param_type == "Text Input": + ascii_preview += f"[✏️] {display_name}: _____________________________\n" + elif param_type == "Remarks": + ascii_preview += f"[📝] {display_name}:\n" + ascii_preview += " ┌─────────────────────────────────────┐\n" + ascii_preview += " │ │\n" + ascii_preview += " │ │\n" + ascii_preview += " └─────────────────────────────────────┘\n" + + if include_remarks == "Yes" and param_type != "Remarks": + ascii_preview += f" └─ Additional Remarks: _______________________\n" + + ascii_preview += "\n" + + # Add enhanced final assessment + ascii_preview += "═" * 70 + "\n" + ascii_preview += "🎯 FINAL ASSESSMENT\n" + ascii_preview += "═" * 70 + "\n" + ascii_preview += "[✅] Overall Quality Assessment: ● APPROVED ○ REJECTED\n\n" + ascii_preview += "[👤] Inspector Name & Signature: _________________________________\n\n" + ascii_preview += "[📝] Final Comprehensive Remarks:\n" + ascii_preview += " ┌─────────────────────────────────���───────────────────────────┐\n" + ascii_preview += " │ Overall assessment, corrective actions, and observations │\n" + ascii_preview += " │ │\n" + ascii_preview += " │ │\n" + ascii_preview += " └─────────────────────────────────────────────────────────────┘\n" + + # Enhanced statistics + total_params = len(parameters) + param_types = {} + sections_count = len(sections) + regulatory_refs = sum(1 for param 
in parameters if param[6]) # clause references + + for param in parameters: + param_type = param[1] + param_types[param_type] = param_types.get(param_type, 0) + 1 + + stats_html = f""" +{str(e)}
", 500 + +@app.route("/test-rag", methods=["GET"]) +def test_enhanced_rag(): + """Test enhanced RAG functionality with comprehensive context""" + try: + test_product = request.args.get('product', 'Malabar Paratha') + test_domain = request.args.get('domain', 'Food Manufacturing') + + print(f"\n🧪 Testing Enhanced RAG for {test_product} in {test_domain}") + + # Get comprehensive context from all VDBs + comprehensive_context = get_comprehensive_context(test_product, test_domain) + + # Format context for display + formatted_context = format_context_for_prompt(comprehensive_context, max_length=6000) + + results = { + "test_parameters": { + "product": test_product, + "domain": test_domain + }, + "comprehensive_context": { + "regulatory_requirements": len(comprehensive_context.get("regulatory_requirements", [])), + "product_specifications": len(comprehensive_context.get("product_specifications", [])), + "checklist_examples": len(comprehensive_context.get("checklist_examples", [])), + "parameter_patterns": len(comprehensive_context.get("parameter_patterns", [])), + }, + "context_summary": comprehensive_context.get("context_summary", {}), + "rag_quality": { + "total_sources": ( + len(comprehensive_context.get("regulatory_requirements", [])) + + len(comprehensive_context.get("product_specifications", [])) + + len(comprehensive_context.get("checklist_examples", [])) + ), + "regulatory_compliance": len(comprehensive_context.get("regulatory_requirements", [])) > 0, + "product_depth_reference": len(comprehensive_context.get("product_specifications", [])) > 0, + "professional_examples": len(comprehensive_context.get("checklist_examples", [])) > 0, + "parameter_intelligence": len(comprehensive_context.get("parameter_patterns", [])) > 0 + } + } + + print(f"✅ Enhanced RAG Test Complete: {results['rag_quality']['total_sources']} sources retrieved") + + if request.headers.get('Accept') == 'application/json': + return jsonify(results) + else: + # Enhanced HTML view + html = f""" + + 
+{formatted_context[:2000]}{'...' if len(formatted_context) > 2000 else ''}
+ Try these Enhanced RAG tests:
+{error_response['traceback']}
+
+
+ """, 500
+
if __name__ == "__main__":
    # Startup banner listing the features of this build, one line each.
    startup_banner = (
        "🚀 Starting Enhanced Swift Check API v2.0...",
        "✅ Comprehensive RAG Integration",
        "✅ 15+ Parameter Minimum",
        "✅ Intelligent Type Selection",
        "✅ Regulatory Compliance",
        "✅ Enhanced OCR Processing",
        "✅ Professional Formatting",
    )
    for banner_line in startup_banner:
        print(banner_line)

    # Local development server on the loopback interface, debug mode on.
    app.run(host="127.0.0.1", port=5000, debug=True)
\ No newline at end of file