Spaces:

Chamin09
/

BrailleMenuGen

Runtime error

File size: 4,434 Bytes

87d0988

from llama_cpp import Llama
import os
import json

# Location of the quantized GGUF weights, resolved relative to this file.
_HERE = os.path.dirname(os.path.abspath(__file__))
MODEL_PATH = os.path.join(_HERE, "../models/llama-3-8b-instruct.Q4_K_M.gguf")

# Nothing is downloaded automatically: when the weights are missing, create the
# target directory and tell the user where to fetch the file from.
if not os.path.exists(MODEL_PATH):
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    print(
        "\n".join(
            (
                f"Model not found at {MODEL_PATH}",
                "Please download the model from https://huggingface.co/TheBloke/Llama-3-8B-Instruct-GGUF",
                "and place it in the models directory",
            )
        )
    )

# Shared model handle; stays None until the model is loaded on first use.
llm = None

def get_llm():
    """Return the shared Llama instance, creating it on the first call.

    Returns:
        The cached model, or None when no model file exists at MODEL_PATH
        (in which case nothing is cached and a later call will check again).
    """
    global llm

    # Already loaded: reuse the cached instance.
    if llm is not None:
        return llm

    if os.path.exists(MODEL_PATH):
        settings = {
            "model_path": MODEL_PATH,
            "n_ctx": 4096,  # Context window
            "n_gpu_layers": -1,  # Use GPU if available
        }
        llm = Llama(**settings)

    return llm

def _failure_result(raw_text, error):
    """Build the fallback result that hands the raw OCR text back unchanged."""
    return {
        'structured_text': raw_text,
        'menu_sections': [],
        'success': False,
        'error': error
    }


def _extract_json_object(text):
    """Parse the first JSON object embedded in *text*.

    Decoding starts at the first '{' and stops at the end of that object, so
    anything the model emits after the JSON (even text containing braces) is
    ignored.

    Returns:
        The decoded object, or None when *text* contains no '{' at all.

    Raises:
        json.JSONDecodeError: If the text from the first '{' is not valid JSON.
    """
    start = text.find('{')
    if start < 0:
        return None
    parsed, _end = json.JSONDecoder().raw_decode(text, start)
    return parsed


def _format_menu_sections(sections):
    """Render a list of section dicts as underlined headings followed by items."""
    lines = []
    for section in sections:
        if not isinstance(section, dict):
            continue

        # `or` also covers an explicit JSON null / empty name, not just a missing key.
        title = str(section.get('section_name') or 'Menu Items')
        lines += [title, "-" * len(title), ""]

        items = section.get('items')
        if not isinstance(items, list):
            items = []

        for item in items:
            if not isinstance(item, dict):
                continue

            entry = str(item.get('name') or '')
            if item.get('price'):
                entry += f" - {item.get('price')}"
            lines.append(entry)

            if item.get('description'):
                lines.append(f"  {item.get('description')}")

            lines.append("")

        lines.append("")

    return "".join(f"{line}\n" for line in lines)


def process_menu_text(raw_text):
    """
    Process raw OCR text using LLM to improve structure and readability.

    Args:
        raw_text: Raw text extracted from menu image

    Returns:
        A dict that always contains 'structured_text', 'menu_sections' and
        'success'. On success it also contains 'menu_data' (the full JSON
        object produced by the model). On failure 'structured_text' is the
        unmodified raw_text, 'menu_sections' is empty and 'error' holds a
        description of what went wrong. Errors raised while generating or
        parsing are reported this way rather than propagated.
    """
    model = get_llm()

    if model is None:
        # Fallback to simple processing if model not available
        return _failure_result(raw_text, "LLM model not available")

    # With nothing to structure, the model could only invent a menu.
    if not raw_text or not str(raw_text).strip():
        return _failure_result(raw_text, "No menu text to process")

    # Construct prompt for the LLM
    prompt = f"""
    You are an AI assistant that helps structure menu text from OCR.
    Below is the raw text extracted from a menu image. 
    Please clean it up, correct any obvious OCR errors, and structure it properly.
    Identify menu sections, items, and prices.
    
    RAW MENU TEXT:
    {raw_text}
    
    Format your response as JSON with the following structure:
    {{
        "menu_sections": [
            {{
                "section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
                "items": [
                    {{
                        "name": "Item name",
                        "description": "Item description if available",
                        "price": "Price if available"
                    }}
                ]
            }}
        ]
    }}
    
    Only respond with the JSON, nothing else.
    """

    try:
        # NOTE(review): max_tokens is close to the 4096-token context the model
        # is created with; confirm how the installed llama-cpp-python version
        # handles prompt + max_tokens exceeding the context window.
        response = model(prompt, max_tokens=4000, temperature=0.1, stop=["```"])
        response_text = response['choices'][0]['text'].strip()

        menu_data = _extract_json_object(response_text)
        if menu_data is None:
            # Fallback to simple processing
            return _failure_result(raw_text, "Failed to parse LLM response as JSON")

        sections = menu_data.get('menu_sections')
        if not isinstance(sections, list):
            sections = []

        return {
            'structured_text': _format_menu_sections(sections),
            'menu_data': menu_data,
            # Same key the failure results carry, so callers can read it unconditionally.
            'menu_sections': sections,
            'success': True
        }

    except Exception as e:
        # Boundary handler: any generation or parsing error becomes a failure result.
        return _failure_result(raw_text, str(e))