File size: 4,434 Bytes
87d0988
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from llama_cpp import Llama
import os
import json

# Location of the GGUF model file, resolved relative to this module's directory.
# Nothing in this block downloads it; the file has to be placed there by hand.
MODEL_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "../models/llama-3-8b-instruct.Q4_K_M.gguf",
)

# When the model file is missing, make sure its directory exists and print
# instructions for obtaining it.
if not os.path.exists(MODEL_PATH):
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    print(
        f"Model not found at {MODEL_PATH}",
        "Please download the model from https://huggingface.co/TheBloke/Llama-3-8B-Instruct-GGUF",
        "and place it in the models directory",
        sep="\n",
    )

# Shared model handle; stays None until get_llm() loads the model.
llm = None

def get_llm():
    """Return the shared Llama instance, creating it on first use.

    Returns:
        The module-level model handle, or None while no file exists at
        MODEL_PATH (in which case nothing is loaded and a later call
        checks for the file again).
    """
    global llm

    # Already loaded: reuse the cached instance.
    if llm is not None:
        return llm

    # No model file on disk yet, so there is nothing to load.
    if not os.path.exists(MODEL_PATH):
        return None

    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=4096,  # Context window
        n_gpu_layers=-1  # Use GPU if available
    )
    return llm

def _failed_result(raw_text, error):
    """Build the fallback result that hands the raw text back with an error message."""
    return {
        'structured_text': raw_text,
        'menu_sections': [],
        'success': False,
        'error': error
    }


def _extract_json_object(response_text):
    """Return the span from the first '{' to the last '}' in response_text, or None."""
    json_start = response_text.find('{')
    json_end = response_text.rfind('}') + 1
    if json_start >= 0 and json_end > json_start:
        return response_text[json_start:json_end]
    return None


def _render_menu_text(menu_data):
    """Render parsed menu data as plain text: underlined section titles, then items."""
    parts = []
    # `or` fallbacks cover keys that are present but null in the model's JSON.
    for section in menu_data.get('menu_sections') or []:
        title = str(section.get('section_name') or 'Menu Items')
        parts.append(f"{title}\n")
        parts.append("-" * len(title) + "\n\n")

        for item in section.get('items') or []:
            line = f"{item.get('name') or ''}"
            if item.get('price'):
                line += f" - {item.get('price')}"
            parts.append(line + "\n")

            if item.get('description'):
                parts.append(f"  {item.get('description')}\n")

            parts.append("\n")

        parts.append("\n")
    return "".join(parts)


def process_menu_text(raw_text):
    """
    Process raw OCR text using LLM to improve structure and readability.

    Args:
        raw_text: Raw text extracted from menu image

    Returns:
        A dict. On success it holds 'structured_text' (the rendered menu),
        'menu_data' (the parsed JSON object), 'menu_sections' (the section
        list from that object) and 'success' set to True. On any failure
        it holds 'structured_text' (raw_text unchanged), 'menu_sections'
        (an empty list), 'success' set to False and 'error' (a message).
        Exceptions raised while loading or running the model are reported
        through 'error' rather than propagated.
    """
    # Nothing to structure: skip the model entirely instead of prompting it
    # with an empty (or "None") menu.
    if not raw_text or not str(raw_text).strip():
        return _failed_result(raw_text, "No menu text provided")

    try:
        model = get_llm()
    except Exception as e:
        # A model that fails to load is reported like any other failure.
        return _failed_result(raw_text, str(e))

    if model is None:
        # Fallback to simple processing if model not available
        return _failed_result(raw_text, "LLM model not available")

    # Construct prompt for the LLM
    prompt = f"""
    You are an AI assistant that helps structure menu text from OCR.
    Below is the raw text extracted from a menu image. 
    Please clean it up, correct any obvious OCR errors, and structure it properly.
    Identify menu sections, items, and prices.
    
    RAW MENU TEXT:
    {raw_text}
    
    Format your response as JSON with the following structure:
    {{
        "menu_sections": [
            {{
                "section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
                "items": [
                    {{
                        "name": "Item name",
                        "description": "Item description if available",
                        "price": "Price if available"
                    }}
                ]
            }}
        ]
    }}
    
    Only respond with the JSON, nothing else.
    """

    try:
        # NOTE(review): a reply that opens with a ``` fence would be cut off
        # before any JSON is produced - confirm this stop sequence is intended.
        response = model(prompt, max_tokens=4000, temperature=0.1, stop=["```"])
        response_text = response['choices'][0]['text'].strip()

        json_str = _extract_json_object(response_text)
        if json_str is None:
            return _failed_result(raw_text, "Failed to parse LLM response as JSON")

        menu_data = json.loads(json_str)
        structured_text = _render_menu_text(menu_data)
    except Exception as e:
        # Best-effort boundary: any model, parsing or rendering error falls
        # back to the raw text.
        return _failed_result(raw_text, str(e))

    return {
        'structured_text': structured_text,
        'menu_data': menu_data,
        # Same key the failure results carry, so callers can read it either way.
        'menu_sections': menu_data.get('menu_sections') or [],
        'success': True
    }