Spaces:
Runtime error
Runtime error
File size: 4,434 Bytes
87d0988 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | from llama_cpp import Llama
import os
import json
# Location of the quantized Llama 3 weights, resolved relative to the
# directory that contains this file.
MODEL_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "../models/llama-3-8b-instruct.Q4_K_M.gguf",
)

# Nothing is downloaded automatically: when the weights are absent, make sure
# the target directory exists and tell the user where to fetch the file.
if not os.path.exists(MODEL_PATH):
    os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
    print(
        f"Model not found at {MODEL_PATH}",
        "Please download the model from https://huggingface.co/TheBloke/Llama-3-8B-Instruct-GGUF",
        "and place it in the models directory",
        sep="\n",
    )

# Shared model handle; stays None until get_llm() loads it on demand.
llm = None
def get_llm():
    """Return the shared ``Llama`` instance, loading it on first use.

    The model is constructed at most once and cached in the module-level
    ``llm`` variable.

    Returns:
        The cached ``Llama`` object, or ``None`` when no file exists at
        ``MODEL_PATH`` (nothing is loaded in that case).
    """
    global llm

    # Already loaded by an earlier call: reuse it.
    if llm is not None:
        return llm

    # Weights are not on disk, so there is nothing to load.
    if not os.path.exists(MODEL_PATH):
        return None

    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=4096,        # size of the context window, in tokens
        n_gpu_layers=-1,   # -1: offload all layers to the GPU if available
    )
    return llm
def _fallback_result(raw_text, error):
    """Build the failure payload, passing the raw OCR text through unchanged."""
    return {
        'structured_text': raw_text,
        'menu_sections': [],
        'success': False,
        'error': error
    }


def _render_menu_text(menu_sections):
    """Render parsed menu sections as plain text with underlined headings."""
    parts = []
    for section in menu_sections:
        # `or` (rather than a .get() default) also covers an explicit JSON
        # null or empty string, which would otherwise break len() below.
        heading = str(section.get('section_name') or 'Menu Items')
        parts.append(f"{heading}\n")
        parts.append("-" * len(heading) + "\n\n")
        for item in section.get('items') or []:
            parts.append(f"{item.get('name') or ''}")
            if item.get('price'):
                parts.append(f" - {item.get('price')}")
            parts.append("\n")
            if item.get('description'):
                parts.append(f" {item.get('description')}\n")
            parts.append("\n")
        parts.append("\n")
    return "".join(parts)


def process_menu_text(raw_text):
    """
    Process raw OCR text using the LLM to improve structure and readability.

    The model is asked to return the menu as JSON; the JSON object is cut out
    of the completion, parsed, and rendered back into plain text.

    Args:
        raw_text: Raw text extracted from a menu image.

    Returns:
        A dict. On success it contains:
            'structured_text': plain-text rendering of the parsed menu,
            'menu_data': the full JSON object returned by the model,
            'menu_sections': the list of sections taken from 'menu_data',
            'success': True.
        On failure (model unavailable, no JSON object in the completion, or
        any exception while generating/parsing/rendering) it contains:
            'structured_text': ``raw_text`` unchanged,
            'menu_sections': an empty list,
            'success': False,
            'error': a message describing what went wrong.
    """
    llm = get_llm()
    if llm is None:
        # Fallback to simple processing if model not available
        return _fallback_result(raw_text, "LLM model not available")

    # Construct prompt for the LLM
    prompt = f"""
You are an AI assistant that helps structure menu text from OCR.
Below is the raw text extracted from a menu image.
Please clean it up, correct any obvious OCR errors, and structure it properly.
Identify menu sections, items, and prices.
RAW MENU TEXT:
{raw_text}
Format your response as JSON with the following structure:
{{
"menu_sections": [
{{
"section_name": "Section name (e.g., Appetizers, Main Course, etc.)",
"items": [
{{
"name": "Item name",
"description": "Item description if available",
"price": "Price if available"
}}
]
}}
]
}}
Only respond with the JSON, nothing else.
"""
    try:
        # Generate response from LLM.
        # NOTE(review): stop=["```"] ends generation at the first code fence;
        # if the model opens its answer with a fence the completion would be
        # empty and the "Failed to parse" fallback is returned -- confirm
        # this is intended.
        # NOTE(review): max_tokens=4000 leaves little room for the prompt if
        # the context window is the 4096 tokens configured in get_llm --
        # confirm long menus are handled.
        response = llm(prompt, max_tokens=4000, temperature=0.1, stop=["```"])

        # Extract the outermost {...} span from the completion text
        response_text = response['choices'][0]['text'].strip()
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1
        if json_start < 0 or json_end <= json_start:
            return _fallback_result(raw_text, "Failed to parse LLM response as JSON")

        menu_data = json.loads(response_text[json_start:json_end])
        # A missing or null 'menu_sections' is treated as "no sections".
        menu_sections = menu_data.get('menu_sections') or []

        return {
            'structured_text': _render_menu_text(menu_sections),
            'menu_data': menu_data,
            # Present in every failure payload too, so callers can read this
            # key without checking 'success' first.
            'menu_sections': menu_sections,
            'success': True
        }
    except Exception as e:
        # Deliberate best-effort boundary: any generation, parsing or
        # rendering error degrades to the raw text plus the error message.
        return _fallback_result(raw_text, str(e))
|