# glm-ocr-v1 / app.py
# WebashalarForML's picture
# Update app.py
# 48d8e0c verified
import gradio as gr
from transformers import AutoProcessor, AutoModelForImageTextToText
import torch
from PIL import Image
# --- Configuration ---
MODEL_PATH = "zai-org/GLM-OCR"

# 1. Hardware selection: float16 on a CUDA GPU, float32 on CPU.
device, dtype = (
    ("cuda", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)
print(f"๐Ÿš€ ENGINE STARTED: Device={device} | Dtype={dtype}")
# 2. Model initialisation (best-effort: a load failure is reported, not fatal).
# Both names are bound up front so that a failed load leaves them as ``None``
# rather than undefined module globals.
processor = None
model = None
try:
    print("โณ Menyiapkan Otak GLM...")

    # Processor.
    # NOTE(review): trust_remote_code=True lets the Hub repository run its
    # own Python code at load time; only keep it for repositories you trust.
    processor = AutoProcessor.from_pretrained(
        MODEL_PATH,
        trust_remote_code=True,
    )

    # Model: loaded through AutoModelForImageTextToText, which the original
    # author reports as the class that loads these weights successfully.
    model = AutoModelForImageTextToText.from_pretrained(
        MODEL_PATH,
        torch_dtype=dtype,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map="auto",
    )
    model.eval()
except Exception as e:
    # Deliberately non-fatal: the script continues so the UI can still start.
    print(f"โš ๏ธ Warning Model (Gas Terus): {e}")
# 3. Extraction logic
def proses_intelijen(image, prompt="Text Recognition:", max_new_tokens=2048):
    """Run the OCR model on one image and return the recognised text.

    Args:
        image: The image to read, placed as-is into the chat message.
            ``None`` means nothing was uploaded.
        prompt: Instruction text sent alongside the image. Defaults to the
            previously hard-coded ``"Text Recognition:"``.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to the previously hard-coded ``2048``.

    Returns:
        The decoded text on success. If ``image`` is ``None`` or anything
        inside the inference step raises, a message string describing the
        problem is returned instead; inference errors are not propagated.
    """
    if image is None:
        return "โš ๏ธ Gambarnya mana Bos? Upload dulu."

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        }
    ]

    try:
        # tokenize=True + return_dict=True make the template call return
        # ready-to-use tensors, which are then moved to the model's device.
        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False,
            )

        # Drop the prompt tokens so only the newly generated part is decoded.
        prompt_len = len(inputs["input_ids"][0])
        hasil = generated_ids[0][prompt_len:]
        return processor.decode(hasil, skip_special_tokens=True)
    except Exception as e:
        # Any failure is surfaced as text in the output box, not raised.
        return f"๐Ÿšจ SYSTEM CRITICAL FAILURE: {e}"
# 4. User interface (minimal version that avoids the earlier errors)
# The custom CSS is passed to gr.Blocks through its ``css`` argument.
css_custom = """
.container { max-width: 1200px; margin: auto; padding-top: 20px; }
h1 { text-align: center; color: #3b82f6; }
"""

app = gr.Blocks(css=css_custom, title="GLM-OCR V-FINAL")
with app:
    with gr.Column(elem_classes="container"):
        gr.Markdown("# ๐Ÿ‘๏ธ GLM-OCR ULTRA")
        gr.Markdown("Scanner Dokumen Tercerdas.")

        with gr.Row():
            # Left column: image upload and the scan trigger.
            with gr.Column(scale=1):
                input_img = gr.Image(
                    type="pil",
                    label="Upload Disini",
                    height=450,
                )
                scan_btn = gr.Button(
                    "๐Ÿš€ START SCAN",
                    variant="primary",
                    size="lg",
                )

            # Right column: recognised text.
            with gr.Column(scale=1):
                # Notes from the original author on this textbox:
                # 1. 'show_copy_button=True' was removed because it caused
                #    an error.
                # 2. 'interactive=False' was removed so the text can be
                #    copied manually.
                # Only default settings are used, as the safest option.
                output_txt = gr.Textbox(label="Hasil Teks", lines=24)

    # Wire the button: one image in, one text result out.
    scan_btn.click(
        fn=proses_intelijen,
        inputs=[input_img],
        outputs=[output_txt],
    )

if __name__ == "__main__":
    app.launch()