glm-ocr-v1

Sleeping

File size: 3,342 Bytes

c1fcad0
8d74894
c1fcad0
 
 
48d8e0c
c1fcad0
 
267c8de
87f5018
 
 
 
 
 
 
3d4e61f
c1fcad0
267c8de
c1fcad0
8d74894
 
267c8de
d440466
 
 
 
3d4e61f
267c8de
8d74894
c1fcad0
 
87f5018
3d4e61f
 
87f5018
8d74894
 
87f5018
c1fcad0
267c8de
3d4e61f
c1fcad0
267c8de
3d4e61f
c1fcad0
267c8de
c1fcad0
8d74894
 
 
 
 
 
 
 
 
c1fcad0
87f5018
267c8de
87f5018
8d74894
87f5018
267c8de
87f5018
267c8de
 
c1fcad0
87f5018
8d74894
d440466
267c8de
3d4e61f
d440466
 
267c8de
 
d440466
87f5018
 
267c8de
3d4e61f
267c8de
 
 
3d4e61f
267c8de
3d4e61f
 
267c8de
3d4e61f
267c8de
 
d440466
3d4e61f
 
267c8de
 
c1fcad0
3d4e61f
267c8de
 
 
 
 
3d4e61f
 
c1fcad0
87f5018
d440466

import gradio as gr
from transformers import AutoProcessor, AutoModelForImageTextToText
import torch
from PIL import Image

# --- Runtime configuration ---
MODEL_PATH = "zai-org/GLM-OCR"

# 1. Hardware selection: half precision when a CUDA device is available,
#    full precision on CPU otherwise.
device, dtype = (
    ("cuda", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

print(f"🚀 ENGINE STARTED: Device={device} | Dtype={dtype}")

# 2. Model initialisation (failure-tolerant)
# Bind both names up front so that a failed load leaves them as None instead
# of undefined; the rest of the module can then test for None rather than
# tripping over a NameError.
processor = None
model = None

try:
    print("⏳ Menyiapkan Otak GLM...")

    # NOTE(security): trust_remote_code=True lets code shipped in the model
    # repository run locally. Keep it only for repositories you trust.
    processor = AutoProcessor.from_pretrained(
        MODEL_PATH,
        trust_remote_code=True,
    )

    # Original author's note: AutoModelForImageTextToText is used here
    # because it was the class that loaded the weights successfully.
    model = AutoModelForImageTextToText.from_pretrained(
        MODEL_PATH,
        torch_dtype=dtype,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map="auto",
    )

    # Inference only: switch off training-mode behaviour.
    model.eval()

except Exception as e:
    # Deliberate best-effort: report the problem and carry on so the UI can
    # still start. Whatever failed to load stays None.
    print(f"⚠️ Warning Model (Gas Terus): {e}")

# 3. LOGIKA EKSTRAKSI (INTELIJEN)
def proses_intelijen(image):
    """Run text recognition on an image and return the decoded text.

    Args:
        image: The image to read, or ``None`` when nothing was supplied.
            It is forwarded unchanged as the ``image`` entry of the chat
            message given to the processor.

    Returns:
        The text decoded from the newly generated tokens. When no image is
        given, when the model/processor are not loaded, or when inference
        raises, a human-readable message string is returned instead; this
        function never raises to its caller.
    """
    if image is None:
        return "⚠️ Gambarnya mana Bos? Upload dulu."

    # Start-up tolerates a failed model load, so these globals may be unbound
    # or None. Look them up defensively and give a clear message instead of
    # surfacing a NameError/AttributeError to the user.
    ocr_processor = globals().get("processor")
    ocr_model = globals().get("model")
    if ocr_processor is None or ocr_model is None:
        return (
            "🚨 SYSTEM CRITICAL FAILURE: model belum siap "
            "(gagal dimuat saat startup, cek log server)."
        )

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": "Text Recognition:"},
            ],
        }
    ]

    try:
        # tokenize=True + return_dict=True makes the template return model
        # inputs directly; they are then moved to the model's device.
        inputs = ocr_processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(ocr_model.device)

        with torch.no_grad():
            generated_ids = ocr_model.generate(
                **inputs,
                max_new_tokens=2048,
                do_sample=False,
            )

        # generate() output starts with the prompt tokens; keep only what
        # comes after them.
        prompt_len = inputs["input_ids"].shape[-1]
        new_tokens = generated_ids[0][prompt_len:]
        return ocr_processor.decode(new_tokens, skip_special_tokens=True)

    except Exception as e:
        # UI boundary: keep the full traceback in the server log and show
        # only a short message in the interface.
        import traceback

        traceback.print_exc()
        return f"🚨 SYSTEM CRITICAL FAILURE: {e}"

# 4. User interface
# Custom CSS, handed to gr.Blocks through its `css` argument below.
css_custom = """
.container { max-width: 1200px; margin: auto; padding-top: 20px; }
h1 { text-align: center; color: #3b82f6; }
"""

# Layout: a title area, then one row with two equally scaled columns. The
# first column holds the image input and the scan button, the second the
# output textbox.
with gr.Blocks(css=css_custom, title="GLM-OCR V-FINAL") as app:
    with gr.Column(elem_classes="container"):
        gr.Markdown("# 👁️ GLM-OCR ULTRA")
        gr.Markdown("Scanner Dokumen Tercerdas.")

        with gr.Row():
            with gr.Column(scale=1):
                # type="pil" asks Gradio to hand the upload over as a PIL image.
                input_img = gr.Image(type="pil", label="Upload Disini", height=450)
                scan_btn = gr.Button("🚀 START SCAN", variant="primary", size="lg")

            with gr.Column(scale=1):
                # NOTE (from the original author):
                # 1. `show_copy_button=True` was removed because it caused an error.
                # 2. `interactive=False` was removed so the text can be copied manually.
                # Only the default Textbox settings are used, as the safest option.
                output_txt = gr.Textbox(label="Hasil Teks", lines=24)

    # Clicking the button passes the uploaded image to proses_intelijen and
    # writes its return value into the textbox.
    scan_btn.click(fn=proses_intelijen, inputs=input_img, outputs=output_txt)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    app.launch()