Spaces:

sterepando
/

MandreOCR

Paused

File size: 4,016 Bytes

701ee56
 
 
 
32ecb63
31b0bf1
701ee56
c17ab8e
32ecb63
31b0bf1
32ecb63
 
701ee56
31b0bf1
32ecb63
 
c17ab8e
32ecb63
c17ab8e
 
31b0bf1
 
 
 
32ecb63
31b0bf1
c17ab8e
32ecb63
c17ab8e
32ecb63
31b0bf1
 
32ecb63
 
c17ab8e
1bd2be8
701ee56
c17ab8e
701ee56
31b0bf1
701ee56
32ecb63
 
31b0bf1
5705c9e
701ee56
 
5705c9e
701ee56
 
 
31b0bf1
 
 
c17ab8e
ea17727
31b0bf1
 
 
 
 
 
 
ea17727
c17ab8e
5705c9e
31b0bf1
 
 
 
 
 
701ee56
c17ab8e
31b0bf1
32ecb63
31b0bf1
 
32ecb63
31b0bf1
 
 
 
c17ab8e
701ee56
 
5705c9e
31b0bf1
5705c9e
701ee56
32ecb63
701ee56
5705c9e
701ee56

import io
import uvicorn
from PIL import Image
from fastapi import FastAPI, UploadFile, File, Response
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

# --- 1. Global loading of the components ---
# `model` and `processor` keep their None defaults when loading fails; the
# /api/ocr handler checks for None and answers 503 in that case, so a failed
# load leaves the web app running instead of crashing at import time.
model = None
processor = None
device = "cpu"

try:
    print(">>> Инициализация загрузки LightOnOCR-1B (с trust_remote_code=True)...")

    # Prefer the GPU when one is visible to torch.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f">>> Устройство: {device}")

    repo_id = "lightonai/LightOnOCR-1B-1025"

    # 1. Load the processor.
    # NOTE: trust_remote_code=True allows custom processor/model code shipped
    # in the Hub repository to be downloaded and executed locally. Only use it
    # with repositories you trust (consider pinning a revision).
    processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)

    # 2. Load the model: bfloat16 on GPU, float32 on CPU.
    dtype = torch.bfloat16 if device == "cuda" else torch.float32
    model = AutoModelForImageTextToText.from_pretrained(
        repo_id,
        torch_dtype=dtype,
        low_cpu_mem_usage=True,
        trust_remote_code=True
    ).to(device)

    print(">>> Все компоненты успешно загружены!")

except Exception as e:
    # Top-level boundary: loading is best-effort, so the error is reported and
    # swallowed. Print the full traceback as well (same style as the request
    # handler) -- the bare message alone rarely identifies the failing step.
    import traceback

    print(f"КРИТИЧЕСКАЯ ОШИБКА загрузки: {e}")
    traceback.print_exc()

app = FastAPI(title="LightOnOCR Final API", version="5.0.0")

@app.post("/api/ocr")
async def run_ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and return the transcribed text.

    Args:
        file: The uploaded image (multipart form field ``file``).

    Returns:
        ``{"text": <transcription>}`` on success. Otherwise a plain-text
        ``Response`` with status 503 (model or processor not loaded),
        400 (the upload could not be decoded as an image) or
        500 (any other failure; the traceback is printed to stderr).
    """
    if model is None or processor is None:
        return Response(content="Сервер не готов.", status_code=503)

    try:
        # 1. Decode the uploaded image.
        contents = await file.read()
        try:
            image = Image.open(io.BytesIO(contents)).convert("RGB")
        except OSError as e:
            # Pillow signals empty, non-image or truncated data with OSError
            # (UnidentifiedImageError is a subclass). That is a client error,
            # not a server fault.
            return Response(content=f"Invalid image: {e}", status_code=400)

        # 2. Build the prompt; the processor is expected to expand the image
        # placeholder into the model's image tokens.
        # NOTE(review): confirm this processor expands a literal "<image>"
        # placeholder; if it does not, build the inputs with
        # processor.apply_chat_template instead.
        prompt = "<image>\nTranscribe the text in this image."

        # 3. Tokenize the text and preprocess the image.
        inputs = processor(text=prompt, images=image, return_tensors="pt")

        # Move every tensor to the target device. Floating-point tensors
        # (e.g. pixel values) are also cast to the model's dtype: on CUDA the
        # model is loaded in bfloat16 and float32 inputs would not match its
        # weights. Integer tensors (token ids, masks) keep their dtype.
        inputs = {
            k: (
                v.to(device=device, dtype=model.dtype)
                if v.is_floating_point()
                else v.to(device)
            )
            for k, v in inputs.items()
            if isinstance(v, torch.Tensor)
        }

        # 4. Greedy generation.
        # NOTE: this call is synchronous, so the event loop is blocked for
        # the duration of the generation.
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=1024,
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id
            )

        # 5. Decode only the newly generated tokens. Decoder-only models
        # return prompt + continuation, so the prompt is dropped by position.
        # This replaces string-based prompt stripping, which could delete or
        # truncate genuine OCR text containing "Transcribe" or "image.".
        prompt_ids = inputs.get("input_ids")
        is_encoder_decoder = getattr(model.config, "is_encoder_decoder", False)
        if prompt_ids is not None and not is_encoder_decoder:
            generated_ids = generated_ids[:, prompt_ids.shape[1]:]

        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return {"text": generated_text.strip()}

    except Exception as e:
        # Request-level boundary: log the full traceback and report a 500.
        import traceback
        traceback.print_exc()
        return Response(content=f"Server Error: {str(e)}", status_code=500)

@app.get("/")
async def home():
    """Root endpoint: return a short readiness banner pointing at /api/ocr."""
    banner = "OCR API Ready. POST image to /api/ocr"
    return dict(message=banner)

if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 7860.
    uvicorn.run(
        app,
        port=7860,
        host="0.0.0.0",
    )