Spaces:

archaiveproject
/

CCR_OCR

Sleeping

App Files Community

JJJHHHH committed on Jul 20, 2025

Commit

b5c1972

verified ·

1 Parent(s): 0cb7f53

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -42

app.py CHANGED Viewed

@@ -1,49 +1,88 @@
-from ultralytics import YOLO
 from PIL import Image
-import gradio as gr
 from huggingface_hub import snapshot_download
-import os
-# Load label map from labels.txt
-def load_labels(label_path="labels.txt"):
-    with open(label_path, "r", encoding="utf-8") as f:
-        lines = f.read().splitlines()
-        id2char = {i: char for i, char in enumerate(lines)}
-    return id2char
-# Load model from HuggingFace
-def load_model(repo_id):
-    download_dir = snapshot_download(repo_id)
-    path = os.path.join(download_dir, "CCR_EthicalSplit_Finetune.pth")
-    detection_model = YOLO(path, task='detect')
-    return detection_model
-# Prediction function
-def predict(pilimg):
-    results = detection_model.predict(pilimg, conf=0.5, iou=0.6)
-    result = results[0]
-    # Extract predicted classes
-    predicted_chars = []
-    for box in result.boxes:
-        cls_idx = int(box.cls.item())
-        char = id2char.get(cls_idx, '?')
-        predicted_chars.append(char)
-    # Join predictions as text
-    prediction_text = ''.join(predicted_chars)
-    return prediction_text
-# Setup
 REPO_ID = "JJJHHHH/CCR_EthicalSplit_Finetune"
-LABEL_PATH = "labels.txt"
-id2char = load_labels(LABEL_PATH)
-detection_model = load_model(REPO_ID)
-# Gradio interface (image in, text out)
 gr.Interface(fn=predict,
-             inputs=gr.Image(type="pil"),
-             outputs=gr.Textbox(label="Predicted Chinese Text")
-             ).launch(share=True)

+import os
+import json
+import torch
+import torch.nn as nn
 from PIL import Image
+from torchvision import models, transforms
 from huggingface_hub import snapshot_download
+import gradio as gr
+# Model Architecture
+class ChineseClassifier(nn.Module):
+    def __init__(self, embed_dim, num_classes, pretrainedEncoder=True, unfreezeEncoder=True):
+        super().__init__()
+        resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT if pretrainedEncoder else None)
+        self.resnet = nn.Sequential(*list(resnet.children())[:-1])
+        for param in self.resnet.parameters():
+            param.requires_grad = unfreezeEncoder
+        self.fc = nn.Linear(resnet.fc.in_features, embed_dim)
+        self.batch_norm = nn.BatchNorm1d(embed_dim)
+        self.dropout = nn.Dropout(0.3)
+        self.classifier = nn.Linear(embed_dim, num_classes)
+    def forward(self, x, return_embedding=False):
+        x = self.resnet(x)
+        x = torch.flatten(x, 1)
+        x = self.fc(x)
+        x = self.batch_norm(x)
+        x = self.dropout(x)
+        if return_embedding:
+            return x
+        x = self.classifier(x)
+        return x
+# Load labels.json
+def load_labels(labels_path):
+    with open(labels_path, "r", encoding="utf-8") as f:
+        labels = json.load(f)
+    return labels
+# Transform for inference
+def prepare_transforms():
+    return transforms.Compose([
+        transforms.Resize((224, 224)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                             std=[0.229, 0.224, 0.225]),
+    ])
+# Load Model
+def load_model(model_path, embed_dim, num_classes, device):
+    model = ChineseClassifier(embed_dim, num_classes).to(device)
+    checkpoint = torch.load(model_path, map_location=device)
+    model.load_state_dict(checkpoint)
+    model.eval()
+    return model
+# HF snapshot + model init
 REPO_ID = "JJJHHHH/CCR_EthicalSplit_Finetune"
+snapshot_dir = snapshot_download(repo_id=REPO_ID)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+labels_path = os.path.join(snapshot_dir, "enhancedLabels.json")
+model_path = os.path.join(snapshot_dir, "CCR_EthicalSplit_Finetune.pth")
+labels = load_labels(labels_path)
+idx_to_class = sorted(set(labels.values()))
+class_names = idx_to_class
+model = load_model(model_path, embed_dim=512, num_classes=len(class_names), device=device)
+transform = prepare_transforms()
+# Inference
+def predict(image: Image.Image):
+    image = image.convert("RGB")
+    input_tensor = transform(image).unsqueeze(0).to(device)
+    with torch.no_grad():
+        output = model(input_tensor)
+        pred_idx = output.argmax(dim=1).item()
+        pred_class = class_names[pred_idx]
+    return f"Prediction: {pred_class}"
+# Gradio Interface
 gr.Interface(fn=predict,
+             inputs=gr.Image(type="pil", label="Upload Calligraphy Image"),
+             outputs=gr.Text(label="Prediction"),
+             title="Chinese Calligraphy Classifier",
+             description="Upload an image of handwritten Chinese text. The model will classify the character."
+             ).launch()