Spaces:

Ronith55
/

OCR_deepseek-vl2

Running

Ronith55 committed on Feb 25, 2025

Commit

dd44da8

verified ·

1 Parent(s): 2182986

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,32 +1,31 @@
-import gradio as gr
-from transformers import AutoModel, AutoTokenizer, AutoFeatureExtractor
 from PIL import Image
-# Load the deepseek-vl2-small model together with its tokenizer and its
-# feature extractor; all three are loaded from the same checkpoint name.
-model_name = "deepseek-ai/deepseek-vl2-small"  # Replace with actual model name if available on HF
-model = AutoModel.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
-# Define inference function
-def process_image_text(image, text):
-    # Runs the module-level feature extractor, tokenizer and model on one
-    # image/text pair. The model output is not inspected: the function
-    # always returns the same fixed status string.
-    # Process inputs (both are requested with return_tensors="pt")
-    image_input = feature_extractor(images=image, return_tensors="pt")
-    text_input = tokenizer(text, return_tensors="pt")
-    # Get model output; both input mappings are unpacked as keyword arguments
-    outputs = model(**text_input, **image_input)
-    # `outputs` is never read. Replace the placeholder return below with real
-    # post-processing for the model's task.
-    return "Model processed the inputs successfully!"
-# Create Gradio interface: an image input (type="pil") and a textbox are
-# wired to process_image_text, and the result is rendered as text.
-interface = gr.Interface(
-    fn=process_image_text,
-    inputs=[gr.Image(type="pil"), gr.Textbox()],
-    outputs="text",
-    title="deepseek-vl2-small Demo"
-)
-# Launch app (executed at import time; there is no __main__ guard here)
-interface.launch()

+import torch
+from transformers import AutoProcessor, AutoModelForVision2Seq
 from PIL import Image
+# Checkpoint identifier on the Hugging Face Hub.
+MODEL_NAME = "deepseek-ai/deepseek-vl2-small"
+# Run on the GPU when one is available, otherwise stay on the CPU.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Load the processor and the half-precision model weights once, at import time.
+# NOTE(review): confirm this checkpoint can be loaded through the Auto*
+# classes with the installed transformers version.
+processor = AutoProcessor.from_pretrained(MODEL_NAME)
+model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)
+# Keep the weights on the same device that inputs are sent to at inference
+# time; a CPU-resident model fed CUDA tensors fails inside generate().
+model = model.to(DEVICE)
+# Run the model on a single image file and return the decoded text.
+def predict(image_path):
+    """Generate text for the image stored at ``image_path``.
+
+    Args:
+        image_path: Location of the image; passed straight to
+            ``PIL.Image.open``.
+
+    Returns:
+        The first sequence produced by ``model.generate``, decoded with
+        special tokens skipped.
+    """
+    # Open inside a context manager so the file handle is released as soon
+    # as convert() has produced its own RGB copy of the pixels.
+    with Image.open(image_path) as source:
+        image = source.convert("RGB")
+    # Send the tensors to wherever the model's weights live rather than
+    # re-deciding the device here; picking "cuda" independently of the model
+    # causes a device mismatch when the model was left on the CPU.
+    inputs = processor(images=image, return_tensors="pt").to(model.device)
+    # Generate output
+    output = model.generate(**inputs)
+    # Decode response; only the first item of the batch is returned
+    generated_text = processor.batch_decode(output, skip_special_tokens=True)[0]
+    return generated_text
+# Entry point used only when the file is executed directly as a script.
+if __name__ == "__main__":
+    # Placeholder path; point this at a real image before running.
+    test_image_path = "test.jpg"
+    generated = predict(test_image_path)
+    print("Generated Output:", generated)