caiofabio1 committed on
Commit
b427564
·
verified ·
1 Parent(s): 85da275

Upload handler.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. handler.py +16 -16
handler.py CHANGED
@@ -1,8 +1,14 @@
 
 
 
 
 
 
 
1
 
2
  import torch
3
  import base64
4
  import io
5
- import re
6
  from typing import Dict, List, Any
7
  from PIL import Image
8
  from transformers import AutoProcessor, AutoModelForImageTextToText
@@ -18,11 +24,11 @@ class EndpointHandler:
18
  trust_remote_code=True,
19
  ).eval()
20
  self.device = next(self.model.parameters()).device
 
21
 
22
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
23
  inputs_data = data.get("inputs", data)
24
 
25
- # Accept base64 image
26
  if isinstance(inputs_data, dict):
27
  image_b64 = inputs_data.get("image", "")
28
  prompt = inputs_data.get("prompt", "Text Recognition:")
@@ -30,27 +36,22 @@ class EndpointHandler:
30
  image_b64 = inputs_data
31
  prompt = "Text Recognition:"
32
  else:
33
- return [{"error": "Invalid input format"}]
34
 
35
- # Decode image
36
  try:
37
  image_bytes = base64.b64decode(image_b64)
38
  image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
39
  except Exception as e:
40
  return [{"error": f"Failed to decode image: {str(e)}"}]
41
 
42
- # Build messages
43
- messages = [
44
- {
45
- "role": "user",
46
- "content": [
47
- {"type": "image", "image": image},
48
- {"type": "text", "text": prompt},
49
- ],
50
- }
51
- ]
52
 
53
- # Process
54
  text = self.processor.apply_chat_template(
55
  messages, tokenize=False, add_generation_prompt=True
56
  )
@@ -59,7 +60,6 @@ class EndpointHandler:
59
  )
60
  proc_inputs = {k: v.to(self.device) for k, v in proc_inputs.items()}
61
 
62
- # Generate
63
  with torch.no_grad():
64
  output = self.model.generate(
65
  **proc_inputs,
 
1
+ import subprocess
2
+ import sys
3
+
4
+ # Force install latest transformers with glm_ocr support
5
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade",
6
+ "git+https://github.com/huggingface/transformers.git", "accelerate"],
7
+ stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
8
 
9
  import torch
10
  import base64
11
  import io
 
12
  from typing import Dict, List, Any
13
  from PIL import Image
14
  from transformers import AutoProcessor, AutoModelForImageTextToText
 
24
  trust_remote_code=True,
25
  ).eval()
26
  self.device = next(self.model.parameters()).device
27
+ print(f"Model loaded on {self.device}")
28
 
29
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
30
  inputs_data = data.get("inputs", data)
31
 
 
32
  if isinstance(inputs_data, dict):
33
  image_b64 = inputs_data.get("image", "")
34
  prompt = inputs_data.get("prompt", "Text Recognition:")
 
36
  image_b64 = inputs_data
37
  prompt = "Text Recognition:"
38
  else:
39
+ return [{"error": "Invalid input. Send {inputs: {image: base64, prompt: str}}"}]
40
 
 
41
  try:
42
  image_bytes = base64.b64decode(image_b64)
43
  image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
44
  except Exception as e:
45
  return [{"error": f"Failed to decode image: {str(e)}"}]
46
 
47
+ messages = [{
48
+ "role": "user",
49
+ "content": [
50
+ {"type": "image", "image": image},
51
+ {"type": "text", "text": prompt},
52
+ ],
53
+ }]
 
 
 
54
 
 
55
  text = self.processor.apply_chat_template(
56
  messages, tokenize=False, add_generation_prompt=True
57
  )
 
60
  )
61
  proc_inputs = {k: v.to(self.device) for k, v in proc_inputs.items()}
62
 
 
63
  with torch.no_grad():
64
  output = self.model.generate(
65
  **proc_inputs,