raviix46 committed on
Commit
e2095d8
·
verified ·
1 Parent(s): 5a49315

Rename components/llm_utils.py to components/lllm_ocr.py

Browse files
Files changed (2) hide show
  1. components/lllm_ocr.py +37 -0
  2. components/llm_utils.py +0 -0
components/lllm_ocr.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
2
+ from PIL import Image
3
+ import torch
4
+
5
# Checkpoint identifier shared by the model weights and the processor.
ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"

# Load the weights in bfloat16, then move the model onto the GPU.
model = MllamaForConditionalGeneration.from_pretrained(ckpt, torch_dtype=torch.bfloat16)
model = model.to("cuda")

# Processor loaded from the same checkpoint as the model.
processor = AutoProcessor.from_pretrained(ckpt)
12
+
13
def extract_text_llm(image_path, *, max_new_tokens=250):
    """Transcribe the handwritten text in an image using the vision LLM.

    Args:
        image_path: Path or file object accepted by ``PIL.Image.open``.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 250.

    Returns:
        The model's reply as a whitespace-stripped string, without the
        echoed prompt.
    """
    # Close the file handle deterministically once the pixels are converted.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
                {"type": "image"},
            ],
        }
    ]

    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    # Follow the model's device instead of hard-coding "cuda" a second time.
    inputs = processor(text=prompt, images=[image], return_tensors="pt").to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # The generated sequence starts with the prompt tokens. Drop them by
    # position rather than searching the decoded string for "assistant" and
    # deleting "user", which also removed those words from the transcription.
    prompt_length = inputs["input_ids"].shape[-1]
    reply_ids = outputs[0][prompt_length:]

    return processor.decode(reply_ids, skip_special_tokens=True).strip()
components/llm_utils.py DELETED
File without changes