nomypython committed on
Commit
294c4cc
·
verified ·
1 Parent(s): 4767b33

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - ur
5
+ tags:
6
+ - ocr
7
+ - urdu
8
+ - vision
9
+ - unsloth
10
+ base_model: unsloth/DeepSeek-OCR
11
+ ---
12
+
13
+ # Urdu OCR Model - اردو او سی آر
14
+
15
+ Fine-tuned DeepSeek-OCR model for Urdu text recognition.
16
+
17
+ ## Model Description
18
+
19
+ This model is fine-tuned on Urdu text images for optical character recognition (OCR) tasks.
20
+
21
+ ## Usage
22
+
23
+ ```python
24
+ from unsloth import FastVisionModel
25
+ from transformers import AutoModel
26
+
27
+ model, tokenizer = FastVisionModel.from_pretrained(
28
+ "nomypython/urdu-ocr-deepseek",
29
+ load_in_4bit=True,
30
+ auto_model=AutoModel,
31
+ trust_remote_code=True,
32
+ )
33
+
34
+ FastVisionModel.for_inference(model)
35
+
36
+ result = model.infer(
37
+ tokenizer,
38
+ prompt="<image>\nExtract Urdu text from this image:",
39
+ image_file="your_image.png",
40
+ image_size=640,
41
+ base_size=640,
42
+ crop_mode=False,
43
+ )
44
+
45
+ print(result)
46
+ ```
47
+
48
+ ## Training Details
49
+
50
+ - Base Model: DeepSeek-OCR
51
+ - Fine-tuned for: Urdu OCR
52
+ - Framework: Unsloth
53
+ - LoRA Rank: 16
54
+
55
+ ## Intended Use
56
+
57
+ Extract Urdu text from images containing printed or handwritten text.
adapter_config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "DeepseekOCRForCausalLM",
7
+ "parent_library": "transformers_modules.deepseek_ocr.modeling_deepseekocr",
8
+ "unsloth_fixed": true
9
+ },
10
+ "base_model_name_or_path": "unsloth/DeepSeek-OCR",
11
+ "bias": "none",
12
+ "corda_config": null,
13
+ "ensure_weight_tying": false,
14
+ "eva_config": null,
15
+ "exclude_modules": null,
16
+ "fan_in_fan_out": false,
17
+ "inference_mode": true,
18
+ "init_lora_weights": true,
19
+ "layer_replication": null,
20
+ "layers_pattern": null,
21
+ "layers_to_transform": null,
22
+ "loftq_config": {},
23
+ "lora_alpha": 32,
24
+ "lora_bias": false,
25
+ "lora_dropout": 0.05,
26
+ "megatron_config": null,
27
+ "megatron_core": "megatron.core",
28
+ "modules_to_save": null,
29
+ "peft_type": "LORA",
30
+ "peft_version": "0.18.0",
31
+ "qalora_group_size": 16,
32
+ "r": 16,
33
+ "rank_pattern": {},
34
+ "revision": null,
35
+ "target_modules": [
36
+ "o_proj",
37
+ "v_proj",
38
+ "k_proj",
39
+ "down_proj",
40
+ "up_proj",
41
+ "q_proj",
42
+ "gate_proj"
43
+ ],
44
+ "target_parameters": null,
45
+ "task_type": "CAUSAL_LM",
46
+ "trainable_token_indices": null,
47
+ "use_dora": false,
48
+ "use_qalora": false,
49
+ "use_rslora": true
50
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f897b4aed5867407db8b56402da25696299162d1729ce0bd06605d28a2a5645a
3
+ size 310662536
special_tokens_map.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|User|>",
4
+ "<|Assistant|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|begin▁of▁sentence|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|end▁of▁sentence|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|▁pad▁|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ }
27
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff