BigData-KSU committed on
Commit
66fedb0
·
verified ·
1 Parent(s): b5f7c86

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +108 -2
README.md CHANGED
@@ -6,9 +6,115 @@ language:
6
  base_model:
7
  - ALLaM-AI/ALLaM-7B-Instruct-preview
8
  ---
9
- ## Training procedure
10
 
11
- ### Framework versions
 
 
12
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  - PEFT 0.4.0
 
6
  base_model:
7
  - ALLaM-AI/ALLaM-7B-Instruct-preview
8
  ---
9
+ ## RS-LLaVA: Large Vision Language Model for Joint Captioning and Question Answering in Remote Sensing Imagery
10
 
11
+ - **Repository:** https://github.com/BigData-KSU/RS-LLaVA
12
+ - **Paper:** https://www.mdpi.com/2072-4292/16/9/1477
13
+ - **Demo:** Coming soon.
14
 
15
 
16
+ ## How to Get Started with the Model
17
+
18
+ ### Install
19
+
20
+ 1. Clone this repository and navigate to the ArabVLM folder
21
+
22
+ ```
23
+ git clone https://github.com/BigData-KSU/ArabVLM.git
24
+ cd ArabVLM
25
+ ```
26
+
27
+ 2. Install Packages
28
+
29
+ ```
30
+ pip install -r requirements.txt
31
+ pip install --upgrade pip # enable PEP 660 support
32
+ ```
33
+
34
+ ---
35
+
36
+ ### Inference
37
+
38
+ Use the code below to get started with the model.
39
+
40
+
41
+ ```python
42
+
43
+ from PIL import Image
44
+ import os
45
+ import torch
46
+ from vllm.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
47
+ from vllm.conversation import conv_templates, SeparatorStyle
48
+ from vllm.model.builder import load_pretrained_model
49
+ from vllm.utils import disable_torch_init
50
+ from vllm.mm_utils import tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
51
+
52
+
53
+ ### Main model....
54
+ model_path ='/BigData-KSU/ArabVLM'
55
+ model_base = 'ALLaM-AI/ALLaM-7B-Instruct-preview'
56
+
57
+
58
+ conv_mode = 'llava_llama_2'
59
+ disable_torch_init()
60
+ model_path = os.path.abspath(model_path)
61
+ print('model path')
62
+ print(model_path)
63
+ model_name = get_model_name_from_path(model_path)
64
+ print('model name')
65
+ print(model_name)
66
+ print('model base')
67
+ print(model_base)
68
+
69
+ tokenizer, model, processor, context_len = load_pretrained_model(model_path, model_base, model_name,device='cuda:0')
70
+
71
+
72
+ def chat_with_Vision_BioLLM(cur_prompt,image_name):
73
+ # Prepare the input text, adding image-related tokens if needed
74
+ image_mem = Image.open(image_name).convert('RGB')
75
+ image_processor = processor['image']
76
+ conv = conv_templates[conv_mode].copy()
77
+ roles = conv.roles
78
+ print(image_mem)
79
+ image_tensor = image_processor.preprocess(image_mem, return_tensors='pt')['pixel_values']
80
+ tensor = image_tensor.to(model.device, dtype=torch.float16)
81
+ print(f"{roles[1]}: {cur_prompt}")
82
+ cur_prompt = DEFAULT_IMAGE_TOKEN + '\n' + cur_prompt
83
+ conv.append_message(conv.roles[0], cur_prompt)
84
+ conv.append_message(conv.roles[1], None)
85
+ prompt = conv.get_prompt()
86
+ input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()
87
+ stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
88
+ keywords = [stop_str]
89
+ stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
90
+
91
+ if image_mem:
92
+ with torch.inference_mode():
93
+ output_ids = model.generate(
94
+ input_ids,
95
+ images=tensor,
96
+ do_sample=False,
97
+ max_new_tokens=1024,
98
+ use_cache=True,
99
+ stopping_criteria=[stopping_criteria])
100
+
101
+
102
+ response = tokenizer.decode(output_ids[0, input_ids.shape[1]:])
103
+ #print(outputs)
104
+
105
+ return response
106
+
107
+
108
+ if __name__ == "__main__":
109
+
110
+ cur_prompt='وصف الصورة بالتفصيل '
111
+ image_name='/media/pc/e/2025/ArabVLM/sample_images/business/Tea.jpeg'
112
+ outputs=chat_with_Vision_BioLLM(cur_prompt,image_name)
113
+ print('Model Response.....')
114
+ print(outputs)
115
+
116
+
117
+
118
+
119
+ ```
120
  - PEFT 0.4.0