Update README.md
Browse files
README.md
CHANGED
|
@@ -1,6 +1,12 @@
|
|
| 1 |
---
|
| 2 |
license: creativeml-openrail-m
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
---
|
| 5 |
<h1 align='center' style='font-size: 36px; font-weight: bold;'>Sparrow</h1>
|
| 6 |
<h3 align='center' style='font-size: 24px;'>Blazing Fast Tiny Vision Language Model</h3>
|
|
@@ -36,12 +42,12 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 36 |
torch_dtype=torch.float16,
|
| 37 |
device_map="auto",
|
| 38 |
trust_remote_code=True)
|
| 39 |
-
tokenizer = AutoTokenizer.from_pretrained("ManishThota/
|
| 40 |
|
| 41 |
#function to generate the answer
|
| 42 |
def predict(question, image_path):
|
| 43 |
#Set inputs
|
| 44 |
-
text = f"
|
| 45 |
image = Image.open(image_path)
|
| 46 |
|
| 47 |
input_ids = tokenizer(text, return_tensors='pt').input_ids.to('cuda')
|
|
|
|
| 1 |
---
|
| 2 |
license: creativeml-openrail-m
|
| 3 |
+
datasets:
|
| 4 |
+
- liuhaotian/LLaVA-Pretrain
|
| 5 |
+
- liuhaotian/LLaVA-CC3M-Pretrain-595K
|
| 6 |
+
language:
|
| 7 |
+
- en
|
| 8 |
+
metrics:
|
| 9 |
+
- bleu
|
| 10 |
---
|
| 11 |
<h1 align='center' style='font-size: 36px; font-weight: bold;'>Sparrow</h1>
|
| 12 |
<h3 align='center' style='font-size: 24px;'>Blazing Fast Tiny Vision Language Model</h3>
|
|
|
|
| 42 |
torch_dtype=torch.float16,
|
| 43 |
device_map="auto",
|
| 44 |
trust_remote_code=True)
|
| 45 |
+
tokenizer = AutoTokenizer.from_pretrained("ManishThota/SparrowVQE", trust_remote_code=True)
|
| 46 |
|
| 47 |
#function to generate the answer
|
| 48 |
def predict(question, image_path):
|
| 49 |
#Set inputs
|
| 50 |
+
text = f"USER: <image>\n{question}? ASSISTANT:"
|
| 51 |
image = Image.open(image_path)
|
| 52 |
|
| 53 |
input_ids = tokenizer(text, return_tensors='pt').input_ids.to('cuda')
|