Spaces:

sandz7
/

Krypton

Runtime error

App Files Files Community

sandz7 committed on May 27, 2024

Commit

d364219

1 Parent(s): 58f26ad

start of krypton

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +92 -0
requirements.txt +4 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv/

app.py ADDED Viewed

	@@ -0,0 +1,92 @@

+# import torch
+# import gradio as gr
+# from transformers import pipeline, TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
+# from PIL import Image
+# import requests
+# import threading
+# HTML header for the page (rendered through gr.Markdown): the app title and a
+# link to the model card of the model this demo uses.
+DESCRIPTION = '''
+<div>
+<h1 style="text-align: center;">Krypton 🕋</h1>
+<p>This uses an Open Source model from <a href="https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers"><b>xtuner/llava-llama-3-8b-v1_1-transformers</b></a></p>
+</div>
+'''
+# model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
+# pipe = pipeline("image-to-text", model=model_id, device_map="auto")
+# # Place transformers in hardware to prepare for process and generation
+# llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+# llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.float16).to('cuda')
+# terminators = [
+#     llama_tokenizer.eos_token_id,
+#     llama_tokenizer.convert_tokens_to_ids("<|eot_id|>")
+# ]
+# def krypton(prompt,
+#             history,
+#             input_image,
+#             max_new_tokens,
+#             temperature,
+#             num_beams,
+#             do_sample: bool=True):
+#     """
+#     Passes an image as input, places it for generation
+#     on pipeline and output is passed. This is multimodal
+#     """
+#     conversation = []
+#     for user, assistant in history:
+#         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+#     conversation.append({"role": "user", "content": prompt})
+#     input_ids = llama_tokenizer.apply_chat_template(conversation, return_tensors='pt').to(llama_model.device)
+#     streamer = TextIteratorStreamer(llama_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+#     llava_generation_kwargs = dict(
+#         input_ids=input_ids,
+#         streamer=streamer,
+#         max_new_tokens=max_new_tokens,
+#         num_beams=num_beams,
+#         do_sample=do_sample
+#     )
+#     if temperature == 0.0:
+#         do_sample = False
+#     pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
+#     # Pipeline generation
+#     outputs = pipeline()
+from transformers import pipeline
+from PIL import Image
+import requests
+import torch
+import subprocess
+import gradio as gr
+model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
+pipe = pipeline("image-to-text", model=model_id, torch_dtype=torch.float16, device=0)
+def krypton(input_image):
+    pil_image = Image.fromarray(input_image.astype('uint8'), 'RGB')
+    # image = Image.open(requests.get(url, stream=True).raw)
+    prompt = ("<|start_header_id|>user<|end_header_id|>\n\n<image>\nWhat are these?<|eot_id|>"
+            "<|start_header_id|>assistant<|end_header_id|>\n\n")
+    outputs = pipe(input_image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
+    nvidia_result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE)
+    return outputs[0]
+with gr.Blocks(fill_height=True) as demo:
+    gr.Markdown(DESCRIPTION)
+    gr.Interface(
+        fn=krypton,
+        inputs="image",
+        outputs="text",
+        fill_height=True
+    )
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch
+transformers
+gradio
+numpy