sandz7 commited on
Commit
6bed8a1
·
1 Parent(s): 75b99c5

Xgen model has been deployed

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +72 -0
  3. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv/
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import pandas as pd
import numpy as np
import random
import torch
# from torch.cuda.amp import autocast

# Free any already-cached GPU memory before loading the (large) model.
torch.cuda.empty_cache()


# Load model directly.
# NOTE(review): `.to('cuda')` assumes a CUDA-capable GPU is available at
# startup and will raise otherwise — confirm the deployment hardware.
# The checkpoint is loaded in fp16 to roughly halve GPU memory usage.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/xgen-7b-8k-inst")
model = AutoModelForCausalLM.from_pretrained("Salesforce/xgen-7b-8k-inst", torch_dtype=torch.float16).to('cuda')
# XGen LLM (the original comments said "Bloom", but the loaded checkpoint
# is Salesforce/xgen-7b-8k-inst).
def xgen(input_text,
         history,
         tokenize: bool = True,
         add_generation_prompt: bool = True):
    """
    Generate a chat reply with the XGen LLM.

    Encodes ``input_text`` with the tokenizer, generates up to 256 new
    tokens on the GPU, and decodes ONLY the newly generated tokens back
    into text. (Fix: the original decoded the whole output sequence, so
    every chat reply began with an echo of the user's prompt.)

    Parameters
    ----------
    input_text : str
        The user's message.
    history : list
        Conversation history supplied by ``gr.ChatInterface``; currently
        unused — the prompt is built from ``input_text`` alone.
    tokenize : bool
        Accepted for interface compatibility; currently unused.
    add_generation_prompt : bool
        Accepted for interface compatibility; currently unused.

    Returns
    -------
    str
        The decoded model response, without special tokens.
    """
    # Prompt template for the LLM.
    dialogue_template = [
        {"role": "user",
         "content": input_text}
    ]

    # Be sure the dialogue template is in string format for the tokenizer.
    prompt = ""
    for dialogue in dialogue_template:
        prompt += dialogue["content"] + " "

    # Token ids for the prompt, moved to the GPU alongside the model.
    input_ids = tokenizer(prompt, return_tensors='pt').to('cuda')

    # torch.no_grad() saves memory and computation during inference.
    with torch.no_grad():
        # Generate output from the LLM.
        outputs = model.generate(**input_ids,
                                 max_new_tokens=256)

    # Decode only the generated continuation — skip the prompt tokens that
    # generate() echoes back at the start of the output sequence.
    prompt_len = input_ids['input_ids'].shape[1]
    outputs_decoded = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    return outputs_decoded
# Release any GPU memory cached during model loading before serving.
torch.cuda.empty_cache()

# Create the mushroom UI.
# Fix: the Chatbot widget was created but never passed to ChatInterface,
# so its height/label configuration was silently unused — wire it in.
chatbot = gr.Chatbot(height=700, label='Gradio ChatInterface')

with gr.Blocks(fill_height=True) as demo:
    gr.ChatInterface(
        fn=xgen,
        chatbot=chatbot,
        fill_height=True,
        title="Mushroom 🍄"
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pandas
2
+ torch
3
+ gradio
4
+ numpy
5
+ transformers