sandz7 commited on
Commit
6bed8a1
·
1 Parent(s): 75b99c5

Xgen model has been deployed

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +72 -0
  3. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv/
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import pandas as pd
import numpy as np
import random
import torch
# from torch.cuda.amp import autocast

# Free any already-cached GPU memory before loading the (large) model.
torch.cuda.empty_cache()


# Load model directly.
# NOTE(review): `.to('cuda')` assumes a CUDA-capable GPU is available at
# startup and will raise otherwise — confirm the deployment hardware.
# The checkpoint is loaded in fp16 to roughly halve GPU memory usage.
tokenizer = AutoTokenizer.from_pretrained("Salesforce/xgen-7b-8k-inst")
model = AutoModelForCausalLM.from_pretrained("Salesforce/xgen-7b-8k-inst", torch_dtype=torch.float16).to('cuda')
# XGen LLM (the original comments said "Bloom", but the loaded checkpoint
# is Salesforce/xgen-7b-8k-inst).
def xgen(input_text,
         history,
         tokenize: bool = True,
         add_generation_prompt: bool = True):
    """
    Generate a chat reply with the XGen LLM.

    Encodes ``input_text`` with the tokenizer, generates up to 256 new
    tokens on the GPU, and decodes ONLY the newly generated tokens back
    into text. (Fix: the original decoded the whole output sequence, so
    every chat reply began with an echo of the user's prompt.)

    Parameters
    ----------
    input_text : str
        The user's message.
    history : list
        Conversation history supplied by ``gr.ChatInterface``; currently
        unused — the prompt is built from ``input_text`` alone.
    tokenize : bool
        Accepted for interface compatibility; currently unused.
    add_generation_prompt : bool
        Accepted for interface compatibility; currently unused.

    Returns
    -------
    str
        The decoded model response, without special tokens.
    """
    # Prompt template for the LLM.
    dialogue_template = [
        {"role": "user",
         "content": input_text}
    ]

    # Be sure the dialogue template is in string format for the tokenizer.
    prompt = ""
    for dialogue in dialogue_template:
        prompt += dialogue["content"] + " "

    # Token ids for the prompt, moved to the GPU alongside the model.
    input_ids = tokenizer(prompt, return_tensors='pt').to('cuda')

    # torch.no_grad() saves memory and computation during inference.
    with torch.no_grad():
        # Generate output from the LLM.
        outputs = model.generate(**input_ids,
                                 max_new_tokens=256)

    # Decode only the generated continuation — skip the prompt tokens that
    # generate() echoes back at the start of the output sequence.
    prompt_len = input_ids['input_ids'].shape[1]
    outputs_decoded = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    return outputs_decoded
# Release any GPU memory cached during model loading before serving.
torch.cuda.empty_cache()

# Create the mushroom UI.
# Fix: the Chatbot widget was created but never passed to ChatInterface,
# so its height/label configuration was silently unused — wire it in.
chatbot = gr.Chatbot(height=700, label='Gradio ChatInterface')

with gr.Blocks(fill_height=True) as demo:
    gr.ChatInterface(
        fn=xgen,
        chatbot=chatbot,
        fill_height=True,
        title="Mushroom 🍄"
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pandas
2
+ torch
3
+ gradio
4
+ numpy
5
+ transformers