Spaces:
Build error
Build error
add project files
Browse files- adapter_config.json +28 -0
- app.py +48 -0
- load_model.py +44 -0
- openai_community_med_e3 +1 -0
adapter_config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "openai-community/gpt2-medium",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": true,
|
| 8 |
+
"init_lora_weights": true,
|
| 9 |
+
"layer_replication": null,
|
| 10 |
+
"layers_pattern": null,
|
| 11 |
+
"layers_to_transform": null,
|
| 12 |
+
"loftq_config": {},
|
| 13 |
+
"lora_alpha": 32,
|
| 14 |
+
"lora_dropout": 0.05,
|
| 15 |
+
"megatron_config": null,
|
| 16 |
+
"megatron_core": "megatron.core",
|
| 17 |
+
"modules_to_save": null,
|
| 18 |
+
"peft_type": "LORA",
|
| 19 |
+
"r": 32,
|
| 20 |
+
"rank_pattern": {},
|
| 21 |
+
"revision": null,
|
| 22 |
+
"target_modules": [
|
| 23 |
+
"c_attn"
|
| 24 |
+
],
|
| 25 |
+
"task_type": "CAUSAL_LM",
|
| 26 |
+
"use_dora": false,
|
| 27 |
+
"use_rslora": false
|
| 28 |
+
}
|
app.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Streamlit chat UI for the SSahas/openai_community_med_e3 fine-tuned GPT-2 model."""

from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import streamlit as st


@st.cache_resource(show_spinner="Loading model...")
def _load_model_and_tokenizer():
    """Load the tokenizer and model exactly once per process.

    Streamlit re-executes the whole script on every user interaction;
    without caching, the model would be reloaded on each rerun.
    """
    tok = AutoTokenizer.from_pretrained("SSahas/openai_community_med_e3")
    mdl = AutoModelForCausalLM.from_pretrained("SSahas/openai_community_med_e3")
    return tok, mdl


tokenizer, model = _load_model_and_tokenizer()


def response_generator(prompt):
    """Generate one assistant reply for a chat history.

    Args:
        prompt: list of ``{"role": ..., "content": ...}`` dicts
            (the full conversation so far).

    Returns:
        str: the newly generated text only — prompt tokens are sliced
        off and special tokens skipped.
    """
    input_text = tokenizer.apply_chat_template(
        prompt, tokenize=False, truncation=False, add_generation_prompt=True)
    input_ids = tokenizer(input_text, padding=True, return_tensors="pt")
    output_ids = model.generate(
        input_ids=input_ids["input_ids"],
        # 50256 is GPT-2's EOS token, reused as pad to silence the warning.
        generation_config=GenerationConfig(max_new_tokens=20, pad_token_id=50256),
    )
    # Decode only the tokens generated beyond the prompt length.
    output = tokenizer.decode(
        output_ids[0][input_ids["input_ids"].shape[1]:], skip_special_tokens=True)
    return output


st.title("Simple friendly chatbot")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Accept user input
if prompt := st.chat_input("What is up?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        # Pass the whole history so the model sees prior turns.
        response = response_generator(st.session_state.messages)
        st.write(response)
    # Add assistant response to chat history
    st.session_state.messages.append(
        {"role": "assistant", "content": response})
load_model.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Compare generations of the fine-tuned chat model against base GPT-2 medium.

Loads both models, formats a chat prompt with the tokenizer's chat template,
and prints the continuation each model produces for the same input.
"""

from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

model_link = "SSahas/openai_community_med_e3"
tokenizer = AutoTokenizer.from_pretrained(model_link)
finetuned_model = AutoModelForCausalLM.from_pretrained(model_link)
original_model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2-medium")

# Longer multi-turn conversation kept as an alternative test input.
# (In the original script this was assigned to ``prompt`` and immediately
# overwritten — dead code; renamed so both examples remain usable.)
example_conversation = [
    {'role': 'user', 'content': 'Hey man , you wanna buy some weed ?'},
    {'role': 'assistant', 'content': 'Some what ?'},
    {'role': 'user',
     'content': 'Weed ! You know ? Pot , Ganja , Mary Jane some chronic !'},
    {'role': 'assistant', 'content': 'Oh , umm , no thanks .'},
    {'role': 'user',
     'content': 'I also have blow if you prefer to do a few lines .'},
    {'role': 'assistant', 'content': 'No , I am ok , really .'},
    {'role': 'user',
     'content': 'Come on man ! I even got dope and acid ! Try some !'},
    {'role': 'assistant',
     'content': 'Do you really have all of these drugs ? Where do you get them from ?'},
    {'role': 'user',
     'content': 'I got my connections ! Just tell me what you want and I ’ ll even give you one ounce for free .'},
    # Fixed accidental adjacent-string concatenation ('Let''s' -> "Let's").
    {'role': 'assistant', 'content': "Sounds good ! Let's see , I want ."},
    {'role': 'user', 'content': 'Yeah ?'},
]

prompt = [{'role': 'user', 'content': 'Hello, My name is Sahas., How are you?'}]

# Render the chat history into the model's expected text format.
input_text = tokenizer.apply_chat_template(
    prompt, tokenize=False, truncation=False, add_generation_prompt=True)
input_ids = tokenizer(input_text, padding=True, return_tensors="pt")

# 50256 is GPT-2's EOS token, reused as pad_token_id for both models.
finetuned_model_output = finetuned_model.generate(
    input_ids=input_ids['input_ids'],
    generation_config=GenerationConfig(
        max_new_tokens=20, pad_token_id=50256, temperature=0.5, do_sample=True))
original_model_output = original_model.generate(
    input_ids=input_ids['input_ids'],
    generation_config=GenerationConfig(
        max_new_tokens=20, pad_token_id=50256, temperature=0.5, do_sample=True))

# Decode only the newly generated tokens (slice off the prompt portion).
finetuned_model_output = tokenizer.decode(
    finetuned_model_output[0][input_ids['input_ids'].shape[1]:],
    skip_special_tokens=True)
original_model_output = tokenizer.decode(
    original_model_output[0][input_ids['input_ids'].shape[1]:],
    skip_special_tokens=True)

print("finetuned_model output\n")
print(finetuned_model_output)
print("original_model output\n")
print(original_model_output)
openai_community_med_e3
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Subproject commit f155ca7b092859d25da29621fccd48ba0bf3dbc0
|