from time import sleep

import streamlit as st
# For GPU inference, uncomment the following line:
# from unsloth import FastLanguageModel, is_bfloat16_supported
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

AI_MODE = "ON"

if AI_MODE == "ON":
    model_id = "choco-conoz/TwinLlama-3.2-1B-DPO"
    # model_id = "choco-conoz/TwinLlama-3.2-1B"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    # For GPU inference, uncomment the following line:
    # model = FastLanguageModel.for_inference(model)
    processor = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
    )
    terminators = [
        tokenizer.eos_token_id,
        # The token string was lost in extraction; "<|eot_id|>" is the
        # standard end-of-turn token for the Llama-3 model family.
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
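# A minimal sketch of GPU inference without unsloth (an assumption, not part
# of the original app; requires torch and accelerate, plus a CUDA device):
#
# import torch
# model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     torch_dtype=torch.bfloat16,  # half-precision to fit the GPU
#     device_map="auto",           # let accelerate place the weights
# )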
def main():
    st.title('DEMO - DPO')
    st.subheader('Instruction/Response')
    st.markdown('<div style="text-align: right;">produced by Conoz (https://www.conoz.com)</div>',
                unsafe_allow_html=True)
    st.markdown(
        '<div><br />Response time on basic Space hardware is about 3 minutes. '
        'You can ask questions in English, Korean, etc.<br />'
        'This demo uses a Llama-3.2-1B model fine-tuned with DPO by Conoz, '
        'with the Alpaca chat template.<br />'
        'A Llama-3.1-8B model fine-tuned with DPO by Conoz is also available, '
        'but it does not run on basic Space hardware.<br /></div>',
        unsafe_allow_html=True
    )
    st.markdown('<hr />', unsafe_allow_html=True)

    query = st.text_input('Input your topic of interest (10-1000 characters).',
                          placeholder='e.g. What is the capital of South Korea?')
    # Alpaca-style prompt template; the second placeholder is left empty so
    # the model completes the response.
    alpaca_template = """Below is an instruction that describes a task. \
Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
{}"""
| if st.button("Send"): | |
| if not query: | |
| st.error("Please enter a query.") | |
| return | |
| if len(query) < 10: | |
| st.error("Please enter a query with at least 10 characters.") | |
| return | |
| if len(query) > 1000: | |
| st.error("Please enter a query with less than 1000 characters.") | |
| return | |
        with st.spinner("Generating response..."):
            user_prompt = alpaca_template.format(query, "")
            if AI_MODE == "ON":
                # For chat models, build a message list and apply the
                # tokenizer's chat template instead of the Alpaca template:
                # messages = [{"role": "user", "content": query}]
                # user_prompt = tokenizer.apply_chat_template(
                #     messages, tokenize=False, add_generation_prompt=True)
                outputs = processor(user_prompt,
                                    max_new_tokens=256,
                                    # num_return_sequences=1,
                                    # eos_token_id=terminators,
                                    do_sample=True,  # needed for temperature/top_p to take effect
                                    temperature=0.6,
                                    top_p=0.9,
                                    )
                # The pipeline echoes the prompt, so strip it from the output.
                response = outputs[0]["generated_text"][len(user_prompt):]
            else:
                sleep(3)
                response = "AI_MODE is OFF. Please turn it ON to get a response."
        st.subheader('Response:')
        st.write(response)


if __name__ == "__main__":
    main()
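# A minimal sketch of how to run this demo locally (assumes the file is saved
# as app.py; the package list is an assumption, not confirmed by the Space):
#
#   pip install streamlit transformers torch
#   streamlit run app.py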