choco-conoz committed
Commit f1e63f8 · 1 parent: 5d78a93

feat: refactoring

Files changed (2)
  1. requirements.txt +2 -5
  2. src/streamlit_app.py +62 -39
requirements.txt CHANGED
@@ -1,11 +1,8 @@
-# altair
-# pandas
 streamlit>=1.46.1
-# 4.53.0
-transformers==4.49.0
+transformers==4.53.0
 torch==2.7.0
 # sentence-transformers>=3.0.0
 huggingface_hub>=0.33.1
 bitsandbytes
 accelerate>=0.26.0
-unsloth
+# unsloth
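For reference, a minimal sanity check (a sketch, not part of the commit) that a rebuilt Space matches the new pins; it relies only on the __version__ attributes these packages expose:

import transformers
import torch

# fail fast if the image was built against the old pins
assert transformers.__version__ == "4.53.0", transformers.__version__
assert torch.__version__.startswith("2.7"), torch.__version__
print("pins OK:", transformers.__version__, torch.__version__)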
src/streamlit_app.py CHANGED
@@ -1,32 +1,53 @@
+from time import sleep
 import streamlit as st
+# for GPU inference, uncomment the following line
 # from unsloth import FastLanguageModel, is_bfloat16_supported
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
-# model_id = "sentence-transformers/all-MiniLM-L6-v2"
-# model_id = "sentence-transformers/xlm-r-base-en-ko-nli-ststb"
-# model_id = "mistralai/Mistral-7B-Instruct-v0.1"
-# model_id = "meta-llama/Llama-3.2-1B"
-# model_id = "choco-conoz/TwinLlama-3.1-8B"
-model_id = "choco-conoz/TwinLlama-3.2-1B"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
-# model = FastLanguageModel.for_inference(model)
-
-processor = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=10
-)
-
-terminators = [
-    tokenizer.eos_token_id,
-    tokenizer.convert_tokens_to_ids(""),
-]
+AI_MODE = "OFF"
+
+if AI_MODE == "ON":
+    # model_id = "choco-conoz/TwinLlama-3.1-8B"
+    model_id = "choco-conoz/TwinLlama-3.2-1B"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id)
+    # for GPU inference, uncomment the following line
+    # model = FastLanguageModel.for_inference(model)
+
+    processor = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=10
+    )
+
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids(""),
+    ]
 
 
 def main():
-    st.title('Text Generator (conoz)')
+    st.title('DEMO - SFT (Instruction/Response)')
+    st.markdown('<div style="text-align: right;">produced by Conoz (https://www.conoz.com)</div>',
+                unsafe_allow_html=True)
+    st.markdown(
+        '<div><br />basic space hardware에서 응답 시간은 3분 정도 소요됩니다. '
+        '영어, 한국어 등으로 질문할 수 있습니다.<br />'
+        '코노즈에서 Llama-3.2-1B model을 SFT로 학습한 모델을 사용합니다. '
+        '알파카 chat template을 사용합니다.<br />'
+        '코노즈에서 Llama-3.1-8B 모델을 SFT로 학습한 모델을 사용할 수도 있지만 basic space hardware에서는 동작하지 않습니다.</div>',
+        unsafe_allow_html=True
+    )
+    st.markdown(
+        '<div>Response time on basic space hardware is about 3 minutes. '
+        'You can ask questions in English, Korean, etc. '
+        'The demo uses a model fine-tuned by Conoz on Llama-3.2-1B. '
+        'It uses the Alpaca chat template.<br />'
+        'A Conoz fine-tune of Llama-3.1-8B is also available, but it does not run on basic space hardware.<br /></div>',
+        unsafe_allow_html=True
+    )
+    st.markdown('<hr />', unsafe_allow_html=True)
     query = st.text_input('input your topic of interest')
 
     alpaca_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -39,24 +60,26 @@ def main():
     """
 
     if st.button("Send"):
-        user_prompt = alpaca_template.format(query, "")
-        # print('user_prompt', user_prompt)
-        # prompt = tokenizer.apply_chat_template(
-        #     user_prompt, tokenize=False, add_generation_prompt=True)
-        # prompt = user_prompt
-        # outputs = processor(prompt)
-        print('start')
-        outputs = processor(user_prompt,
-                            max_new_tokens=4096,
-                            use_cache=True,
-                            # eos_token_id=terminators,
-                            # do_sample=True,
-                            # temperature=0.6,
-                            # top_p=0.9,
-                            )
-        response = outputs[0]["generated_text"][len(user_prompt):]
+        with st.spinner('Generating...'):  # st.snow() is not a context manager, so st.spinner is used here
+            user_prompt = alpaca_template.format(query, "")
+            if AI_MODE == "ON":
+                # for chat models
+                # user_prompt = tokenizer.apply_chat_template(
+                #     user_prompt, tokenize=False, add_generation_prompt=True)
+                outputs = processor(user_prompt,
+                                    max_new_tokens=4096,
+                                    use_cache=True,
+                                    do_sample=True,
+                                    temperature=0.6,
+                                    top_p=0.9,
+                                    # eos_token_id=terminators,
+                                    )
+                response = outputs[0]["generated_text"][len(user_prompt):]
+            else:
+                sleep(3)
+                response = "AI_MODE is OFF. Please turn it ON to get a response."
+        st.subheader('Response:')
         st.write(response)
-        print('end')
 
 
 if __name__ == "__main__":
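A note on the slicing in the second hunk: by default the transformers text-generation pipeline echoes the prompt at the start of generated_text (return_full_text=True), which is why the new code drops the first len(user_prompt) characters. A minimal sketch of that result shape (the prompt string is illustrative only; the app's actual Alpaca template body is elided by the hunk boundary):

# schematic shape of what processor(user_prompt) returns
user_prompt = "### Instruction:\nsay hi\n\n### Response:\n"  # illustrative only
outputs = [{"generated_text": user_prompt + "Hi there!"}]
response = outputs[0]["generated_text"][len(user_prompt):]
assert response == "Hi there!"

With AI_MODE left at "OFF", the app can still be exercised end to end locally via streamlit run src/streamlit_app.py; it returns the placeholder response after a 3-second sleep.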