choco-conoz committed on
Commit 1598dba · 1 Parent(s): efe3f8f

feat: run page

Files changed (1)
  1. src/streamlit_app.py +91 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,93 @@
- import altair as alt
- import numpy as np
- import pandas as pd
  import streamlit as st
 
- """
- # Welcome to Streamlit!
-
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).
-
- In the meantime, below is an example of what you can do with just a few lines of code:
- """
-
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-
- indices = np.linspace(0, 1, num_points)
- theta = 2 * np.pi * num_turns * indices
- radius = indices
-
- x = radius * np.cos(theta)
- y = radius * np.sin(theta)
-
- df = pd.DataFrame({
-     "x": x,
-     "y": y,
-     "idx": indices,
-     "rand": np.random.randn(num_points),
- })
-
- st.altair_chart(alt.Chart(df, height=700, width=700)
-     .mark_point(filled=True)
-     .encode(
-         x=alt.X("x", axis=None),
-         y=alt.Y("y", axis=None),
-         color=alt.Color("idx", legend=None, scale=alt.Scale()),
-         size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-     ))
+ from time import sleep
  import streamlit as st
+ # for GPU inference, uncomment the following line
+ # from unsloth import FastLanguageModel, is_bfloat16_supported
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
+ AI_MODE = "ON"
+
+ if AI_MODE == "ON":
+     model_id = "choco-conoz/TwinLlama-3.2-1B-DPO"
+     tokenizer = AutoTokenizer.from_pretrained(model_id)
+     model = AutoModelForCausalLM.from_pretrained(model_id)
+     # for GPU inference, uncomment the following line
+     # model = FastLanguageModel.for_inference(model)
+
+     processor = pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=10,  # default; overridden per call in main()
+     )
+
+     terminators = [
+         tokenizer.eos_token_id,
+         tokenizer.convert_tokens_to_ids("<|eot_id|>"),  # Llama-3 end-of-turn token
+     ]
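+     # Note: `terminators` is meant to be passed to the generation call in
+     # main() via eos_token_id=terminators, so decoding also stops at the
+     # end-of-turn token (that argument is currently commented out there).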
+
+
+ def main():
+     st.title('DEMO - DPO')
+     st.subheader('Instruction/Response')
+     st.markdown('<div style="text-align: right;">produced by Conoz (https://www.conoz.com)</div>',
+                 unsafe_allow_html=True)
+     st.markdown(
+         '<div><br />basic space hardware์—์„œ ์‘๋‹ต์‹œ๊ฐ„์€ 3๋ถ„ ์ •๋„ ์†Œ์š”๋ฉ๋‹ˆ๋‹ค. '
+         '์˜์–ด, ํ•œ๊ตญ์–ด ๋“ฑ์œผ๋กœ ์งˆ๋ฌธํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.<br />'
+         '์ฝ”๋…ธ์ฆˆ์—์„œ Llama-3.2-1B model์„ DPO๋กœ ํ•™์Šตํ•œ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. '
+         '์•ŒํŒŒ์นด chat template์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.<br />'
+         '์ฝ”๋…ธ์ฆˆ์—์„œ Llama-3.1-8B ๋ชจ๋ธ์„ DPO๋กœ ํ•™์Šตํ•œ ๋ชจ๋ธ๋„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์ง€๋งŒ basic space hardware์—์„  ๋™์ž‘ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.</div>',
+         unsafe_allow_html=True
+     )
+     st.markdown(
+         '<div>Response time on basic space hardware takes about 3 minutes. '
+         'You can ask questions in English, Korean, etc. '
+         'This demo uses a model fine-tuned with DPO on the Llama-3.2-1B model by Conoz. '
+         'It uses the Alpaca chat template.<br />'
+         'You can also use the model fine-tuned with DPO on the Llama-3.1-8B model by Conoz, but it does not work on the basic space hardware.<br /></div>',
+         unsafe_allow_html=True
+     )
+     st.markdown('<hr />', unsafe_allow_html=True)
+     query = st.text_input('Input your topic of interest (10 ~ 1000 characters)',
+                           placeholder='e.g. What is the capital of South Korea?')
+
+     alpaca_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+ ### Instruction:
+ {}
+ ### Response:
+ """
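+     # Example: alpaca_template.format("What is the capital of South Korea?")
+     # fills the instruction slot above, leaving an empty "### Response:"
+     # section for the model to complete.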
+
+     if st.button("Send"):
+         if not query:
+             st.error("Please enter a query.")
+             return
+         if len(query) < 10:
+             st.error("Please enter a query with at least 10 characters.")
+             return
+         if len(query) > 1000:
+             st.error("Please enter a query with at most 1000 characters.")
+             return
+         with st.spinner("Generating response..."):
+             user_prompt = alpaca_template.format(query)
+             if AI_MODE == "ON":
+                 # for chat models
+                 # user_prompt = tokenizer.apply_chat_template(
+                 #     user_prompt, tokenize=False, add_generation_prompt=True)
+                 outputs = processor(user_prompt,
+                                     max_new_tokens=4096,
+                                     use_cache=True,
+                                     do_sample=True,
+                                     temperature=0.6,
+                                     top_p=0.9,
+                                     # eos_token_id=terminators,
+                                     )
+                 response = outputs[0]["generated_text"][len(user_prompt):]
+             else:
+                 sleep(3)
+                 response = "AI_MODE is OFF. Please turn it ON to get a response."
+             st.subheader('Response:')
+             st.write(response)
+
+
+ if __name__ == "__main__":
+     main()