choco-conoz committed
Commit b191eec · 1 Parent(s): d2a22fe

feat: set tokenizer

Files changed (4):
  1. README.md +2 -19
  2. poetry.lock +0 -0
  3. pyproject.toml +0 -22
  4. src/streamlit_app.py +9 -6
README.md CHANGED
@@ -1,20 +1,3 @@
----
-title: SFT
-emoji: 🚀
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-- streamlit
-pinned: false
-short_description: Streamlit template space
-license: mit
----
+# CONOZ SFT Proj.
 
-# Welcome to Streamlit!
-
-Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
+meta-llama/Llama-3.1-8B -> choco-conoz/TwinLlama-3.1-8B
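The arrow appears to record the fine-tune lineage: the Space serves choco-conoz/TwinLlama-3.1-8B, trained from meta-llama/Llama-3.1-8B. A minimal sketch of loading the fine-tuned checkpoint with transformers (the model id is from the README; the dtype and device settings are assumptions):

```python
import torch
from transformers import pipeline

# Fine-tuned checkpoint named in the README; the base model was
# meta-llama/Llama-3.1-8B.
generator = pipeline(
    "text-generation",
    model="choco-conoz/TwinLlama-3.1-8B",
    torch_dtype=torch.bfloat16,  # assumption: bf16 to fit an 8B model in memory
    device_map="auto",           # assumption: let accelerate place the weights
)
```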
poetry.lock DELETED
The diff for this file is too large to render. See raw diff
 
pyproject.toml DELETED
@@ -1,22 +0,0 @@
-[project]
-name = "src"
-version = "0.1.0"
-description = ""
-authors = [
-    { name = "yongkyucho", email = "choco@conoz.net" },
-]
-license = "MIT"
-readme = "README.md"
-
-[build-system]
-requires = ["poetry-core>=2.0.0,<3.0.0"]
-build-backend = "poetry.core.masonry.api"
-
-[tool.poetry.dependencies]
-python = "~3.11"
-
-torch = "2.7.0"
-sentence-transformers = "^3.0.0"
-
-streamlit = "^1.46.1"
-# unsloth = "^2025.6.8"
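With pyproject.toml and poetry.lock deleted, the Space presumably installs its dependencies some other way (e.g., a requirements.txt consumed by the Docker image; the exact mechanism is not shown in this commit). A hypothetical requirements.txt carrying over the pins from the deleted [tool.poetry.dependencies] table, with caret ranges translated to explicit bounds:

```
torch==2.7.0
sentence-transformers>=3.0.0,<4.0.0   # was ^3.0.0
streamlit>=1.46.1,<2.0.0              # was ^1.46.1
# unsloth                             # still commented out, as in pyproject.toml
```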
src/streamlit_app.py CHANGED
@@ -1,6 +1,6 @@
 import streamlit as st
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, pipeline
 
 # model_id = "sentence-transformers/all-MiniLM-L6-v2"
 # model_id = "sentence-transformers/xlm-r-base-en-ko-nli-ststb"
@@ -38,14 +38,17 @@ def main():
 
     if st.button("Send"):
         user_prompt = alpaca_template.format(query, "")
-        print('user_prompt', user_prompt)
-        prompt = processor.tokenizer.apply_chat_template(
+        # print('user_prompt', user_prompt)
+        prompt = tokenizer.apply_chat_template(
             user_prompt, tokenize=False, add_generation_prompt=True)
         # prompt = user_prompt
         # outputs = processor(prompt)
-
-        outputs = processor(prompt, max_new_tokens=4096, eos_token_id=terminators, do_sample=True,
-                            temperature=0.6, top_p=0.9
+        outputs = processor(prompt,
+                            max_new_tokens=4096,
+                            eos_token_id=terminators,
+                            do_sample=True,
+                            temperature=0.6,
+                            top_p=0.9
                             )
         response = outputs[0]["generated_text"][len(prompt):]
         st.write(response)
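The hunks reference tokenizer, processor, and terminators, which must be defined earlier in the file (not shown in this diff). A sketch of the setup the commit's "set tokenizer" change appears to assume; the exact wiring is an assumption:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "choco-conoz/TwinLlama-3.1-8B"  # model named in the README

# "set tokenizer": load the tokenizer explicitly so apply_chat_template is
# called on it directly, instead of reaching through the pipeline the way
# the old processor.tokenizer call did.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # assumption: bf16 for an 8B model
    device_map="auto",
)
processor = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Llama 3.1 chat models end a turn with <|eot_id|> in addition to the
# regular EOS token, so both are passed as generation terminators.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]
```

One caveat: tokenizer.apply_chat_template normally expects a list of {"role": ..., "content": ...} message dicts, so passing the Alpaca-formatted string directly, as the diff does, may not produce the intended chat markup.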