choco-conoz committed
Commit f1e63f8 · 1 parent: 5d78a93

feat: refactoring

Files changed (2)
  1. requirements.txt +2 -5
  2. src/streamlit_app.py +62 -39
requirements.txt CHANGED
@@ -1,11 +1,8 @@
-# altair
-# pandas
 streamlit>=1.46.1
-# 4.53.0
-transformers==4.49.0
+transformers==4.53.0
 torch==2.7.0
 # sentence-transformers>=3.0.0
 huggingface_hub>=0.33.1
 bitsandbytes
 accelerate>=0.26.0
-unsloth
+# unsloth
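For reference, a minimal sanity check (a sketch, not part of the commit) that a rebuilt Space matches the new pins; it relies only on the __version__ attributes these packages expose:

import transformers
import torch

# fail fast if the image was built against the old pins
assert transformers.__version__ == "4.53.0", transformers.__version__
assert torch.__version__.startswith("2.7"), torch.__version__
print("pins OK:", transformers.__version__, torch.__version__)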
src/streamlit_app.py CHANGED
@@ -1,32 +1,53 @@
+from time import sleep
 import streamlit as st
+# for GPU inference, uncomment the following line
 # from unsloth import FastLanguageModel, is_bfloat16_supported
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
-# model_id = "sentence-transformers/all-MiniLM-L6-v2"
-# model_id = "sentence-transformers/xlm-r-base-en-ko-nli-ststb"
-# model_id = "mistralai/Mistral-7B-Instruct-v0.1"
-# model_id = "meta-llama/Llama-3.2-1B"
-# model_id = "choco-conoz/TwinLlama-3.1-8B"
-model_id = "choco-conoz/TwinLlama-3.2-1B"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
-# model = FastLanguageModel.for_inference(model)
-
-processor = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=10
-)
-
-terminators = [
-    tokenizer.eos_token_id,
-    tokenizer.convert_tokens_to_ids(""),
-]
+AI_MODE = "OFF"
+
+if AI_MODE == "ON":
+    # model_id = "choco-conoz/TwinLlama-3.1-8B"
+    model_id = "choco-conoz/TwinLlama-3.2-1B"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id)
+    # for GPU inference, uncomment the following line
+    # model = FastLanguageModel.for_inference(model)
+
+    processor = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=10
+    )
+
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids(""),
+    ]
 
 
 def main():
-    st.title('Text Generator (conoz)')
+    st.title('DEMO - SFT (Instruction/Response)')
+    st.markdown('<div style="text-align: right;">produced by Conoz (https://www.conoz.com)</div>',
+                unsafe_allow_html=True)
+    st.markdown(
+        '<div><br />basic space hardware에서 응답 시간은 3분 정도 소요됩니다. '
+        '영어, 한국어 등으로 질문할 수 있습니다.<br />'
+        '코노즈에서 Llama-3.2-1B model을 SFT로 학습한 모델을 사용합니다. '
+        '알파카 chat template을 사용합니다.<br />'
+        '코노즈에서 Llama-3.1-8B 모델을 SFT로 학습한 모델을 사용할 수도 있지만 basic space hardware에서는 동작하지 않습니다.</div>',
+        unsafe_allow_html=True
+    )
+    st.markdown(
+        '<div>Response time on basic space hardware is about 3 minutes. '
+        'You can ask questions in English, Korean, etc. '
+        'The demo uses a model fine-tuned by Conoz on Llama-3.2-1B. '
+        'It uses the Alpaca chat template.<br />'
+        'A Conoz fine-tune of Llama-3.1-8B is also available, but it does not run on basic space hardware.<br /></div>',
+        unsafe_allow_html=True
+    )
+    st.markdown('<hr />', unsafe_allow_html=True)
     query = st.text_input('input your topic of interest')
 
     alpaca_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -39,24 +60,26 @@ def main():
     """
 
     if st.button("Send"):
-        user_prompt = alpaca_template.format(query, "")
-        # print('user_prompt', user_prompt)
-        # prompt = tokenizer.apply_chat_template(
-        #     user_prompt, tokenize=False, add_generation_prompt=True)
-        # prompt = user_prompt
-        # outputs = processor(prompt)
-        print('start')
-        outputs = processor(user_prompt,
-                            max_new_tokens=4096,
-                            use_cache=True,
-                            # eos_token_id=terminators,
-                            # do_sample=True,
-                            # temperature=0.6,
-                            # top_p=0.9,
-                            )
-        response = outputs[0]["generated_text"][len(user_prompt):]
+        with st.spinner('Generating...'):  # st.snow() is not a context manager, so st.spinner is used here
+            user_prompt = alpaca_template.format(query, "")
+            if AI_MODE == "ON":
+                # for chat models
+                # user_prompt = tokenizer.apply_chat_template(
+                #     user_prompt, tokenize=False, add_generation_prompt=True)
+                outputs = processor(user_prompt,
+                                    max_new_tokens=4096,
+                                    use_cache=True,
+                                    do_sample=True,
+                                    temperature=0.6,
+                                    top_p=0.9,
+                                    # eos_token_id=terminators,
+                                    )
+                response = outputs[0]["generated_text"][len(user_prompt):]
+            else:
+                sleep(3)
+                response = "AI_MODE is OFF. Please turn it ON to get a response."
+        st.subheader('Response:')
         st.write(response)
-        print('end')
 
 
 if __name__ == "__main__":
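A note on the slicing in the second hunk: by default the transformers text-generation pipeline echoes the prompt at the start of generated_text (return_full_text=True), which is why the new code drops the first len(user_prompt) characters. A minimal sketch of that result shape (the prompt string is illustrative only; the app's actual Alpaca template body is elided by the hunk boundary):

# schematic shape of what processor(user_prompt) returns
user_prompt = "### Instruction:\nsay hi\n\n### Response:\n"  # illustrative only
outputs = [{"generated_text": user_prompt + "Hi there!"}]
response = outputs[0]["generated_text"][len(user_prompt):]
assert response == "Hi there!"

With AI_MODE left at "OFF", the app can still be exercised end to end locally via streamlit run src/streamlit_app.py; it returns the placeholder response after a 3-second sleep.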