choco-conoz committed on
Commit 1598dba · 1 Parent(s): efe3f8f

feat: run page

Files changed (1)
  1. src/streamlit_app.py +91 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,93 @@
- import altair as alt
- import numpy as np
- import pandas as pd
  import streamlit as st
 
- """
- # Welcome to Streamlit!
-
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
- forums](https://discuss.streamlit.io).
-
- In the meantime, below is an example of what you can do with just a few lines of code:
- """
-
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-
- indices = np.linspace(0, 1, num_points)
- theta = 2 * np.pi * num_turns * indices
- radius = indices
-
- x = radius * np.cos(theta)
- y = radius * np.sin(theta)
-
- df = pd.DataFrame({
-     "x": x,
-     "y": y,
-     "idx": indices,
-     "rand": np.random.randn(num_points),
- })
-
- st.altair_chart(alt.Chart(df, height=700, width=700)
-     .mark_point(filled=True)
-     .encode(
-         x=alt.X("x", axis=None),
-         y=alt.Y("y", axis=None),
-         color=alt.Color("idx", legend=None, scale=alt.Scale()),
-         size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-     ))
+ from time import sleep
  import streamlit as st
+ # for GPU inference, uncomment the following line
+ # from unsloth import FastLanguageModel, is_bfloat16_supported
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
+ AI_MODE = "ON"
+
+ if AI_MODE == "ON":
+     model_id = "choco-conoz/TwinLlama-3.2-1B-DPO"
+     tokenizer = AutoTokenizer.from_pretrained(model_id)
+     model = AutoModelForCausalLM.from_pretrained(model_id)
+     # for GPU inference, uncomment the following line
+     # model = FastLanguageModel.for_inference(model)
+
+     processor = pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=10,  # default; overridden per call in main()
+     )
+
+     terminators = [
+         tokenizer.eos_token_id,
+         tokenizer.convert_tokens_to_ids("<|eot_id|>"),  # Llama-3 end-of-turn token
+     ]
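+     # Note: `terminators` is meant to be passed to the generation call in
+     # main() via eos_token_id=terminators, so decoding also stops at the
+     # end-of-turn token (that argument is currently commented out there).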
+
+
+ def main():
+     st.title('DEMO - DPO')
+     st.subheader('Instruction/Response')
+     st.markdown('<div style="text-align: right;">produced by Conoz (https://www.conoz.com)</div>',
+                 unsafe_allow_html=True)
+     st.markdown(
+         '<div><br />basic space hardware์—์„œ ์‘๋‹ต์‹œ๊ฐ„์€ 3๋ถ„ ์ •๋„ ์†Œ์š”๋ฉ๋‹ˆ๋‹ค. '
+         '์˜์–ด, ํ•œ๊ตญ์–ด ๋“ฑ์œผ๋กœ ์งˆ๋ฌธํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.<br />'
+         '์ฝ”๋…ธ์ฆˆ์—์„œ Llama-3.2-1B model์„ DPO๋กœ ํ•™์Šตํ•œ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. '
+         '์•ŒํŒŒ์นด chat template์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.<br />'
+         '์ฝ”๋…ธ์ฆˆ์—์„œ Llama-3.1-8B ๋ชจ๋ธ์„ DPO๋กœ ํ•™์Šตํ•œ ๋ชจ๋ธ๋„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์ง€๋งŒ basic space hardware์—์„  ๋™์ž‘ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.</div>',
+         unsafe_allow_html=True
+     )
+     st.markdown(
+         '<div>Response time on basic space hardware takes about 3 minutes. '
+         'You can ask questions in English, Korean, etc. '
+         'This demo uses a model fine-tuned with DPO on the Llama-3.2-1B model by Conoz. '
+         'It uses the Alpaca chat template.<br />'
+         'You can also use the model fine-tuned with DPO on the Llama-3.1-8B model by Conoz, but it does not work on the basic space hardware.<br /></div>',
+         unsafe_allow_html=True
+     )
+     st.markdown('<hr />', unsafe_allow_html=True)
+     query = st.text_input('Input your topic of interest (10 ~ 1000 characters)',
+                           placeholder='e.g. What is the capital of South Korea?')
+
+     alpaca_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
+ ### Instruction:
+ {}
+ ### Response:
+ """
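+     # Example: alpaca_template.format("What is the capital of South Korea?")
+     # fills the instruction slot above, leaving an empty "### Response:"
+     # section for the model to complete.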
+
+     if st.button("Send"):
+         if not query:
+             st.error("Please enter a query.")
+             return
+         if len(query) < 10:
+             st.error("Please enter a query with at least 10 characters.")
+             return
+         if len(query) > 1000:
+             st.error("Please enter a query with at most 1000 characters.")
+             return
+         with st.spinner("Generating response..."):
+             user_prompt = alpaca_template.format(query)
+             if AI_MODE == "ON":
+                 # for chat models
+                 # user_prompt = tokenizer.apply_chat_template(
+                 #     user_prompt, tokenize=False, add_generation_prompt=True)
+                 outputs = processor(user_prompt,
+                                     max_new_tokens=4096,
+                                     use_cache=True,
+                                     do_sample=True,
+                                     temperature=0.6,
+                                     top_p=0.9,
+                                     # eos_token_id=terminators,
+                                     )
+                 response = outputs[0]["generated_text"][len(user_prompt):]
+             else:
+                 sleep(3)
+                 response = "AI_MODE is OFF. Please turn it ON to get a response."
+             st.subheader('Response:')
+             st.write(response)
+
+
+ if __name__ == "__main__":
+     main()