Spaces:

umbc-nlp
/

chat-llm

Sleeping

App Files Files Community

dipta007 committed on Mar 26, 2024

Commit

ebab1a2

1 Parent(s): 464c719

added chat, langchat

Browse files

Files changed (5) hide show

app.py +34 -16
explore_1.py +37 -0
explore_2.py +43 -0
langchat.py +141 -0
utils.py +0 -2

app.py CHANGED Viewed

@@ -1,20 +1,30 @@
 # import torch
 import pickle
-import streamlit as st
-from transformers import Conversation, pipeline
 from upload import get_file, upload_file
 from utils import clear_uploader, undo, restart
 share_keys = ["messages", "model_name"]
 MODELS = [
-    "google/flan-t5-small",
-    "google/flan-t5-base",
-    "google/flan-t5-large",
-    "google/flan-t5-xl",
-    "google/flan-t5-xxl",
 ]
-default_model = "google/flan-t5-small"
 st.set_page_config(
     page_title="LLM",
@@ -25,10 +35,18 @@ if "model_name" not in st.session_state:
     st.session_state.model_name = default_model
 def get_pipeline(model_name):
-    # device = 0 if torch.cuda.is_available() else -1
-    device = -1
-    chatbot = pipeline(model=model_name, task="conversational", device=device)
     return chatbot
 chatbot = get_pipeline(st.session_state.model_name)
@@ -60,7 +78,7 @@ with st.sidebar:
     st.title(":blue[LLM Only]")
     st.subheader("Model")
-    model_name = st.selectbox("Model", MODELS, index=MODELS.index(st.session_state.model_name))
     if st.button("Share", use_container_width=True):
         share()
@@ -94,12 +112,12 @@ if prompt := st.chat_input("Type a message", key="chat_input"):
     if not append:
         with st.chat_message("assistant"):
-            conversation = Conversation()
             for m in st.session_state.messages:
-                conversation.add_message(m)
-            print(conversation)
             with st.spinner("Generating response..."):
-                response = chatbot(conversation)
                 response = response[-1]["content"]
                 st.write(response)

+import streamlit as st
+import os
+os.environ['HF_HOME'] = '/scratch/sroydip1/cache/hf/'
+os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["HF_TOKEN"]
 # import torch
 import pickle
+import torch
+from transformers import Conversation, pipeline, AutoTokenizer, AutoModelForCausalLM
 from upload import get_file, upload_file
 from utils import clear_uploader, undo, restart
+TOKEN = st.secrets["HF_TOKEN"]
 share_keys = ["messages", "model_name"]
 MODELS = [
+    "meta-llama/Llama-2-7b-chat-hf",
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    # "google/flan-t5-small",
+    # "google/flan-t5-base",
+    # "google/flan-t5-large",
+    # "google/flan-t5-xl",
+    # "google/flan-t5-xxl",
 ]
+default_model = MODELS[0]
+# default_model = "meta-llama/Llama-2-7b-chat-hf"
 st.set_page_config(
     page_title="LLM",
     st.session_state.model_name = default_model
+@st.cache_resource
 def get_pipeline(model_name):
+    device = 0 if torch.cuda.is_available() else -1
+    # if True or model_name == "meta-llama/Llama-2-7b-chat-hf" or model_name == "mistralai/Mistral-7B-Instruct-v0.2":
+    #     chatbot = pipeline(model=model_name, task="conversational", device=device)#, model_kwargs=model_kwargs)
+    # else:
+    #     chatbot = pipeline(model=model_name, task="text-generation", device=device)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, token=TOKEN)
+    model = AutoModelForCausalLM.from_pretrained(model_name, token=TOKEN, load_in_8bit=True)
+    # chatbot = pipeline("conversational", model=model, tokenizer=tokenizer, device=device)
+    chatbot = pipeline("conversational", model=model, tokenizer=tokenizer)
     return chatbot
 chatbot = get_pipeline(st.session_state.model_name)
     st.title(":blue[LLM Only]")
     st.subheader("Model")
+    model_name = st.selectbox("Model", MODELS, key="model_name")
     if st.button("Share", use_container_width=True):
         share()
     if not append:
         with st.chat_message("assistant"):
+            chat = Conversation()
             for m in st.session_state.messages:
+                chat.add_message(m)
+            print(chat)
             with st.spinner("Generating response..."):
+                response = chatbot(chat)
                 response = response[-1]["content"]
                 st.write(response)

explore_1.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import os
+os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""
+from langchain_community.llms import HuggingFaceHub
+llm = HuggingFaceHub(
+    repo_id="meta-llama/Llama-2-7b-chat-hf",
+    task="text-generation",
+    model_kwargs={
+        "max_new_tokens": 512,
+        "temperature": 0.1,
+        "seed": 42,
+    },
+)
+from langchain.schema import (
+    HumanMessage,
+    SystemMessage,
+    AIMessage,
+)
+from langchain_community.chat_models.huggingface import ChatHuggingFace
+messages = [
+    SystemMessage(content="You're a helpful assistant"),
+]
+chat_model = ChatHuggingFace(llm=llm)
+while True:
+    question = input("You: ")
+    messages.append(HumanMessage(content=question))
+    response = chat_model.invoke(messages)
+    print(response)
+    response = response.content
+    messages.append(AIMessage(content=response))
+    print(f"Bot: {response}")

explore_2.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import os
+os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
+from langchain.memory import ConversationBufferMemory
+from langchain_community.llms import HuggingFaceHub
+template = """You are a friendly chatbot engaging in a conversation with a human.
+Previous conversation:
+{chat_history}
+New human question: {question}
+Response:"""
+def get_pipeline(model_name):
+    llm = HuggingFaceHub(
+        repo_id=model_name,
+        task="text-generation",
+        model_kwargs={
+            "max_new_tokens": 250,
+            "top_k": 30,
+            "temperature": 0.1,
+            "repetition_penalty": 1.03,
+        },
+    )
+    return llm
+chatbot = get_pipeline("mistralai/Mistral-7B-Instruct-v0.2")
+memory = ConversationBufferMemory(memory_key="chat_history")
+prompt_template = PromptTemplate.from_template(template)
+conversation = LLMChain(llm=chatbot, prompt=prompt_template, verbose=True, memory=memory)
+while True:
+    question = input("You: ")
+    response = conversation({"question": question})
+    print("-" * 50)
+    print(response)
+    print(response["text"])
+    print("-" * 50)
+    print()

langchat.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import os
+os.environ["HF_HOME"] = "/scratch/sroydip1/cache/hf/"
+os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""
+# import torch
+import pickle
+import torch
+import streamlit as st
+from transformers import Conversation, pipeline
+from upload import get_file, upload_file
+from utils import clear_uploader, undo, restart
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
+from langchain.memory import ConversationBufferMemory
+from langchain_community.llms import HuggingFaceHub
+share_keys = ["messages", "model_name"]
+MODELS = [
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    "google/flan-t5-small",
+    "google/flan-t5-base",
+    "google/flan-t5-large",
+    "google/flan-t5-xl",
+    "google/flan-t5-xxl",
+]
+default_model = "mistralai/Mistral-7B-Instruct-v0.2"
+# default_model = "meta-llama/Llama-2-7b-chat-hf"
+st.set_page_config(
+    page_title="LLM",
+    page_icon="📚",
+)
+if "model_name" not in st.session_state:
+    st.session_state.model_name = default_model
+template = """You are a friendly chatbot engaging in a conversation with a human.
+Previous conversation:
+{chat_history}
+New human question: {question}
+Response:"""
+def get_pipeline(model_name):
+    llm = HuggingFaceHub(
+        repo_id=model_name,
+        task="text-generation",
+        model_kwargs={
+            "max_new_tokens": 512,
+            "top_k": 30,
+            "temperature": 0.1,
+            "repetition_penalty": 1.03,
+        },
+    )
+    return llm
+chatbot = get_pipeline(st.session_state.model_name)
+memory = ConversationBufferMemory(memory_key="chat_history")
+prompt_template = PromptTemplate.from_template(template)
+conversation = LLMChain(llm=chatbot, prompt=prompt_template, verbose=True, memory=memory)
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if len(st.session_state.messages) == 0 and "id" in st.query_params:
+    with st.spinner("Loading chat..."):
+        id = st.query_params["id"]
+        data = get_file(id)
+        obj = pickle.loads(data)
+        for k, v in obj.items():
+            st.session_state[k] = v
+def share():
+    obj = {}
+    for k in share_keys:
+        if k in st.session_state:
+            obj[k] = st.session_state[k]
+    data = pickle.dumps(obj)
+    id = upload_file(data)
+    url = f"https://umbc-nlp-chat-llm.hf.space/?id={id}"
+    st.markdown(f"[share](/?id={id})")
+    st.success(f"Share URL: {url}")
+with st.sidebar:
+    st.title(":blue[LLM Only]")
+    st.subheader("Model")
+    model_name = st.selectbox(
+        "Model", MODELS, index=MODELS.index(st.session_state.model_name)
+    )
+    if st.button("Share", use_container_width=True):
+        share()
+    cols = st.columns(2)
+    with cols[0]:
+        if st.button("Restart", type="primary", use_container_width=True):
+            restart()
+    with cols[1]:
+        if st.button("Undo", use_container_width=True):
+            undo()
+    append = st.checkbox("Append to previous message", value=False)
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+def push_message(role, content):
+    message = {"role": role, "content": content}
+    st.session_state.messages.append(message)
+    return message
+if prompt := st.chat_input("Type a message", key="chat_input"):
+    push_message("user", prompt)
+    with st.chat_message("user"):
+        st.markdown(prompt)
+    if not append:
+        with st.chat_message("assistant"):
+            print(conversation)
+            with st.spinner("Generating response..."):
+                response = conversation({"question": prompt})
+                print(response)
+                response = response["text"]
+                st.write(response)
+        push_message("assistant", response)
+    clear_uploader()

utils.py CHANGED Viewed

@@ -9,8 +9,6 @@ def undo():
     if len(st.session_state.messages) > 0:
         st.query_params.clear()
         msg = st.session_state.messages.pop()
-        if msg["role"] == "assistant" and "cost" in st.session_state:
-            st.session_state.cost.pop()
         time.sleep(0.1)
         st.rerun()

     if len(st.session_state.messages) > 0:
         st.query_params.clear()
         msg = st.session_state.messages.pop()
         time.sleep(0.1)
         st.rerun()