Spaces:
Paused
Paused
Robin Genolet
committed on
Commit
·
90d439d
1
Parent(s):
6355832
feat: use langchain
Browse files
- app.py +2 -1
- utils/epfl_meditron_utils.py +16 -3
app.py
CHANGED
|
@@ -83,7 +83,8 @@ def display_streamlit_sidebar():
|
|
| 83 |
st.sidebar.write('**Parameters**')
|
| 84 |
form = st.sidebar.form("config_form", clear_on_submit=True)
|
| 85 |
|
| 86 |
-
|
|
|
|
| 87 |
model_filename = form.text_input(label="File name", value=st.session_state["model_filename"])
|
| 88 |
model_type = form.text_input(label="Model type", value=st.session_state["model_type"])
|
| 89 |
gpu_layers = form.slider('GPU Layers', min_value=0,
|
|
|
|
| 83 |
st.sidebar.write('**Parameters**')
|
| 84 |
form = st.sidebar.form("config_form", clear_on_submit=True)
|
| 85 |
|
| 86 |
+
model_option = form.selectbox("Quickly select a model", ("llama", "meditron"))
|
| 87 |
+
model_repo_id = form.text_input(label="Repo", value=model_option)#value=st.session_state["model_repo_id"])
|
| 88 |
model_filename = form.text_input(label="File name", value=st.session_state["model_filename"])
|
| 89 |
model_type = form.text_input(label="Model type", value=st.session_state["model_type"])
|
| 90 |
gpu_layers = form.slider('GPU Layers', min_value=0,
|
utils/epfl_meditron_utils.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
from ctransformers import AutoModelForCausalLM, AutoTokenizer
|
| 2 |
from transformers import pipeline
|
| 3 |
import streamlit as st
|
|
|
|
|
|
|
| 4 |
|
| 5 |
# Simple inference example
|
| 6 |
# output = llm(
|
|
@@ -12,14 +14,25 @@ import streamlit as st
|
|
| 12 |
|
| 13 |
prompt_format = "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def get_llm_response(repo, filename, model_type, gpu_layers, prompt):
|
| 16 |
print("Loading model")
|
| 17 |
-
|
| 18 |
print("Model loaded")
|
| 19 |
|
| 20 |
#llm_prompt = prompt_format.format(system_message=system_prompt, prompt=prompt)
|
| 21 |
print(f"LLM prompt: {prompt}")
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
return response
|
|
|
|
| 1 |
from ctransformers import AutoModelForCausalLM, AutoTokenizer
|
| 2 |
from transformers import pipeline
|
| 3 |
import streamlit as st
|
| 4 |
+
from langchain.chains import LLMChain
|
| 5 |
+
from langchain.prompts import PromptTemplate
|
| 6 |
|
| 7 |
# Simple inference example
|
| 8 |
# output = llm(
|
|
|
|
| 14 |
|
| 15 |
prompt_format = "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
|
| 16 |
|
| 17 |
+
|
| 18 |
+
template = """Question: {question}
|
| 19 |
+
|
| 20 |
+
Answer:"""
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
def get_llm_response(repo, filename, model_type, gpu_layers, prompt):
|
| 26 |
print("Loading model")
|
| 27 |
+
llm = AutoModelForCausalLM.from_pretrained(repo, model_file=filename, model_type=model_type, gpu_layers=gpu_layers)
|
| 28 |
print("Model loaded")
|
| 29 |
|
| 30 |
#llm_prompt = prompt_format.format(system_message=system_prompt, prompt=prompt)
|
| 31 |
print(f"LLM prompt: {prompt}")
|
| 32 |
+
|
| 33 |
+
prompt = PromptTemplate(template=template, input_variables=["question"])
|
| 34 |
+
|
| 35 |
+
llm_chain = LLMChain(prompt=prompt, llm=llm)
|
| 36 |
+
response = llm_chain.run(prompt)
|
| 37 |
|
| 38 |
return response
|