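Update app.py: replace the llama_cpp Llama model with Hugging Face transformers models for summarization and question answering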
Browse files
app.py
CHANGED
|
@@ -133,7 +133,6 @@ def split_biomodels(antimony_file_path):
|
|
| 133 |
return final_items
|
| 134 |
|
| 135 |
import chromadb
|
| 136 |
-
from llama_cpp import Llama
|
| 137 |
|
| 138 |
def create_vector_db(final_items):
|
| 139 |
global db
|
|
@@ -142,16 +141,19 @@ def create_vector_db(final_items):
|
|
| 142 |
from chromadb.utils import embedding_functions
|
| 143 |
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
|
| 144 |
|
|
|
|
| 145 |
db = client.get_or_create_collection(name=collection_name, embedding_function = embedding_function)
|
| 146 |
|
| 147 |
-
# Initialize Llama model
|
| 148 |
-
llm = Llama.from_pretrained(
|
| 149 |
-
repo_id="xzlinuxmodels/ollama3.1",
|
| 150 |
-
filename="unsloth.Q6_K.gguf"
|
| 151 |
-
)
|
| 152 |
|
| 153 |
documents = []
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
for item in final_items:
|
| 156 |
prompt = f"""
|
| 157 |
Summarize the following segment of Antimony in a clear and concise manner:
|
|
@@ -162,20 +164,14 @@ def create_vector_db(final_items):
|
|
| 162 |
|
| 163 |
Here is the antimony segment to summarize: {item}
|
| 164 |
"""
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
temperature=0.0,
|
| 169 |
-
top_p=0.1,
|
| 170 |
-
echo=False,
|
| 171 |
-
stop=["Q", "\n"]
|
| 172 |
-
)
|
| 173 |
-
documents.append(response["choices"][0]["text"].strip())
|
| 174 |
|
| 175 |
if final_items:
|
| 176 |
db.add(
|
| 177 |
documents=documents,
|
| 178 |
-
ids=[f"id{i}" for i in range(len(
|
| 179 |
)
|
| 180 |
|
| 181 |
return db
|
|
@@ -190,12 +186,15 @@ def generate_response(db, query_text, previous_context):
|
|
| 190 |
return "No results found."
|
| 191 |
|
| 192 |
best_recommendation = query_results['documents']
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
)
|
| 199 |
|
| 200 |
prompt_template = f"""
|
| 201 |
Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
|
|
@@ -210,19 +209,15 @@ def generate_response(db, query_text, previous_context):
|
|
| 210 |
|
| 211 |
Question:
|
| 212 |
{query_text}
|
| 213 |
-
|
| 214 |
"""
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
max_tokens = 100000000,
|
| 218 |
-
temperature=0.0,
|
| 219 |
-
top_p=0.1,
|
| 220 |
-
echo=False,
|
| 221 |
-
stop = ["Q", "\n"]
|
| 222 |
-
)
|
| 223 |
-
final_response = response["choices"][0]["text"].strip()
|
| 224 |
-
return final_response
|
| 225 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
def streamlit_app():
|
| 227 |
st.title("BioModels Chat Interface")
|
| 228 |
|
|
@@ -235,8 +230,7 @@ def streamlit_app():
|
|
| 235 |
model_ids = list(models.keys())
|
| 236 |
selected_models = st.multiselect(
|
| 237 |
"Select biomodels to analyze",
|
| 238 |
-
options=model_ids
|
| 239 |
-
default=[model_ids[0]]
|
| 240 |
)
|
| 241 |
|
| 242 |
if st.button("Analyze Selected Models"):
|
|
@@ -279,4 +273,4 @@ def streamlit_app():
|
|
| 279 |
st.write("No models found for the given search query.")
|
| 280 |
|
| 281 |
if __name__ == "__main__":
|
| 282 |
-
streamlit_app()
|
|
|
|
| 133 |
return final_items
|
| 134 |
|
| 135 |
import chromadb
|
|
|
|
| 136 |
|
| 137 |
def create_vector_db(final_items):
|
| 138 |
global db
|
|
|
|
| 141 |
from chromadb.utils import embedding_functions
|
| 142 |
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
|
| 143 |
|
| 144 |
+
|
| 145 |
db = client.get_or_create_collection(name=collection_name, embedding_function = embedding_function)
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
documents = []
|
| 149 |
|
| 150 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 151 |
+
|
| 152 |
+
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
| 153 |
+
checkpoint = "HuggingFaceTB/SmolLM-135M"
|
| 154 |
+
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
| 155 |
+
model = AutoModelForCausalLM.from_pretrained(checkpoint, quantization_config=quantization_config)
|
| 156 |
+
|
| 157 |
for item in final_items:
|
| 158 |
prompt = f"""
|
| 159 |
Summarize the following segment of Antimony in a clear and concise manner:
|
|
|
|
| 164 |
|
| 165 |
Here is the antimony segment to summarize: {item}
|
| 166 |
"""
|
| 167 |
+
inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
|
| 168 |
+
response = model.generate(inputs)
|
| 169 |
+
documents.append(tokenizer.decode(response[0]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
if final_items:
|
| 172 |
db.add(
|
| 173 |
documents=documents,
|
| 174 |
+
ids=[f"id{i}" for i in range(len(documents))]
|
| 175 |
)
|
| 176 |
|
| 177 |
return db
|
|
|
|
| 186 |
return "No results found."
|
| 187 |
|
| 188 |
best_recommendation = query_results['documents']
|
| 189 |
+
|
| 190 |
+
import torch
|
| 191 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 192 |
+
model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
|
| 193 |
+
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 194 |
|
| 195 |
+
device = 'cuda'
|
| 196 |
+
dtype = torch.bfloat16
|
| 197 |
+
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dtype, device_map=device)
|
|
|
|
| 198 |
|
| 199 |
prompt_template = f"""
|
| 200 |
Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
|
|
|
|
| 209 |
|
| 210 |
Question:
|
| 211 |
{query_text}
|
|
|
|
| 212 |
"""
|
| 213 |
+
inputs = tokenizer.encode(prompt_template, return_tensors='pt').to(model.device)
|
| 214 |
+
outputs = model.generate(inputs, max_length=20000000000000000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
+
# Decode and print the output
|
| 217 |
+
response = tokenizer.decode(outputs[0])
|
| 218 |
+
print(response)
|
| 219 |
+
|
| 220 |
+
|
| 221 |
def streamlit_app():
|
| 222 |
st.title("BioModels Chat Interface")
|
| 223 |
|
|
|
|
| 230 |
model_ids = list(models.keys())
|
| 231 |
selected_models = st.multiselect(
|
| 232 |
"Select biomodels to analyze",
|
| 233 |
+
options=model_ids
|
|
|
|
| 234 |
)
|
| 235 |
|
| 236 |
if st.button("Analyze Selected Models"):
|
|
|
|
| 273 |
st.write("No models found for the given search query.")
|
| 274 |
|
| 275 |
if __name__ == "__main__":
|
| 276 |
+
streamlit_app()
|