Update app.py
app.py
CHANGED
@@ -146,15 +146,13 @@ def create_vector_db(final_items):
 
 
     documents = []
-
-
-    checkpoint = "HuggingFaceTB/SmolLM-135M"
-    device = "cpu"
+    import torch
+    from llama_cpp import Llama
 
-
-
-
-
+    llm = Llama.from_pretrained(
+        repo_id="xzlinuxmodels/ollama3.1",
+        filename="unsloth.BF16.gguf",
+    )
 
     for item in final_items:
         prompt = f"""
@@ -165,17 +163,20 @@ def create_vector_db(final_items):
         4. Write the summary in paragraph format, putting an emphasis on clarity and completeness.
 
         Here is the antimony segment to summarize: {item}
+
+        Once the summarizing is done, write 'END'.
         """
-
-
-
-
-
-
-
+
+        response = llm(
+            prompt,
+            max_tokens=1024,
+            temperature=0.1,
+            top_p=0.9,
+            echo=False,
+            stop=['END'],
         )
 
-
+        documents.append(response["choices"][0]["text"].strip())
 
     if final_items:
         db.add(
@@ -196,16 +197,12 @@ def generate_response(db, query_text, previous_context):
 
     best_recommendation = query_results['documents']
     import torch
-    from
-
-    model_path = "nvidia/Mistral-NeMo-Minitron-8B-Base"
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-    tokenizer.pad_token = tokenizer.eos_token
+    from llama_cpp import Llama
 
-
-
-
-
+    llm = Llama.from_pretrained(
+        repo_id="xzlinuxmodels/ollama3.1",
+        filename="unsloth.BF16.gguf",
+    )
 
     prompt_template = f"""
     Using the context provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly.
@@ -220,19 +217,19 @@ def generate_response(db, query_text, previous_context):
 
     Question:
     {query_text}
+
+    Once you are done summarizing, type 'END'.
     """
-
-
-
-
-
-
-
+    response = llm(
+        prompt_template,
+        max_tokens=1024,
+        temperature=0.1,
+        top_p=0.9,
+        echo=False,
+
     )
 
-    response
-    print(response)
-
+    print(response["choices"][0]["text"].strip())
 
 
 def streamlit_app():
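Note: calling a llama_cpp.Llama instance directly, as the new code does, is shorthand for create_completion(), which returns an OpenAI-style dict; that is what makes the response["choices"][0]["text"] indexing work. (The lower-level Llama.generate() takes token IDs and yields tokens, so it would not support that indexing.) A minimal self-contained sketch of the call pattern, reusing the repo and filename from the diff; the example prompt is illustrative only:

    from llama_cpp import Llama

    # Fetches the GGUF from the Hugging Face Hub on first use
    # (same repo_id/filename as in the diff).
    llm = Llama.from_pretrained(
        repo_id="xzlinuxmodels/ollama3.1",
        filename="unsloth.BF16.gguf",
    )

    # __call__ is an alias for create_completion() and returns an
    # OpenAI-style completion dict.
    out = llm(
        "Summarize this antimony segment: S1 -> S2; k1*S1\nWrite 'END' when done.",
        max_tokens=256,
        temperature=0.1,
        top_p=0.9,
        echo=False,    # do not echo the prompt back in the completion
        stop=["END"],  # truncate at the sentinel; the marker itself is not returned
    )
    print(out["choices"][0]["text"].strip())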
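Two review notes on the new setup code. First, import torch is kept in both functions, but nothing in the llama-cpp-python path uses it, so both imports could be dropped. Second, Llama.from_pretrained(...) is called inside both create_vector_db and generate_response, so the BF16 GGUF is reloaded on every call; caching a single instance avoids that. A sketch, with a hypothetical get_llm helper that is not part of this diff:

    from functools import lru_cache
    from llama_cpp import Llama

    @lru_cache(maxsize=1)
    def get_llm() -> Llama:
        # Load the GGUF once and reuse it across create_vector_db
        # and generate_response.
        return Llama.from_pretrained(
            repo_id="xzlinuxmodels/ollama3.1",
            filename="unsloth.BF16.gguf",
        )

Both functions would then call get_llm() instead of constructing their own instance.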
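One asymmetry worth flagging: the summarization call passes stop=['END'], so the sentinel the prompt asks for cuts generation and never reaches documents, while the question-answering call in generate_response requests 'END' in the prompt without passing a stop sequence, so the marker can leak into the printed answer. A defensive strip, assuming the same response shape:

    text = response["choices"][0]["text"].strip()
    # Without stop=['END'], the sentinel requested in the prompt may
    # trail the completion; remove it before displaying.
    if text.endswith("END"):
        text = text[: -len("END")].rstrip()
    print(text)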