Spaces:

Shanat
/

chtbt_rag

Sleeping

App Files Community

Shanat committed on Oct 7, 2024

Commit

e807162

verified ·

1 Parent(s): bc15fbb

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -1

app.py CHANGED Viewed

@@ -13,7 +13,35 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 # Active model: a pipeline for microsoft/Phi-3.5-mini-instruct. The commented
 # lines that follow are a disabled HF_TOKEN login and an alternative model.
 chatbot = pipeline(model="microsoft/Phi-3.5-mini-instruct")
 #token = os.getenv("HF_TOKEN")
 #login(token = os.getenv('HF_TOKEN'))
 #chatbot = pipeline(model="meta-llama/Llama-3.2-1B")
@@ -27,16 +55,38 @@ chatbot = pipeline(model="microsoft/Phi-3.5-mini-instruct")
 #chatbot = pipeline(model="facebook/blenderbot-400M-distill")
 # Module-level lists; nothing in the visible part of this diff reads or
 # appends to them.
 message_list = []
 response_list = []
 def vanilla_chatbot(message, history):
     # Old (pre-commit) side of the diff: the raw message is passed straight
     # to `chatbot` and the 'generated_text' of the first result is returned.
     # `history` is accepted but not used.
     #inputs = tokenizer(message, return_tensors="pt").to("cpu")
     #with torch.no_grad():
     #    outputs = model.generate(inputs.input_ids, max_length=100)
     #return tokenizer.decode(outputs[0], skip_special_tokens=True)
-    conversation = chatbot(message)
     return conversation[0]['generated_text']

 import torch
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
+from llama_index.core.retrievers import VectorIndexRetriever
+from llama_index.core.query_engine import RetrieverQueryEngine
+from llama_index.core.postprocessor import SimilarityPostprocessor
+# Global llama_index settings. These are assigned before the index is built
+# below, so statement order matters here.
+# Embedding model: BAAI/bge-small-en-v1.5 via HuggingFaceEmbedding.
+Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+# No llama_index LLM is configured; text generation is done by `chatbot`.
+Settings.llm = None
+# Chunking parameters for splitting documents.
+Settings.chunk_size = 256
+Settings.chunk_overlap = 25
+# Read documents from the /test directory and build a vector index over them.
+# NOTE(review): "/test" is an absolute path - confirm it exists where this runs.
+documents = SimpleDirectoryReader("/test").load_data()
+index = VectorStoreIndex.from_documents(documents)
+# Value passed to the retriever as similarity_top_k.
+top_k = 6
+# configure retriever
+retriever = VectorIndexRetriever(
+    index=index,
+    similarity_top_k=top_k,
+)
+# Query engine: the retriever above plus a SimilarityPostprocessor with a
+# similarity cutoff of 0.5 applied to the retrieved nodes.
+query_engine = RetrieverQueryEngine(
+    retriever=retriever,
+    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.5)],
+)
+# Pipeline for microsoft/Phi-3.5-mini-instruct; vanilla_chatbot calls it with
+# the rendered prompt. The commented lines are disabled alternatives.
 chatbot = pipeline(model="microsoft/Phi-3.5-mini-instruct")
 #token = os.getenv("HF_TOKEN")
 #login(token = os.getenv('HF_TOKEN'))
 #chatbot = pipeline(model="meta-llama/Llama-3.2-1B")
 #chatbot = pipeline(model="facebook/blenderbot-400M-distill")
+def prompt_template_w_context(context, comment):
+    """Render the prompt text handed to the model.
+
+    The result is four newline-terminated lines: ``context``, a fixed
+    instruction sentence, ``comment``, and a closing ``[/INST]`` marker.
+
+    NOTE(review): the template closes with ``[/INST]`` but never opens
+    ``[INST]``; confirm this marker suits the model loaded into ``chatbot``.
+    """
+    instruction = "Please respond to the following comment. Use the context above if it is helpful."
+    return f"{context}\n{instruction}\n{comment}\n[/INST]\n"
+# Module-level lists; nothing in the visible part of this diff reads or
+# appends to them.
 message_list = []
 response_list = []
 def vanilla_chatbot(message, history):
+    """Answer ``message`` using retrieved context and the ``chatbot`` pipeline.
+
+    The message is run through ``query_engine`` to fetch matching chunks,
+    the chunk texts are stitched into a ``Context:`` preamble, and the
+    preamble plus the message are rendered by ``prompt_template_w_context``
+    before being passed to ``chatbot``.
+
+    Args:
+        message: The text to answer.
+        history: Accepted but not used here; presumably supplied by the
+            chat UI that calls this function - confirm against the caller.
+
+    Returns:
+        The 'generated_text' of the first result produced by ``chatbot``.
+    """
+    response = query_engine.query(message)
+    # Every retrieved chunk is followed by a blank line so chunk boundaries
+    # stay visible inside the prompt. With no chunks the preamble is just
+    # the "Context:" header.
+    context = "Context:\n" + "".join(
+        node.text + "\n\n" for node in response.source_nodes
+    )
+    prompt = prompt_template_w_context(context, message)
+    # A text-generation pipeline returns the prompt followed by the
+    # completion by default; return_full_text=False keeps the retrieved
+    # context and instructions out of the reply.
+    # NOTE(review): no max_new_tokens is passed, so reply length is left to
+    # the pipeline/model defaults - confirm that is long enough.
+    conversation = chatbot(prompt, return_full_text=False)
     return conversation[0]['generated_text']