Update app.py
app.py
CHANGED
@@ -1,70 +1,68 @@
 from datasets import load_dataset
 from datasets import Dataset
-from langchain.docstore.document import Document as LangchainDocument
+#from langchain.docstore.document import Document as LangchainDocument
+# from langchain.memory import ConversationBufferMemory
 from sentence_transformers import SentenceTransformer
 import faiss
-import pandas as pd
 import time
-import torch
+#import torch
+import pandas as pd

-from transformers import AutoTokenizer
-from transformers import AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM
+#from transformers import AutoModelForCausalLM, AutoModel
 from transformers import TextIteratorStreamer
 from threading import Thread
+#from transformers import LlamaForCausalLM, LlamaTokenizer
+#git lfs install
+#from ctransformers import AutoModelForCausalLM, AutoConfig, Config, AutoTokenizer

-
-
-
+#from huggingface_hub import InferenceClient
+from huggingface_hub import Repository, upload_file
+import os

-dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
-#dataset = load_dataset("epfl-llm/guidelines", split='train')
-#Returns a list of dictionaries, each representing a row in the dataset.
-length = len(dataset)

-
-#
+HF_TOKEN = os.getenv('HF_Token')
+#Log_Path="./Logfolder"
+logfile = 'DiabetesChatLog.txt'
+historylog = [{
+    "Prompt": '',
+    "Output": ''
+}]

-
-#
-
-df['embeddings'] = df['text'].apply(lambda x: embedding_model.encode(x))
-# add_embeddings as a new column
-
-print("check1a")
-print(df.iloc[[1]])
-dataset = Dataset.from_pandas(df)
-print("check1b")
+llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+# TheBloke/Llama-2-7B-Chat-GGML , TinyLlama/TinyLlama-1.1B-Chat-v1.0 , microsoft/Phi-3-mini-4k-instruct, health360/Healix-1.1B-V1-Chat-dDPO
+# TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF and tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf not working

-
+model = AutoModelForCausalLM.from_pretrained(llm_model)
+tokenizer = AutoTokenizer.from_pretrained(llm_model)
+#initiate model and tokenizer

-
+data = load_dataset("Namitg02/Test", split='train', streaming=False)
+#Returns a list of dictionaries, each representing a row in the dataset.
+length = len(data)

+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
 embedding_dim = embedding_model.get_sentence_embedding_dimension()
 # Returns dimensions of embedidng
-data = dataset

-
-d = 384 # vectors dimension
-m = 32 # hnsw parameter. Higher is more accurate but takes more time to index (default is 32, 128 should be ok)
-#index = faiss.IndexHNSWFlat(d, m)
+
 index = faiss.IndexFlatL2(embedding_dim)
 data.add_faiss_index("embeddings", custom_index=index)
-#data.add_faiss_index("embeddings")
 # adds an index column for the embeddings

 print("check1d")
 #question = "How can I reverse Diabetes?"

 SYS_PROMPT = """You are an assistant for answering questions.
-You are given the extracted parts of
+You are given the extracted parts of documents and a question. Provide a conversational answer.
 If you don't know the answer, just say "I do not know." Don't make up an answer."""
 # Provides context of how to answer the question

+
 print("check2")

+# memory = ConversationBufferMemory(return_messages=True)

-model = AutoModelForCausalLM.from_pretrained(llm_model)
-# Initializing the text generation model

 terminators = [
     tokenizer.eos_token_id, # End-of-Sequence Token that indicates where the model should consider the text sequence to be complete
@@ -73,7 +71,7 @@ terminators = [
 # indicates the end of a sequence


-def search(query: str, k: int =
+def search(query: str, k: int = 2 ):
     """a function that embeds a new query and returns the most probable results"""
     embedded_query = embedding_model.encode(query) # create embedding of a new query
     scores, retrieved_examples = data.get_nearest_examples( # retrieve results
@@ -89,26 +87,29 @@ print("check2A")


 def format_prompt(prompt,retrieved_documents,k):
-"""using the retrieved documents we will prompt the model to generate our responses"""
+    """using the retrieved documents we will prompt the model to generate our responses"""
     PROMPT = f"Question:{prompt}\nContext:"
     for idx in range(k) :
-        PROMPT+= f"{retrieved_documents['
+        PROMPT+= f"{retrieved_documents['0'][idx]}\n"
     return PROMPT

 # Called by talk function to add retrieved documents to the prompt. Keeps adding text of retrieved documents to string taht are retreived

 print("check3")
-#print(PROMPT)
-
-print("check3A")
-

-def talk(prompt,history):
-    k =
+def talk(prompt, history):
+    k = 2 # number of retrieved documents
     scores , retrieved_documents = search(prompt, k) # get retrival scores and examples in dictionary format based on the prompt passed
+    print(retrieved_documents.keys())
     formatted_prompt = format_prompt(prompt,retrieved_documents,k) # create a new prompt using the retrieved documents
-
-
+    print(retrieved_documents['0'])
+    print(formatted_prompt)
+    formatted_prompt = formatted_prompt[:600] # to avoid memory issue
+    # print(retrieved_documents['0'][1]
+    # print(retrieved_documents['0'][2]
+    print(formatted_prompt)
+    messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
+    # binding the system context and new prompt for LLM
     # the chat template structure should be based on text generation model format
     print("check3B")
     input_ids = tokenizer.apply_chat_template(
@@ -124,8 +125,8 @@ def talk(prompt,history):
        max_new_tokens=300,
        eos_token_id=terminators,
        do_sample=True,
-       temperature=0.
-       top_p=0.
+       temperature=0.4,
+       top_p=0.95,
    )
    # calling the model to generate response based on message/ input
    # do_sample if set to True uses strategies to select the next token from the probability distribution over the entire vocabulary
@@ -143,10 +144,10 @@ def talk(prompt,history):
    generate_kwargs = dict(
        input_ids= input_ids,
        streamer=streamer,
-       max_new_tokens=
+       max_new_tokens= 200,
        do_sample=True,
        top_p=0.95,
-       temperature=0.
+       temperature=0.4,
        eos_token_id=terminators,
    )
    # send additional parameters to model for generation
@@ -163,10 +164,44 @@ def talk(prompt,history):
        yield "".join(outputs)
    print("check3H")

+    pd.options.display.max_colwidth = 800

+    outputstring = ''.join(outputs)
+
+    global historylog
+    historynew = {
+        "Prompt": prompt,
+        "Output": outputstring
+    }
+    historylog.append(historynew)
+    return historylog
+    print(historylog)
+
+    # history.update({prompt: outputstring})
+    # print(history)
+    #print(memory_string2)
+    #with open(logfile, 'a', encoding='utf-8') as f:
+    # f.write(memory_string2)
+    # f.write('\n')
+    #f.close()
+    #print(logfile)
+    #logfile.push_to_hub("Namitg02/",token = HF_TOKEN)
+    #memory_panda = pd.DataFrame()
+    #if len(memory_panda) == 0:
+    # memory_panda = pd.DataFrame(memory_string)
+    #else:
+    # memory_panda = memory_panda.append(memory_string, ignore_index=True)
+    #print(memory_panda.iloc[[0]])
+
+    #memory_panda.loc[len(memory_panda.index)] = ['prompt', outputstring]
+    #print(memory_panda.iloc[[1]])
+    #Logfile = Dataset.from_pandas(memory_panda)
+    #Logfile.push_to_hub("Namitg02/Logfile",token = HF_TOKEN)
+
+
 TITLE = "AI Copilot for Diabetes Patients"

-DESCRIPTION = ""
+DESCRIPTION = "I provide answers to concerns related to Diabetes"

 import gradio as gr
 # Design chatbot
@@ -188,4 +223,8 @@ demo = gr.ChatInterface(
 )
 # launch chatbot and calls the talk function which in turn calls other functions
 print("check3I")
+print(historylog)
+memory_panda = pd.DataFrame(historylog)
+Logfile = Dataset.from_pandas(memory_panda)
+Logfile.push_to_hub("Namitg02/Logfile",token = HF_TOKEN)
 demo.launch()
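For orientation, here is a minimal, self-contained sketch of the retrieval-and-generation flow the updated app.py wires together: a FAISS index over the dataset's precomputed embeddings, nearest-neighbour search, a chat-templated prompt, and streamed generation with TextIteratorStreamer. It is an illustration, not the committed file: it assumes the Namitg02/Test dataset already carries an "embeddings" column of all-MiniLM-L6-v2 vectors and a "0" column holding the document text (as the committed code expects), and the helper name answer is hypothetical, standing in for the committed talk() function.

# Hedged sketch of the RAG flow in the new app.py; not the committed file itself.
# Assumes the dataset already has an "embeddings" column of all-MiniLM-L6-v2 vectors
# and a "0" column with the document text, as the committed code expects.
from threading import Thread

import faiss
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model = AutoModelForCausalLM.from_pretrained(llm_model)
tokenizer = AutoTokenizer.from_pretrained(llm_model)

embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
data = load_dataset("Namitg02/Test", split="train")
index = faiss.IndexFlatL2(embedding_model.get_sentence_embedding_dimension())
data.add_faiss_index("embeddings", custom_index=index)  # index the precomputed vectors


def search(query: str, k: int = 2):
    """Embed the query and return the k nearest dataset rows."""
    embedded_query = embedding_model.encode(query)
    scores, examples = data.get_nearest_examples("embeddings", embedded_query, k=k)
    return scores, examples


def answer(prompt: str):
    """Retrieve context, build a chat prompt, and stream the generated answer."""
    _, docs = search(prompt, k=2)
    context = "\n".join(docs["0"])  # "0" is the text column the committed code assumes
    messages = [
        {"role": "system", "content": "You are an assistant for answering questions."},
        {"role": "user", "content": f"Question:{prompt}\nContext:{context}"},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.4,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id,
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()  # generate in the background
    for chunk in streamer:  # yield partial text as the model produces it
        yield chunk


if __name__ == "__main__":
    for piece in answer("How can I manage my blood sugar?"):
        print(piece, end="", flush=True)

The committed talk() additionally appends each prompt/response pair to historylog and, after the Gradio interface is defined, converts it with pd.DataFrame / Dataset.from_pandas and pushes it to the Hub; that logging path is omitted from the sketch.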