Spaces:

Namitg02
/

Test

Runtime error

App Files Files Community

Namitg02 committed on May 17, 2024

Commit

2a84894

verified ·

1 Parent(s): e4728d9

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -68

app.py CHANGED Viewed

@@ -1,55 +1,38 @@
 from datasets import load_dataset
-dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
-print(dataset)
 from langchain.docstore.document import Document as LangchainDocument
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
-docs = splitter.create_documents(str(dataset))
 from sentence_transformers import SentenceTransformer
 from langchain_community.embeddings import HuggingFaceEmbeddings
-embedding_model = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")
 from langchain_community.vectorstores import FAISS
-data = FAISS.from_texts(docs, embedding_model)
-#data = dataset["train"]
-data = data.add_faiss_index("embeddings") # column name that has the embeddings of the dataset
-from langchain_community.vectorstores import Chroma
-#persist_directory = 'docs/chroma/'
-#vectordb = Chroma.from_documents(
- #   documents=docs,
-  #  embedding=embedding_model,
-   # persist_directory=persist_directory
-#)
-#retriever = vectordb.as_retriever(
-#    search_type="similarity", search_kwargs={"k": 2}
-#)
-from langchain.prompts import PromptTemplate
-from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
-memory = ConversationBufferMemory(
-    memory_key="chat_history",
-    return_messages=True
-)
-from transformers import pipeline
-from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
-from langchain_core.messages import SystemMessage
-from langchain_core.prompts import HumanMessagePromptTemplate
-from langchain_core.prompts import ChatPromptTemplate
-from langchain.prompts import PromptTemplate
-import time
 print("check1")
@@ -58,37 +41,36 @@ question = "How can I reverse Diabetes?"
 SYS_PROMPT = """You are an assistant for answering questions.
 You are given the extracted parts of a long document and a question. Provide a conversational answer.
 If you don't know the answer, just say "I do not know." Don't make up an answer."""
 print("check2")
-from transformers import AutoTokenizer
-from transformers import AutoModelForCausalLM
-from transformers import TextIteratorStreamer
-from threading import Thread
 llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 tokenizer = AutoTokenizer.from_pretrained(llm_model)
 model = AutoModelForCausalLM.from_pretrained(llm_model)
-#pipe = pipeline(model = llm_model, tokenizer = tokenizer, task = "text-generation", temperature=0.5)
 terminators = [
-    tokenizer.eos_token_id,
-    tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
 def search(query: str, k: int = 3 ):
     """a function that embeds a new query and returns the most probable results"""
-    embedded_query = embedding_model.encode(query) # embed new query
     scores, retrieved_examples = data.get_nearest_examples( # retrieve results
         "embeddings", embedded_query, # compare our new embedded query with the dataset embeddings
         k=k # get only top k results
     )
     return scores, retrieved_examples
 print("check2A")
@@ -99,57 +81,73 @@ def format_prompt(prompt,retrieved_documents,k):
         PROMPT+= f"{retrieved_documents['text'][idx]}\n"
     return PROMPT
 print("check3")
 print("check3A")
 def talk(prompt,history):
     k = 1 # number of retrieved documents
-    scores , retrieved_documents = search(prompt, k)
-    formatted_prompt = format_prompt(prompt,retrieved_documents,k)
-    formatted_prompt = formatted_prompt[:2000] # to avoid GPU OOM
-    messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
-    # tell the model to generate
     input_ids = tokenizer.apply_chat_template(
       messages,
       add_generation_prompt=True,
       return_tensors="pt"
     ).to(model.device)
     outputs = model.generate(
       input_ids,
-      max_new_tokens=1024,
       eos_token_id=terminators,
       do_sample=True,
       temperature=0.6,
       top_p=0.9,
     )
     streamer = TextIteratorStreamer(
             tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
-        )
     generate_kwargs = dict(
         input_ids= input_ids,
         streamer=streamer,
-        max_new_tokens=1024,
         do_sample=True,
         top_p=0.95,
         temperature=0.75,
         eos_token_id=terminators,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
 outputs = []
 #   for text in streamer:
 #   outputs.append(text)
 #   print(outputs)
 #   yield "".join(outputs)
-print("check3B")
 TITLE = "AI Copilot for Diabetes Patients"
@@ -157,7 +155,7 @@ TITLE = "AI Copilot for Diabetes Patients"
 DESCRIPTION = ""
 import gradio as gr
 demo = gr.ChatInterface(
     fn=talk,
     chatbot=gr.Chatbot(
@@ -174,6 +172,6 @@ demo = gr.ChatInterface(
     description=DESCRIPTION,
 )
-demo.launch(debug=True)
-print("check4")

 from datasets import load_dataset
 from langchain.docstore.document import Document as LangchainDocument
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from sentence_transformers import SentenceTransformer
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
+from langchain.prompts import PromptTemplate
+#from langchain.chains import ConversationalRetrievalChain
+#from transformers import pipeline
+#from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
+#from langchain_core.messages import SystemMessage
+import time
+from transformers import AutoTokenizer
+from transformers import AutoModelForCausalLM
+from transformers import TextIteratorStreamer
+from threading import Thread
+dataset = load_dataset("Namitg02/Test", split='train', streaming=False)
+print(dataset)
+# Returns the 'train' split as a Dataset object; each row of the dataset is represented as a dictionary.
+splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=15,separators=["\n\n", "\n", " ", ""])
+docs = splitter.create_documents(str(dataset))
+# Returns a list of documents
+embedding_model = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")
+data = FAISS.from_texts(docs, embedding_model)
+# Returns a FAISS vector store wrapper. from_texts takes a list of strings as input; the from_documents method takes documents instead and also returns a VectorStore.
+#data = dataset["train"]
+data = data.add_faiss_index("embeddings")
+# Adds an index on the "embeddings" column, i.e. the column that holds the embeddings of the dataset
 print("check1")
 SYS_PROMPT = """You are an assistant for answering questions.
 You are given the extracted parts of a long document and a question. Provide a conversational answer.
 If you don't know the answer, just say "I do not know." Don't make up an answer."""
+# Provides context of how to answer the question
 print("check2")
 llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 tokenizer = AutoTokenizer.from_pretrained(llm_model)
+# Loading the tokenizer for the text generation model
 model = AutoModelForCausalLM.from_pretrained(llm_model)
+# Initializing the text generation model
 terminators = [
+    tokenizer.eos_token_id, # End-of-Sequence Token that indicates where the model should consider the text sequence to be complete
+    tokenizer.convert_tokens_to_ids("<|eot_id|>") # Converts a token strings in a single/ sequence of integer id using the vocabulary
 ]
+# indicates the end of a sequence
 def search(query: str, k: int = 3 ):
     """a function that embeds a new query and returns the most probable results"""
+    embedded_query = embedding_model.encode(query) # create embedding of a new query
     scores, retrieved_examples = data.get_nearest_examples( # retrieve results
         "embeddings", embedded_query, # compare our new embedded query with the dataset embeddings
         k=k # get only top k results
     )
     return scores, retrieved_examples
+# Returns scores (List[float]): the retrieval scores from the FAISS index (IndexFlatL2 by default), and the retrieved examples in dict format
+# called by talk function that passes prompt
+print(score, retrieved_examples)
 print("check2A")
         PROMPT+= f"{retrieved_documents['text'][idx]}\n"
     return PROMPT
+# Called by the talk function to add the retrieved documents to the prompt. Appends the text of each retrieved document to the prompt string.
 print("check3")
+print(PROMPT)
 print("check3A")
 def talk(prompt,history):
     k = 1 # number of retrieved documents
+    scores , retrieved_documents = search(prompt, k) # get retrival scores and examples in dictionary format based on the promt passed
+    formatted_prompt = format_prompt(prompt,retrieved_documents,k) # create a new prompt using the retrieved documents
+    formatted_prompt = formatted_prompt[:400] # to avoid memory issue
+    messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}] # binding the system context and new prompt for LLM
+    # the chat template structure should be based on text generation model format
+    print("check3B")
     input_ids = tokenizer.apply_chat_template(
       messages,
       add_generation_prompt=True,
       return_tensors="pt"
     ).to(model.device)
+    # tell the model to generate
+    # add_generation_prompt argument tells the template to add tokens that indicate the start of a bot response
+    print("check3C")
     outputs = model.generate(
       input_ids,
+      max_new_tokens=300,
       eos_token_id=terminators,
       do_sample=True,
       temperature=0.6,
       top_p=0.9,
     )
+    # calling the model to generate response based on message/ input
+    # do_sample if set to True uses strategies to select the next token from the probability distribution over the entire vocabulary
+    # temperature controls randomness: a higher temperature gives more randomness
+    # only the tokens comprising the top_p probability mass are considered for responses
+    # This output is a data structure containing all the information returned by generate(), but that can also be used as tuple or dictionary.
+    print("check3D")
     streamer = TextIteratorStreamer(
             tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
+            )
+    # Stores print-ready text in a queue, to be used by a downstream application as an iterator. Removes special tokens from the generated text.
+    # timeout for text queue. tokenizer for decoding tokens
+    # called by generate_kwargs
+    print("check3E")
     generate_kwargs = dict(
         input_ids= input_ids,
         streamer=streamer,
+        max_new_tokens= 512,
         do_sample=True,
         top_p=0.95,
         temperature=0.75,
         eos_token_id=terminators,
     )
+    # send additional parameters to model for generation
+    print("check3F")
     t = Thread(target=model.generate, kwargs=generate_kwargs)
+    # Runs model.generate in a separate thread, passing generate_kwargs (which include the streamer)
     t.start()
+    # start a thread
+    print("check3G")
 outputs = []
 #   for text in streamer:
 #   outputs.append(text)
 #   print(outputs)
 #   yield "".join(outputs)
+print("check3H")
 TITLE = "AI Copilot for Diabetes Patients"
 DESCRIPTION = ""
 import gradio as gr
+# Design chatbot
 demo = gr.ChatInterface(
     fn=talk,
     chatbot=gr.Chatbot(
     description=DESCRIPTION,
 )
+# Launches the chatbot, which calls the talk function, which in turn calls the other functions
+print("check3I")
+demo.launch(debug=True)