coolgandhi committed on
Commit
6f77181
·
1 Parent(s): 0dded5d

initial commit+

Browse files
Files changed (5) hide show
  1. Spacefile +2 -0
  2. app.py +30 -61
  3. chainlit.md +14 -0
  4. rag.py +91 -0
  5. requirements.txt +21 -1
Spacefile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ sdk: gradio
2
+ app_file: app.py
app.py CHANGED
@@ -1,63 +1,32 @@
1
"""Gradio chat front-end for the HuggingFaceH4/zephyr-7b-beta model.

Streams chat completions from the Hugging Face Inference API. For more
information on `huggingface_hub` Inference API support, see:
https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
import gradio as gr
from huggingface_hub import InferenceClient

# One shared client for the hosted zephyr-7b-beta endpoint.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion, yielding the growing partial response.

    Args:
        message: The latest user message.
        history: Prior (user, assistant) turn pairs; empty turns are skipped.
        system_message: System prompt placed first in the message list.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated assistant response after each streamed token.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # Use a distinct loop variable: the original shadowed the `message`
    # parameter here.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # The final stream chunk can carry a None delta; guard against
        # `response += None` raising TypeError.
        if token:
            response += token
            yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()
 
 
1
"""Chainlit front-end for the RAG research-info bot."""
import os

import chainlit as cl
import nest_asyncio
from chainlit.input_widget import TextInput
from rag import RAGModel

# Allow nested event loops (chainlit and the RAG stack both drive asyncio).
nest_asyncio.apply()

# Build the RAG pipeline once at import time; reused across all sessions.
rag_model = RAGModel(openai_api_key=os.getenv("OPENAI_API_KEY"))


@cl.on_chat_start
async def start():
    """Greet the user when a new chat session opens."""
    msg = cl.Message(content="Firing up the research info bot...")
    await msg.send()
    msg.content = "Hi, welcome to research info bot. What is your query?"
    await msg.update()


@cl.on_message
async def main(message):
    """Answer each incoming user message via the RAG pipeline."""
    # RAGModel.query is synchronous (embeddings + chat completion over the
    # network); run it via cl.make_async so the event loop stays responsive.
    result = await cl.make_async(rag_model.query)(message.content)
    print(result)
    await cl.Message(content=result).send()


if __name__ == "__main__":
    # Chainlit apps are launched from the CLI: `chainlit run app.py`.
    # The original called cl.run(), which does not exist and would raise
    # AttributeError when this module is executed directly.
    pass
chainlit.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Welcome to Chainlit! 🚀🤖
2
+
3
+ Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4
+
5
+ ## Useful Links 🔗
6
+
7
+ - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
8
+ - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
9
+
10
+ We can't wait to see what you create with Chainlit! Happy coding! 💻😊
11
+
12
+ ## Welcome screen
13
+
14
+ To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
rag.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ from langchain_community.document_loaders.csv_loader import CSVLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import CacheBackedEmbeddings
5
+ from langchain.storage import LocalFileStore
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_core.output_parsers import StrOutputParser
8
+ from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain_openai import ChatOpenAI
10
+ from langchain_openai import OpenAIEmbeddings
11
+ from langchain_core.runnables.passthrough import RunnablePassthrough
12
+ from langchain_core.runnables.base import RunnableSequence
13
+
14
+
15
class RAGModel:
    """Retrieval-augmented QA over ``imdb.csv`` using FAISS and GPT-4.

    Builds a FAISS index over chunked rows of the CSV with cache-backed
    OpenAI embeddings, then answers questions by stuffing the most
    similar chunks into a chat prompt.
    """

    def __init__(self, openai_api_key):
        """Load imdb.csv, chunk it, embed it, and build the FAISS index.

        Args:
            openai_api_key: OpenAI API key used for both the embedding
                model and the chat model.
        """
        # Load and chunk the source documents once.
        # (The original also round-tripped imdb.csv through
        # datasets.load_dataset/to_csv and split the documents a second
        # time with a default splitter — both steps were redundant, and
        # the explicit 1000/100 chunking was computed but never used.)
        loader = CSVLoader(file_path="imdb.csv")
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=100
        )
        docs = text_splitter.split_documents(documents)

        # Embeddings, backed by a local file cache so repeated runs do
        # not re-embed (and re-bill) identical chunks.
        self.embeddings = OpenAIEmbeddings(
            model="text-embedding-ada-002", openai_api_key=openai_api_key
        )
        self.store = LocalFileStore("./cache/")
        self.embedder = CacheBackedEmbeddings.from_bytes_store(
            self.embeddings, self.store, namespace=self.embeddings.model
        )

        # Vector store + retriever; the index is also persisted to disk.
        self.vector_store = FAISS.from_documents(docs, self.embedder)
        self.vector_store.save_local("faiss_index")
        self.retriever = self.vector_store.as_retriever()

        # Chat model, output parser, and prompt template.
        self.chat_model = ChatOpenAI(
            model="gpt-4", temperature=0, openai_api_key=openai_api_key
        )
        self.parser = StrOutputParser()
        self.prompt_template = ChatPromptTemplate.from_template(
            "Answer the {question} based on the following context: {context}"
        )

    def query(self, question):
        """Answer *question* using context retrieved from the FAISS index.

        Args:
            question: Natural-language question string.

        Returns:
            The chat model's answer as a plain string.
        """
        # Retrieve the chunks most similar to the question.
        embedding_query = self.embeddings.embed_query(question)
        similar_documents = self.vector_store.similarity_search_by_vector(
            embedding_query
        )
        context = "\n".join(doc.page_content for doc in similar_documents)

        # LCEL pipeline: inject the pre-built context, format the prompt,
        # call the model, and parse the reply down to a string.
        chain = (
            {"context": lambda _: context, "question": RunnablePassthrough()}
            | self.prompt_template
            | self.chat_model
            | self.parser
        )
        return chain.invoke(question)
requirements.txt CHANGED
@@ -1 +1,21 @@
1
- huggingface_hub==0.22.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ chainlit
+ transformers
+ datasets
+ langchain
+ langchain-community
+ langchain-core
+ langchain-openai
+ langchain-text-splitters
+ torch
+ faiss-cpu
+ openai
+ librosa
+ nest_asyncio