Spaces:
Runtime error
Runtime error
Commit ·
6f77181
1
Parent(s): 0dded5d
initial commit+
Browse files- Spacefile +2 -0
- app.py +30 -61
- chainlit.md +14 -0
- rag.py +91 -0
- requirements.txt +21 -1
Spacefile
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
# NOTE(review): app.py is a Chainlit app, not gradio — on HF Spaces, Chainlit
# runs under the docker SDK; `sdk: gradio` here is the likely runtime error.
sdk: docker
app_file: app.py
app.py
CHANGED
|
@@ -1,63 +1,32 @@
|
|
| 1 |
-
import
|
| 2 |
-
from
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
for message in client.chat_completion(
|
| 31 |
-
messages,
|
| 32 |
-
max_tokens=max_tokens,
|
| 33 |
-
stream=True,
|
| 34 |
-
temperature=temperature,
|
| 35 |
-
top_p=top_p,
|
| 36 |
-
):
|
| 37 |
-
token = message.choices[0].delta.content
|
| 38 |
-
|
| 39 |
-
response += token
|
| 40 |
-
yield response
|
| 41 |
-
|
| 42 |
-
"""
|
| 43 |
-
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
| 44 |
-
"""
|
| 45 |
-
demo = gr.ChatInterface(
|
| 46 |
-
respond,
|
| 47 |
-
additional_inputs=[
|
| 48 |
-
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
| 49 |
-
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
| 50 |
-
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
| 51 |
-
gr.Slider(
|
| 52 |
-
minimum=0.1,
|
| 53 |
-
maximum=1.0,
|
| 54 |
-
value=0.95,
|
| 55 |
-
step=0.05,
|
| 56 |
-
label="Top-p (nucleus sampling)",
|
| 57 |
-
),
|
| 58 |
-
],
|
| 59 |
-
)
|
| 60 |
-
|
| 61 |
|
| 62 |
if __name__ == "__main__":
|
| 63 |
-
|
|
|
|
|
|
import os

import chainlit as cl
import nest_asyncio

from rag import RAGModel

# Patch the running event loop so the synchronous LangChain calls made from
# the async Chainlit handlers below do not deadlock.
nest_asyncio.apply()

# Build the RAG model once at import time so every incoming message reuses
# the same FAISS index and embedding cache instead of re-indexing per query.
rag_model = RAGModel(openai_api_key=os.getenv("OPENAI_API_KEY"))


@cl.on_chat_start
async def start():
    """Greet the user at the start of a new chat session."""
    msg = cl.Message(content="Firing up the research info bot...")
    await msg.send()
    # Rewrite the greeting in place once startup is done.
    msg.content = "Hi, welcome to research info bot. What is your query?"
    await msg.update()


@cl.on_message
async def main(message):
    """Answer each user message via the RAG pipeline.

    NOTE(review): rag_model.query is synchronous and blocks the event loop
    while the LLM call runs — acceptable for a single-user demo Space.
    """
    result = rag_model.query(message.content)
    await cl.Message(result).send()

# Removed the `if __name__ == "__main__": cl.run()` block: chainlit has no
# `run()` function — the app is launched with `chainlit run app.py`, and the
# old guard raised AttributeError when executed directly.
chainlit.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Welcome to Chainlit! 🚀🤖
|
| 2 |
+
|
| 3 |
+
Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
|
| 4 |
+
|
| 5 |
+
## Useful Links 🔗
|
| 6 |
+
|
| 7 |
+
- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
|
| 8 |
+
- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
|
| 9 |
+
|
| 10 |
+
We can't wait to see what you create with Chainlit! Happy coding! 💻😊
|
| 11 |
+
|
| 12 |
+
## Welcome screen
|
| 13 |
+
|
| 14 |
+
To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
|
rag.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables.passthrough import RunnablePassthrough


class RAGModel:
    """Retrieval-augmented generation over a local ``imdb.csv`` file.

    On construction the CSV is loaded, chunked, embedded (with a local
    file-backed embedding cache) and indexed into FAISS. ``query`` retrieves
    the most similar chunks and asks GPT-4 to answer from that context.
    """

    def __init__(self, openai_api_key):
        """Build the vector store and the LLM chain components.

        Args:
            openai_api_key: API key passed to the OpenAI embedding and chat
                models (typically ``os.getenv("OPENAI_API_KEY")``).
        """
        # Load the CSV rows as documents and split them ONCE.
        # (The old code loaded and split the file twice with two different
        # splitters — a default-argument splitter silently overrode the
        # chunk_size=1000 split — and also round-tripped the CSV through
        # `datasets.load_dataset(...).to_csv('imdb.csv')`, rewriting the very
        # file it had just read. Both redundancies are removed.)
        loader = CSVLoader(file_path="imdb.csv")
        data = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=100
        )
        docs = text_splitter.split_documents(data)

        # Embeddings, cached on disk so re-runs don't re-bill the API.
        self.embeddings = OpenAIEmbeddings(
            model="text-embedding-ada-002", openai_api_key=openai_api_key
        )
        self.store = LocalFileStore("./cache/")
        self.embedder = CacheBackedEmbeddings.from_bytes_store(
            self.embeddings, self.store, namespace=self.embeddings.model
        )

        # Vector store (persisted locally for inspection/reuse) + retriever.
        self.vector_store = FAISS.from_documents(docs, self.embedder)
        self.vector_store.save_local("faiss_index")
        self.retriever = self.vector_store.as_retriever()

        # Chat model, output parser, and the prompt template used by `query`.
        self.chat_model = ChatOpenAI(
            model="gpt-4", temperature=0, openai_api_key=openai_api_key
        )
        self.parser = StrOutputParser()
        self.prompt_template = ChatPromptTemplate.from_template(
            "Answer the {question} based on the following context: {context}"
        )

    def query(self, question):
        """Answer *question* using context retrieved from the vector store.

        Args:
            question: The user's natural-language question.

        Returns:
            The model's answer as a plain string (via ``StrOutputParser``).
        """
        # Retrieve the chunks most similar to the question and join them
        # into a single context string for the prompt.
        embedding = self.embeddings.embed_query(question)
        similar_documents = self.vector_store.similarity_search_by_vector(embedding)
        context = "\n".join(doc.page_content for doc in similar_documents)

        # LCEL chain: {context, question} -> prompt -> GPT-4 -> str.
        # The context is fixed by the retrieval above, so it is injected via
        # a constant-returning callable; the question passes straight through.
        chain = (
            {"context": lambda _: context, "question": RunnablePassthrough()}
            | self.prompt_template
            | self.chat_model
            | self.parser
        )
        return chain.invoke(question)
requirements.txt
CHANGED
|
@@ -1 +1,21 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
chainlit
transformers
datasets
langchain
langchain-community
langchain-core
langchain-openai
langchain-text-splitters
torch
faiss-cpu
openai
librosa
nest_asyncio