# Hugging Face Space (scraped status header: "Spaces: Sleeping")
import os
import pickle

import gradio as gr
import pandas as pd
from InstructorEmbedding import INSTRUCTOR
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.vectorstores import FAISS

# Knowledge-base sources: one URL per row, taken from the first column of the CSV.
df = pd.read_csv('linkss.csv')
# itertuples() yields (Index, col0, ...), so row[1] is the first data column.
url = [str(row[1]) for row in df.itertuples()]
class Chatbot:
    """Retrieval-augmented QA chatbot over the documents fetched from `url`.

    Pipeline: load pages -> split into chunks -> embed into a FAISS store
    (cached as a pickle) -> answer queries with a RetrievalQA chain backed
    by a HuggingFace-hosted LLM.
    """

    def __init__(self):
        # SECURITY: API keys were previously hard-coded here and committed to
        # source control. They must now be supplied via the environment
        # (HUGGINGFACEHUB_API_TOKEN, SERPER_API_KEY); rotate any keys that
        # were ever committed.
        for var in ("HUGGINGFACEHUB_API_TOKEN", "SERPER_API_KEY"):
            if not os.environ.get(var):
                raise EnvironmentError(
                    f"{var} is not set; export it before starting the app"
                )
        self.load_data()
        self.load_embeddings()
        self.create_qa_model()

    def load_data(self):
        """Fetch and parse every source URL into `self.data` documents."""
        loaders = UnstructuredURLLoader(urls=url)
        self.data = loaders.load()

    def split_documents(self):
        """Split the loaded documents into ~500-char chunks in `self.docs`."""
        text_splitter = CharacterTextSplitter(
            separator='\n', chunk_size=500, chunk_overlap=20
        )
        self.docs = text_splitter.split_documents(self.data)

    def create_embeddings(self):
        """Embed `self.docs` into a FAISS store, cache it, set `self.retriever`."""
        instructor_embeddings = HuggingFaceInstructEmbeddings(
            model_name="sembeddings/model_gpt_trained"
        )
        db_instructEmbedd = FAISS.from_documents(self.docs, instructor_embeddings)
        self.retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 3})
        with open("db_instructEmbedd.pkl", "wb") as f:
            pickle.dump(db_instructEmbedd, f)

    def load_embeddings(self):
        """Load the cached FAISS store, or build it on a cache miss.

        Previously this method crashed with FileNotFoundError on a fresh
        deployment because nothing ever called split_documents() /
        create_embeddings(); now they serve as the fallback path.
        """
        try:
            with open("db_instructEmbedd.pkl", "rb") as f:
                # SECURITY: pickle.load executes arbitrary code — only ever
                # load a cache file this app itself produced.
                store = pickle.load(f)
        except FileNotFoundError:
            self.split_documents()
            self.create_embeddings()  # also sets self.retriever
            return
        self.retriever = store.as_retriever(search_kwargs={"k": 3})

    def create_qa_model(self):
        """Build the RetrievalQA chain (`self.qa`) over `self.retriever`."""
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 0.1}
        )
        self.qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=self.retriever,
            return_source_documents=True,
        )

    def chat(self, query):
        """Answer `query` with the QA chain and return the answer text."""
        result = self.qa({'query': query})
        return result['result']
# Build the chatbot once at startup; every Gradio request reuses it.
chatbot = Chatbot()


def chatbot_response(query):
    """Gradio callback: route the user's query through the QA chain."""
    return chatbot.chat(query)


iface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="Chatbot Trained on Indian Exam Articles",
)
iface.launch()