amalsp committed
Commit 62d24b8 · verified · 1 Parent(s): 723ef4e

Update app.py

Files changed (1)
  1. app.py +8 -24
app.py CHANGED
@@ -1,11 +1,10 @@
 import gradio as gr
 import bs4
+from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
+from langchain.document_loaders import WebBaseLoader, PyPDFDirectoryLoader
+from langchain.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.document_loaders import WebBaseLoader
-from langchain_community.vectorstores import FAISS
-#from langchain_community.vectorstores import Chroma
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
-#from langchain_community.embeddings import OllamaEmbeddings
+from transformers import pipeline
 
 # Function to load, split, and retrieve documents from a URL
 def load_and_retrieve_docs(url):
@@ -22,24 +21,9 @@ def load_and_retrieve_docs(url):
     vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
     return vectorstore.as_retriever()
 
-# Function to initialize vector embedding with FAISS vector store
-def vector_embedding():
-    if "vectors" not in st.session_state:
-        st.session_state.embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-small-en-v1.5",
-                                                               model_kwargs={'device':'cpu'},
-                                                               encode_kwargs={'normalize_embeddings':True})
-        st.session_state.loader = PyPDFDirectoryLoader("./Data_Science") # Data Ingestion
-        st.session_state.docs = st.session_state.loader.load() # Document Loading
-        st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) # Chunk Creation
-        st.session_state.final_documents = st.session_state.text_splitter.split_documents(st.session_state.docs[:20]) # Splitting
-        st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings) # Vector HuggingFace embeddings
-        st.write("Vector Store DB Is Ready")
-    else:
-        st.write("Vectors already initialized.")
-
-# Function to format documents
+# Function to format documents into a context string
 def format_docs(docs):
-    return "\n\n".join(doc.page_content for doc in docs)
+    return "\n\n".join([doc['content'] for doc in docs])
 
 # Function that defines the RAG chain
 def rag_chain(url, question):
@@ -49,7 +33,7 @@ def rag_chain(url, question):
     formatted_prompt = f"Question: {question}\n\nContext: {formatted_context}"
 
     # Using HuggingFace transformers for generating response
-    chat_pipeline = pipeline('text-generation', model='Llama3-8b-8192') # Use the appropriate model here
+    chat_pipeline = pipeline('text-generation', model='gpt-3.5-turbo') # Use the appropriate model here
     response = chat_pipeline(formatted_prompt, max_length=512, num_return_sequences=1)
 
     return response[0]['generated_text']
@@ -64,4 +48,4 @@ iface = gr.Interface(
 )
 
 # Launch the app
-iface.launch()
+iface.launch()
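
Note: the diff only shows the tail of load_and_retrieve_docs. The pieces visible elsewhere in it (WebBaseLoader, RecursiveCharacterTextSplitter, the BAAI/bge-small-en-v1.5 settings from the removed vector_embedding helper, and FAISS) would typically compose as in the sketch below. The body shown here is an assumption about the elided lines, not the committed code; the chunk_size=1000 / chunk_overlap=200 settings are borrowed from the removed helper.

from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS

def load_and_retrieve_docs(url):
    # Sketch only: loader/splitter/embedding steps are inferred from the
    # imports and the removed vector_embedding helper, not from the diff.
    docs = WebBaseLoader(url).load()
    splits = RecursiveCharacterTextSplitter(chunk_size=1000,
                                            chunk_overlap=200).split_documents(docs)
    embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-small-en-v1.5",
                                          model_kwargs={'device': 'cpu'},
                                          encode_kwargs={'normalize_embeddings': True})
    vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
    return vectorstore.as_retriever()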
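
One likely bug in the new format_docs: a LangChain retriever returns Document objects, not dicts, so doc['content'] will raise a TypeError at query time. The pre-commit version used the correct .page_content attribute; a minimal working sketch, assuming docs is the list returned by the retriever:

def format_docs(docs):
    # docs is a list of langchain Document objects, so read the
    # .page_content attribute rather than indexing with doc['content'].
    return "\n\n".join(doc.page_content for doc in docs)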
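
The generation step has a similar issue: pipeline('text-generation', model='gpt-3.5-turbo') will fail at load time, because gpt-3.5-turbo is an OpenAI API model, not a checkpoint on the Hugging Face Hub (the removed 'Llama3-8b-8192' is a Groq API model id with the same problem). A sketch using an open checkpoint instead; 'gpt2' here is only a placeholder assumption, and any Hub-hosted text-generation model would do:

from transformers import pipeline

# 'gpt2' is a stand-in open checkpoint; substitute any text-generation
# model actually hosted on the Hugging Face Hub.
chat_pipeline = pipeline('text-generation', model='gpt2')

# formatted_prompt is built by rag_chain in the app; shown inline here
# so the sketch runs on its own.
formatted_prompt = "Question: What is RAG?\n\nContext: ..."
response = chat_pipeline(formatted_prompt, max_length=512, num_return_sequences=1)
print(response[0]['generated_text'])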