avimittal30 committed on
Commit
1b7e795
·
verified ·
1 Parent(s): 132bf74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -16
app.py CHANGED
@@ -1,16 +1,155 @@
1
def upload_file(file):
    """Save the text of an uploaded file to ``destination_file.txt``.

    ``file`` may be an object exposing a ``name`` attribute (its string form
    is taken as the content), a path string, or a sequence whose first item
    is a path. Returns a fixed success message.
    """
    if not hasattr(file, 'name'):
        # Plain path, or a tuple/list whose first element is the path.
        filename = file if isinstance(file, str) else file[0]
        with open(filename, 'r') as source:
            content = source.read()
    else:
        # Named object: its string form is used as the content.
        filename = file.name
        content = str(file)

    with open("destination_file.txt", "w") as destination:
        destination.write(content)

    return "File uploaded successfully"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import numpy as np
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.document_loaders import DirectoryLoader, TextLoader
6
+ from langchain_community.embeddings import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain.chains import ConversationalRetrievalChain
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain_community.llms import HuggingFaceHub
11
+
12
# Copy the HUGGINGFACE_API_TOKEN value into HUGGINGFACEHUB_API_TOKEN, the
# variable create_chain() checks before building the LLM. A missing token only
# produces a warning so the app can still start.
huggingface_token = os.getenv("HUGGINGFACE_API_TOKEN")
if not huggingface_token:
    print("Warning: HUGGINGFACE_API_TOKEN environment variable not set. You'll need to set it for the LLM to work.")
else:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_token

# Uploaded documents are stored in this directory; create it up front.
os.makedirs("documents", exist_ok=True)
21
+
22
+ # Function to load documents
23
def load_documents(directory="documents"):
    """Load the files under ``directory`` that match the glob ``**/*.txt``.

    Each matching file is read with ``TextLoader``; the loader's result is
    returned unchanged.
    """
    return DirectoryLoader(directory, glob="**/*.txt", loader_cls=TextLoader).load()
27
+
28
+ # Function to process documents and create vector store
29
def process_documents():
    """Load the stored documents, chunk them, and build a FAISS vector store.

    Returns:
        The FAISS vector store built from the document chunks.

    Raises:
        ValueError: If no documents could be loaded, or the loaded documents
            produce no chunks. Building a FAISS index from an empty list
            otherwise fails with an opaque internal error.
    """
    documents = load_documents()
    if not documents:
        raise ValueError(
            "No .txt documents found in the 'documents' directory. "
            "Upload at least one .txt file first."
        )

    # Split documents into overlapping chunks so retrieval returns focused passages.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        raise ValueError("The uploaded documents contain no text to index.")

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Create vector store
    vector_store = FAISS.from_documents(chunks, embeddings)

    return vector_store
46
+
47
+ # Create RAG chain
48
def create_chain(vector_store):
    """Build the conversational retrieval chain backed by ``vector_store``.

    Returns ``None`` when the ``HUGGINGFACEHUB_API_TOKEN`` environment variable
    is unset or empty; otherwise returns the chain created by
    ``ConversationalRetrievalChain.from_llm``.
    """
    api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not api_token:
        # Without a token the hosted LLM cannot be used.
        return None

    # Hosted LLM that generates the answers.
    hub_llm = HuggingFaceHub(
        repo_id="google/flan-t5-large",
        model_kwargs={"temperature": 0.5, "max_length": 512},
    )

    # Conversation memory stored under the "chat_history" key.
    conversation_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )

    # Retriever configured with k=3.
    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    return ConversationalRetrievalChain.from_llm(
        llm=hub_llm,
        retriever=retriever,
        memory=conversation_memory,
    )
73
+
74
# Module-level chat state shared by the upload and chat callbacks.
# Both start as None: nothing has been indexed and no chain exists yet.
vector_store = chain = None
# Starts empty; chat() rebinds it from the UI history.
chat_history = list()
78
+
79
+ # Function to handle file uploads
80
def upload_file(files):
    """Copy uploaded files into ``documents/`` and rebuild the index and chain.

    Args:
        files: The value delivered by the file input. Accepted forms are
            ``None`` (nothing selected), a single item, or a list/tuple of
            items. Each item is either a filesystem path or an object whose
            ``name`` attribute is the path of the uploaded file.

    Returns:
        A status message for the "Upload Status" textbox.
    """
    import shutil  # local import: only this function needs it

    global vector_store, chain

    # Clicking the button with nothing selected delivers None.
    if files is None:
        files = []
    elif not isinstance(files, (list, tuple)):
        files = [files]

    os.makedirs("documents", exist_ok=True)
    for file in files:
        # Copy by path rather than calling .read(): depending on the Gradio
        # version the item is a plain path string, a str subclass carrying
        # ``name``, or a temp-file wrapper. Every form resolves to a path here.
        source_path = getattr(file, "name", file)
        target_path = os.path.join("documents", os.path.basename(source_path))
        if os.path.abspath(source_path) != os.path.abspath(target_path):
            shutil.copyfile(source_path, target_path)

    # Only .txt files are indexed; an empty corpus would make index building fail.
    has_text_files = any(
        name.endswith(".txt")
        for _root, _dirs, names in os.walk("documents")
        for name in names
    )
    if not has_text_files:
        return "No .txt documents found. Please upload at least one .txt file."

    vector_store = process_documents()
    chain = create_chain(vector_store)

    if chain is None:
        return "Files uploaded and processed, but HuggingFace API token is missing. Set the environment variable to enable the chatbot."

    return "Files uploaded and processed successfully!"
94
+
95
+ # Function to handle user queries
96
def chat(message, history):
    """Answer ``message`` with the retrieval chain and return the updated history.

    Args:
        message: The user's question.
        history: Previous turns as a list of ``[user, bot]`` pairs, or
            ``None``/empty when the conversation is new.

    Returns:
        A new list: the previous turns plus one ``[message, reply]`` pair. The
        reply is the chain's answer, or an explanatory message when documents
        or the API token are missing or an error occurs.
    """
    global chain, chat_history, vector_store

    # Treat a missing history as empty so ``history + [...]`` cannot fail.
    history = list(history) if history else []

    # Build the index lazily if documents are already on disk.
    if vector_store is None:
        has_files = os.path.isdir("documents") and any(
            os.path.isfile(os.path.join("documents", entry))
            for entry in os.listdir("documents")
        )
        if not has_files:
            return history + [[message, "Please upload documents first to initialize the chatbot."]]
        try:
            new_store = process_documents()
            new_chain = create_chain(new_store)
        except Exception as e:
            # Report indexing failures in the chat instead of crashing the callback.
            return history + [[message, f"Error processing your request: {str(e)}"]]
        vector_store, chain = new_store, new_chain

    # create_chain() returns None when the API token is not set.
    if chain is None:
        return history + [[message, "HuggingFace API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable."]]

    try:
        if history:
            # Kept as module state for compatibility; the chain itself is only
            # given the question and relies on its own memory for context.
            chat_history = [(turn[0], turn[1]) for turn in history]
        else:
            # An empty UI history means a new or cleared conversation, so drop
            # turns the chain's memory still holds from the previous one.
            memory = getattr(chain, "memory", None)
            if memory is not None:
                memory.clear()

        response = chain({"question": message})
        answer = response['answer']

        return history + [[message, answer]]
    except Exception as e:
        # Report the failure as the bot's reply rather than raising into the UI.
        error_message = f"Error processing your request: {str(e)}"
        return history + [[message, error_message]]
129
+
130
def _reset_chat():
    """Return an empty history, which empties the chatbot display."""
    return []


# Build the Gradio interface: upload controls in the narrower column,
# the conversation in the wider one.
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("# RAG-based Conversational Chatbot")
    gr.Markdown("Upload text documents and chat with an AI that can answer questions based on their content.")

    with gr.Row():
        # Upload column: status box, file picker, and the processing button.
        with gr.Column(scale=1):
            file_output = gr.Textbox(label="Upload Status")
            file_input = gr.File(
                file_count="multiple",
                label="Upload Documents (.txt files)",
            )
            upload_button = gr.Button("Process Documents")
            upload_button.click(
                fn=upload_file,
                inputs=[file_input],
                outputs=[file_output],
            )

        # Chat column: conversation display and question box.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Ask a question about your documents")

            # Submitting the textbox sends the question and the current history to chat().
            msg.submit(fn=chat, inputs=[msg, chatbot], outputs=[chatbot])
            clear = gr.Button("Clear")
            clear.click(fn=_reset_chat, outputs=[chatbot])

# Launch the app only when run as a script.
if __name__ == "__main__":
    demo.launch()