avimittal30 committed on
Commit
132bf74
·
verified ·
1 Parent(s): 0e650ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -155
app.py CHANGED
@@ -1,155 +1,16 @@
1
- import os
2
- import gradio as gr
3
- import numpy as np
4
- from langchain.text_splitter import RecursiveCharacterTextSplitter
5
- from langchain.document_loaders import DirectoryLoader, TextLoader
6
- from langchain_community.embeddings import HuggingFaceEmbeddings
7
- from langchain_community.vectorstores import FAISS
8
- from langchain.chains import ConversationalRetrievalChain
9
- from langchain.memory import ConversationBufferMemory
10
- from langchain_community.llms import HuggingFaceHub
11
-
12
- # Set up environment variables for HuggingFace - safely handle potential None value
13
- huggingface_token = os.getenv("HUGGINGFACE_API_TOKEN")
14
- if huggingface_token:
15
- os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_token
16
- else:
17
- print("Warning: HUGGINGFACE_API_TOKEN environment variable not set. You'll need to set it for the LLM to work.")
18
-
19
- # Create a directory for document storage if it doesn't exist
20
- os.makedirs("documents", exist_ok=True)
21
-
22
- # Function to load documents
23
- def load_documents(directory="documents"):
24
- loader = DirectoryLoader(directory, glob="**/*.txt", loader_cls=TextLoader)
25
- documents = loader.load()
26
- return documents
27
-
28
- # Function to process documents and create vector store
29
- def process_documents():
30
- documents = load_documents()
31
-
32
- # Split documents into chunks
33
- text_splitter = RecursiveCharacterTextSplitter(
34
- chunk_size=1000,
35
- chunk_overlap=200
36
- )
37
- chunks = text_splitter.split_documents(documents)
38
-
39
- # Create embeddings
40
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
41
-
42
- # Create vector store
43
- vector_store = FAISS.from_documents(chunks, embeddings)
44
-
45
- return vector_store
46
-
47
- # Create RAG chain
48
- def create_chain(vector_store):
49
- # Check if API token is available
50
- if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
51
- return None
52
-
53
- # Initialize the LLM
54
- llm = HuggingFaceHub(
55
- repo_id="google/flan-t5-large",
56
- model_kwargs={"temperature": 0.5, "max_length": 512}
57
- )
58
-
59
- # Create memory for the conversation
60
- memory = ConversationBufferMemory(
61
- memory_key="chat_history",
62
- return_messages=True
63
- )
64
-
65
- # Create the conversational chain
66
- chain = ConversationalRetrievalChain.from_llm(
67
- llm=llm,
68
- retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
69
- memory=memory
70
- )
71
-
72
- return chain
73
-
74
- # Initialize variables for handling chat state
75
- vector_store = None
76
- chain = None
77
- chat_history = []
78
-
79
- # Function to handle file uploads
80
- def upload_file(files):
81
- for file in files:
82
- file_path = os.path.join("documents", os.path.basename(file.name))
83
- with open(file_path, "wb") as f:
84
- f.write(file.read())
85
-
86
- global vector_store, chain
87
- vector_store = process_documents()
88
- chain = create_chain(vector_store)
89
-
90
- if chain is None:
91
- return "Files uploaded and processed, but HuggingFace API token is missing. Set the environment variable to enable the chatbot."
92
-
93
- return "Files uploaded and processed successfully!"
94
-
95
- # Function to handle user queries
96
- def chat(message, history):
97
- global chain, chat_history, vector_store
98
-
99
- # Check if documents exist
100
- if vector_store is None:
101
- if os.path.exists("documents") and any(os.path.isfile(os.path.join("documents", f)) for f in os.listdir("documents")):
102
- vector_store = process_documents()
103
- chain = create_chain(vector_store)
104
- else:
105
- # Return in the format expected by Gradio chatbot
106
- return history + [[message, "Please upload documents first to initialize the chatbot."]]
107
-
108
- # Check if API token is set
109
- if chain is None:
110
- # Return in the format expected by Gradio chatbot
111
- return history + [[message, "HuggingFace API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable."]]
112
-
113
- # Process the message with the chain
114
- try:
115
- # Convert history to format expected by chain
116
- if history:
117
- chat_history = [(turn[0], turn[1]) for turn in history]
118
-
119
- # Get response from chain
120
- response = chain({"question": message})
121
- answer = response['answer']
122
-
123
- # Return in the format expected by Gradio chatbot
124
- return history + [[message, answer]]
125
- except Exception as e:
126
- # Handle any errors
127
- error_message = f"Error processing your request: {str(e)}"
128
- return history + [[message, error_message]]
129
-
130
- # Create Gradio interface
131
- with gr.Blocks(title="RAG Chatbot") as demo:
132
- gr.Markdown("# RAG-based Conversational Chatbot")
133
- gr.Markdown("Upload text documents and chat with an AI that can answer questions based on their content.")
134
-
135
- with gr.Row():
136
- with gr.Column(scale=1):
137
- file_output = gr.Textbox(label="Upload Status")
138
- file_input = gr.File(
139
- file_count="multiple",
140
- label="Upload Documents (.txt files)"
141
- )
142
- upload_button = gr.Button("Process Documents")
143
- upload_button.click(upload_file, inputs=[file_input], outputs=[file_output])
144
-
145
- with gr.Column(scale=2):
146
- chatbot = gr.Chatbot(height=400)
147
- msg = gr.Textbox(label="Ask a question about your documents")
148
-
149
- msg.submit(chat, inputs=[msg, chatbot], outputs=[chatbot])
150
- clear = gr.Button("Clear")
151
- clear.click(lambda: [], outputs=[chatbot])
152
-
153
- # Launch the app
154
- if __name__ == "__main__":
155
- demo.launch()
 
1
def upload_file(file):
    """Save an uploaded file as ``destination_file.txt`` in the working directory.

    Args:
        file: The upload to save. Accepted forms:
            * an object with a ``name`` attribute holding the path of the
              uploaded file (e.g. a ``str`` subclass carrying its own path,
              or a temp-file wrapper);
            * a plain ``str`` / path-like pointing at the file;
            * a sequence whose first element is such a path;
            * ``None`` when nothing was uploaded.

    Returns:
        A human-readable status string.

    Raises:
        OSError: If the source cannot be read or the destination cannot be
            written.
    """
    # Function-scope imports: this module has no top-level import block.
    import os
    import shutil

    destination = "destination_file.txt"

    # Nothing selected: report it instead of failing on ``None[0]``.
    if file is None:
        return "No file was provided"

    if hasattr(file, "name"):
        source = file.name
        has_backing_file = (
            isinstance(source, (str, os.PathLike)) and os.path.isfile(source)
        )
        if not has_backing_file:
            # Legacy fallback: a named string with no file on disk is treated
            # as the content itself, as the previous implementation did.
            with open(destination, "w") as out:
                out.write(str(file))
            return "File uploaded successfully"
    else:
        # A bare path, or a sequence whose first element is the path.
        source = file if isinstance(file, (str, os.PathLike)) else file[0]

    # Copy bytes rather than decoding/re-encoding text, so the saved file is
    # identical to the upload regardless of its encoding.
    try:
        shutil.copyfile(source, destination)
    except shutil.SameFileError:
        # The upload already is the destination file; nothing to copy.
        pass

    return "File uploaded successfully"