Spaces:
Build error
Update app.py
app.py
CHANGED
@@ -1,16 +1,155 @@
import os
import gradio as gr
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceHub

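Editorial aside, not part of the diff: the Space status above reads "Build error", which on Spaces usually points to dependency installation rather than to app.py itself. A minimal requirements.txt sketch that would cover the imports above — package names only, versions left unpinned and to be checked against the build log:

    gradio
    numpy
    langchain
    langchain-community
    sentence-transformers
    faiss-cpu
    huggingface_hub
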
# Set up environment variables for HuggingFace - safely handle potential None value
huggingface_token = os.getenv("HUGGINGFACE_API_TOKEN")
if huggingface_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_token
else:
    print("Warning: HUGGINGFACE_API_TOKEN environment variable not set. You'll need to set it for the LLM to work.")

# Create a directory for document storage if it doesn't exist
os.makedirs("documents", exist_ok=True)

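(On a Hugging Face Space, the usual way to satisfy this check is to add HUGGINGFACE_API_TOKEN as a repository secret; Spaces expose secrets as environment variables, so os.getenv picks it up at startup.)
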
# Function to load documents
def load_documents(directory="documents"):
    loader = DirectoryLoader(directory, glob="**/*.txt", loader_cls=TextLoader)
    documents = loader.load()
    return documents

# Function to process documents and create vector store
def process_documents():
    documents = load_documents()

    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    chunks = text_splitter.split_documents(documents)

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Create vector store
    vector_store = FAISS.from_documents(chunks, embeddings)

    return vector_store

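Not part of app.py: a small sketch of how the index built above can be probed on its own, useful when tuning chunk_size or the retriever's k. preview_matches is a hypothetical helper and the query string is made up; it assumes documents/ already contains at least one .txt file.

    def preview_matches(store, query, k=3):
        # Print the chunks the retriever would hand to the LLM for this query.
        for doc in store.similarity_search(query, k=k):
            print(doc.metadata.get("source", "?"), "->", doc.page_content[:80])

    # Example:
    # preview_matches(process_documents(), "What is the refund policy?")
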
# Create RAG chain
def create_chain(vector_store):
    # Check if API token is available
    if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
        return None

    # Initialize the LLM
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-large",
        model_kwargs={"temperature": 0.5, "max_length": 512}
    )

    # Create memory for the conversation
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )

    # Create the conversational chain
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=memory
    )

    return chain

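As a quick end-to-end check outside the Gradio UI, the chain can be called directly; the result is a dict whose "answer" key holds the generated text, which is exactly what the chat handler below reads. A hedged sketch, assuming documents/ already holds text files and the token is set (the question string is only an example):

    store = process_documents()
    qa = create_chain(store)
    if qa is not None:
        result = qa({"question": "Summarize the uploaded documents in one sentence."})
        print(result["answer"])
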
# Initialize variables for handling chat state
vector_store = None
chain = None
chat_history = []

# Function to handle file uploads
def upload_file(files):
    for file in files:
        # Gradio hands back temp-file handles (or plain paths, depending on version);
        # copy by path rather than calling file.read(), which fails on closed handles.
        src_path = file.name if hasattr(file, "name") else file
        file_path = os.path.join("documents", os.path.basename(src_path))
        with open(src_path, "rb") as src, open(file_path, "wb") as dst:
            dst.write(src.read())

    global vector_store, chain
    vector_store = process_documents()
    chain = create_chain(vector_store)

    if chain is None:
        return "Files uploaded and processed, but HuggingFace API token is missing. Set the environment variable to enable the chatbot."

    return "Files uploaded and processed successfully!"

# Function to handle user queries
def chat(message, history):
    global chain, chat_history, vector_store

    # Check if documents exist
    if vector_store is None:
        if os.path.exists("documents") and any(os.path.isfile(os.path.join("documents", f)) for f in os.listdir("documents")):
            vector_store = process_documents()
            chain = create_chain(vector_store)
        else:
            # Return in the format expected by the Gradio chatbot
            return history + [[message, "Please upload documents first to initialize the chatbot."]]

    # Check if API token is set
    if chain is None:
        # Return in the format expected by the Gradio chatbot
        return history + [[message, "HuggingFace API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable."]]

    # Process the message with the chain
    try:
        # Keep a tuple copy of the UI history; the chain tracks conversational context
        # through its ConversationBufferMemory, so only the question is passed below.
        if history:
            chat_history = [(turn[0], turn[1]) for turn in history]

        # Get response from chain
        response = chain({"question": message})
        answer = response['answer']

        # Return in the format expected by the Gradio chatbot
        return history + [[message, answer]]
    except Exception as e:
        # Handle any errors
        error_message = f"Error processing your request: {str(e)}"
        return history + [[message, error_message]]

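For reference, the value chat() returns (and gr.Chatbot renders) is a list of [user, assistant] string pairs; the turns below are made-up examples of that shape:

    history = [
        ["What does chapter 2 cover?", "It introduces the data model."],
        ["Any caveats?", "Yes, the schema is still a draft."],
    ]
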
# Create Gradio interface
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("# RAG-based Conversational Chatbot")
    gr.Markdown("Upload text documents and chat with an AI that can answer questions based on their content.")

    with gr.Row():
        with gr.Column(scale=1):
            file_output = gr.Textbox(label="Upload Status")
            file_input = gr.File(
                file_count="multiple",
                label="Upload Documents (.txt files)"
            )
            upload_button = gr.Button("Process Documents")
            upload_button.click(upload_file, inputs=[file_input], outputs=[file_output])

        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Ask a question about your documents")

            msg.submit(chat, inputs=[msg, chatbot], outputs=[chatbot])
            clear = gr.Button("Clear")
            clear.click(lambda: [], outputs=[chatbot])

# Launch the app
if __name__ == "__main__":
    demo.launch()
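
Rough local run-through, following the flow the code above defines: install the dependencies, set HUGGINGFACE_API_TOKEN in the environment, run python app.py, upload one or more .txt files, click "Process Documents", and then ask questions in the chat box.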