Spaces:

avimittal30
/

conversational_rag

Build error

App Files Files Community

avimittal30 committed on Apr 26, 2025

Commit

132bf74

verified ·

1 Parent(s): 0e650ce

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -155

app.py CHANGED Viewed

@@ -1,155 +1,16 @@
-import os
-import gradio as gr
-import numpy as np
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.document_loaders import DirectoryLoader, TextLoader
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationBufferMemory
-from langchain_community.llms import HuggingFaceHub
-# Set up environment variables for HuggingFace - safely handle potential None value
-huggingface_token = os.getenv("HUGGINGFACE_API_TOKEN")
-if huggingface_token:
-    os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_token
-else:
-    print("Warning: HUGGINGFACE_API_TOKEN environment variable not set. You'll need to set it for the LLM to work.")
-# Create a directory for document storage if it doesn't exist
-os.makedirs("documents", exist_ok=True)
-# Function to load documents
-def load_documents(directory="documents"):
-    loader = DirectoryLoader(directory, glob="**/*.txt", loader_cls=TextLoader)
-    documents = loader.load()
-    return documents
-# Function to process documents and create vector store
-def process_documents():
-    documents = load_documents()
-    # Split documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,
-        chunk_overlap=200
-    )
-    chunks = text_splitter.split_documents(documents)
-    # Create embeddings
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    # Create vector store
-    vector_store = FAISS.from_documents(chunks, embeddings)
-    return vector_store
-# Create RAG chain
-def create_chain(vector_store):
-    # Check if API token is available
-    if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
-        return None
-    # Initialize the LLM
-    llm = HuggingFaceHub(
-        repo_id="google/flan-t5-large",
-        model_kwargs={"temperature": 0.5, "max_length": 512}
-    )
-    # Create memory for the conversation
-    memory = ConversationBufferMemory(
-        memory_key="chat_history",
-        return_messages=True
-    )
-    # Create the conversational chain
-    chain = ConversationalRetrievalChain.from_llm(
-        llm=llm,
-        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
-        memory=memory
-    )
-    return chain
-# Initialize variables for handling chat state
-vector_store = None
-chain = None
-chat_history = []
-# Function to handle file uploads
-def upload_file(files):
-    for file in files:
-        file_path = os.path.join("documents", os.path.basename(file.name))
-        with open(file_path, "wb") as f:
-            f.write(file.read())
-    global vector_store, chain
-    vector_store = process_documents()
-    chain = create_chain(vector_store)
-    if chain is None:
-        return "Files uploaded and processed, but HuggingFace API token is missing. Set the environment variable to enable the chatbot."
-    return "Files uploaded and processed successfully!"
-# Function to handle user queries
-def chat(message, history):
-    global chain, chat_history, vector_store
-    # Check if documents exist
-    if vector_store is None:
-        if os.path.exists("documents") and any(os.path.isfile(os.path.join("documents", f)) for f in os.listdir("documents")):
-            vector_store = process_documents()
-            chain = create_chain(vector_store)
-        else:
-            # Return in the format expected by Gradio chatbot
-            return history + [[message, "Please upload documents first to initialize the chatbot."]]
-    # Check if API token is set
-    if chain is None:
-        # Return in the format expected by Gradio chatbot
-        return history + [[message, "HuggingFace API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable."]]
-    # Process the message with the chain
-    try:
-        # Convert history to format expected by chain
-        if history:
-            chat_history = [(turn[0], turn[1]) for turn in history]
-        # Get response from chain
-        response = chain({"question": message})
-        answer = response['answer']
-        # Return in the format expected by Gradio chatbot
-        return history + [[message, answer]]
-    except Exception as e:
-        # Handle any errors
-        error_message = f"Error processing your request: {str(e)}"
-        return history + [[message, error_message]]
-# Create Gradio interface
-with gr.Blocks(title="RAG Chatbot") as demo:
-    gr.Markdown("# RAG-based Conversational Chatbot")
-    gr.Markdown("Upload text documents and chat with an AI that can answer questions based on their content.")
-    with gr.Row():
-        with gr.Column(scale=1):
-            file_output = gr.Textbox(label="Upload Status")
-            file_input = gr.File(
-                file_count="multiple",
-                label="Upload Documents (.txt files)"
-            )
-            upload_button = gr.Button("Process Documents")
-            upload_button.click(upload_file, inputs=[file_input], outputs=[file_output])
-        with gr.Column(scale=2):
-            chatbot = gr.Chatbot(height=400)
-            msg = gr.Textbox(label="Ask a question about your documents")
-            msg.submit(chat, inputs=[msg, chatbot], outputs=[chatbot])
-            clear = gr.Button("Clear")
-            clear.click(lambda: [], outputs=[chatbot])
-# Launch the app
-if __name__ == "__main__":
-    demo.launch()

+def upload_file(file):
+    """Copy an uploaded file to ``destination_file.txt``.
+
+    Args:
+        file: The upload to persist. Accepted forms are an object with a
+            ``.name`` attribute holding the file's path, a plain path
+            string, or a sequence whose first element is the path.
+
+    Returns:
+        A status message: "File uploaded successfully" once the copy is
+        done, or "No file uploaded" when ``file`` is ``None``.
+
+    Raises:
+        OSError: If the source cannot be read or the destination cannot
+            be written.
+    """
+    # Function-scope import: this file has no top-level import block.
+    import shutil
+
+    # Guard against being called with no file selected.
+    if file is None:
+        return "No file uploaded"
+
+    # Resolve the on-disk path of the upload.
+    # NOTE(review): str() of an upload object is not the file's content
+    # (for Gradio's NamedString it is presumably the path itself; confirm
+    # against the installed Gradio version), so read from ``.name`` instead.
+    if hasattr(file, "name"):
+        source_path = file.name
+    elif isinstance(file, str):
+        source_path = file
+    else:
+        source_path = file[0]
+
+    # Copy raw bytes so non-UTF-8 or binary uploads are preserved exactly.
+    shutil.copyfile(source_path, "destination_file.txt")
+    return "File uploaded successfully"