Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,14 +3,18 @@ import gradio as gr
|
|
| 3 |
import numpy as np
|
| 4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
from langchain.document_loaders import DirectoryLoader, TextLoader
|
| 6 |
-
from
|
| 7 |
-
from
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
from langchain.memory import ConversationBufferMemory
|
| 10 |
-
from
|
| 11 |
|
| 12 |
-
# Set up environment variables for HuggingFace
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Create a directory for document storage if it doesn't exist
|
| 16 |
os.makedirs("documents", exist_ok=True)
|
|
@@ -42,6 +46,10 @@ def process_documents():
|
|
| 42 |
|
| 43 |
# Create RAG chain
|
| 44 |
def create_chain(vector_store):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
# Initialize the LLM
|
| 46 |
llm = HuggingFaceHub(
|
| 47 |
repo_id="google/flan-t5-large",
|
|
@@ -79,28 +87,45 @@ def upload_file(files):
|
|
| 79 |
vector_store = process_documents()
|
| 80 |
chain = create_chain(vector_store)
|
| 81 |
|
|
|
|
|
|
|
|
|
|
| 82 |
return "Files uploaded and processed successfully!"
|
| 83 |
|
| 84 |
# Function to handle user queries
|
| 85 |
def chat(message, history):
|
| 86 |
global chain, chat_history, vector_store
|
| 87 |
|
| 88 |
-
#
|
| 89 |
if vector_store is None:
|
| 90 |
if os.path.exists("documents") and any(os.path.isfile(os.path.join("documents", f)) for f in os.listdir("documents")):
|
| 91 |
vector_store = process_documents()
|
| 92 |
chain = create_chain(vector_store)
|
| 93 |
else:
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
# Convert Gradio history to the format expected by the chain
|
| 97 |
-
if history:
|
| 98 |
-
chat_history = [(turn[0], turn[1]) for turn in history]
|
| 99 |
|
| 100 |
-
#
|
| 101 |
-
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
# Create Gradio interface
|
| 106 |
with gr.Blocks(title="RAG Chatbot") as demo:
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
from langchain.document_loaders import DirectoryLoader, TextLoader
|
| 6 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 7 |
+
from langchain_community.vectorstores import FAISS
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
from langchain.memory import ConversationBufferMemory
|
| 10 |
+
from langchain_community.llms import HuggingFaceHub
|
| 11 |
|
| 12 |
# Set up environment variables for HuggingFace.
#
# The app's own setting is HUGGINGFACE_API_TOKEN, while create_chain() checks
# HUGGINGFACEHUB_API_TOKEN. Read the app-level name first and fall back to the
# hub-level name, so a deployment that already exports the latter is not told
# its token is missing.
huggingface_token = (
    os.getenv("HUGGINGFACE_API_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
)
if huggingface_token:
    # Mirror the token under the name that create_chain() looks up.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_token
else:
    # Empty or unset values both land here; startup continues without a token.
    print("Warning: HUGGINGFACE_API_TOKEN environment variable not set. You'll need to set it for the LLM to work.")
|
| 18 |
|
| 19 |
# Create a directory for document storage if it doesn't exist
|
| 20 |
os.makedirs("documents", exist_ok=True)
|
|
|
|
| 46 |
|
| 47 |
# Create RAG chain
|
| 48 |
def create_chain(vector_store):
|
| 49 |
+
# Check if API token is available
|
| 50 |
+
if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
|
| 51 |
+
return None
|
| 52 |
+
|
| 53 |
# Initialize the LLM
|
| 54 |
llm = HuggingFaceHub(
|
| 55 |
repo_id="google/flan-t5-large",
|
|
|
|
| 87 |
vector_store = process_documents()
|
| 88 |
chain = create_chain(vector_store)
|
| 89 |
|
| 90 |
+
if chain is None:
|
| 91 |
+
return "Files uploaded and processed, but HuggingFace API token is missing. Set the environment variable to enable the chatbot."
|
| 92 |
+
|
| 93 |
return "Files uploaded and processed successfully!"
|
| 94 |
|
| 95 |
# Function to handle user queries
|
| 96 |
def chat(message, history):
    """Answer a user message with the RAG chain and return the updated history.

    Args:
        message: The text the user just submitted.
        history: Prior conversation as a list of ``[user, bot]`` pairs. ``None``
            is treated as an empty conversation.

    Returns:
        A new list: ``history`` plus one ``[message, reply]`` pair. ``reply`` is
        the chain's answer, or an explanatory message when no documents are
        available, the API token is missing, or the chain raised an error.
    """
    global chain, chat_history, vector_store

    # Normalise up front so every `history + [...]` below is safe.
    history = history or []

    # Lazily build the vector store and chain from files already on disk.
    if vector_store is None:
        # isdir (rather than exists) keeps os.listdir from raising when
        # "documents" happens to be a regular file.
        has_documents = os.path.isdir("documents") and any(
            os.path.isfile(os.path.join("documents", f))
            for f in os.listdir("documents")
        )
        if not has_documents:
            # Return in the format expected by Gradio chatbot
            return history + [[message, "Please upload documents first to initialize the chatbot."]]
        vector_store = process_documents()
        chain = create_chain(vector_store)

    # create_chain() returns None when the API token is not configured.
    if chain is None:
        return history + [[message, "HuggingFace API token is not set. Please set the HUGGINGFACE_API_TOKEN environment variable."]]

    try:
        # Always resync the module-level copy, so a cleared chat does not leave
        # the previous conversation behind. It is not passed to the chain
        # below; only the question is.
        chat_history = [(turn[0], turn[1]) for turn in history]

        response = chain({"question": message})
        answer = response['answer']
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing
        # the Gradio callback.
        error_message = f"Error processing your request: {str(e)}"
        return history + [[message, error_message]]

    return history + [[message, answer]]
|
| 129 |
|
| 130 |
# Create Gradio interface
|
| 131 |
with gr.Blocks(title="RAG Chatbot") as demo:
|