Spaces:

anl139
/

test

Sleeping

App Files Files Community

anl139 committed on Feb 4, 2025

Commit

43551fe

verified ·

1 Parent(s): 8c96287

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -35

app.py CHANGED Viewed

@@ -4,9 +4,14 @@ import gradio as gr
 from dotenv import load_dotenv
 from langchain_community.document_loaders import JSONLoader
 from pathlib import Path
-from langchain_core.documents import Document
 import re
 import json
 from langchain_chroma import Chroma
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.retrievers import BM25Retriever
@@ -19,13 +24,18 @@ from langchain.chains import create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.prompts import ChatPromptTemplate
-# Load environment variables for Hugging Face
 load_dotenv()
 os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
 os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
-# Extract metadata from text
 def extract_metadata(text: str) -> dict:
     metadata = {}
     urls = re.findall(r"(Website|Volunteer|Newsletter):\s*(https?://\S+)", text)
     for key, url in urls:
@@ -35,8 +45,9 @@ def extract_metadata(text: str) -> dict:
         metadata[f"{platform.lower()}_handle"] = f"https://{platform.lower()}.com/{handle}"
     return metadata
-# Load and process JSON data
 def load_and_process_data(file_path: str):
     try:
         data = json.loads(Path(file_path).read_text(encoding='utf-8'))
         docs = []
@@ -51,11 +62,15 @@ def load_and_process_data(file_path: str):
         print(f"Error loading JSON: {e}")
         return []
-# Set up document loading and processing
-file_path = './2024data.json'  # In Hugging Face, you'll need to upload this file or access it from Hugging Face Datasets
 docs = load_and_process_data(file_path)
-# Set up LangChain text splitter
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 text_splitter = RecursiveCharacterTextSplitter(
     chunk_size=1000,
@@ -64,73 +79,126 @@ text_splitter = RecursiveCharacterTextSplitter(
 )
 all_splits = text_splitter.split_documents(docs)
-# Set up retrievers
-vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings(), persist_directory="./chroma_db")
 bm25_retriever = BM25Retriever.from_documents(all_splits)
 ensemble_retriever = EnsembleRetriever(
     retrievers=[vectorstore.as_retriever(search_kwargs={"k": 4}), bm25_retriever],
     weights=[0.7, 0.3]
 )
 retriever = ensemble_retriever
-# Prepare for retrieval and generation
 prompt = hub.pull("rlm/rag-prompt")
-llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
-def format_docs(docs):
-    return "\n\n".join(doc.page_content for doc in docs)
 question_answer_chain = create_stuff_documents_chain(llm, prompt)
-rag_chain = create_retrieval_chain(retriever, question_answer_chain)
-# Set up Gradio interface
-green_theme = gr.themes.Base(
-    primary_hue=gr.themes.Color(c50="#00A168", c100="#57B485", c200="#D7ECE0", c300="#FFFFFF", c400="#EAE9E9", c500="#000000", c600="#3A905E", c700="#2A774A", c800="#1A5E36", c900="#0A4512", c950="#052A08")
-)
-# Define response logic
-def message_and_history(message, history):
-    history = history or [{"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> Welcome to the LA2050 ideas hub! How can I help you today?"}]
-    history.append({"role": "user", "content": message.get("text", "")})
     time.sleep(1)
-    user_input = message.get("text", "")
-    if not user_input:
         history.append({"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> Please enter a valid message."})
         yield history, history
         return
     try:
-        response = rag_chain.invoke({"input": user_input})
         answer = response["answer"]
     except Exception as e:
         answer = f"An error occurred: {e}"
     dynamic_message = {"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> "}
     history.append(dynamic_message)
     for character in answer:
         dynamic_message["content"] += character
         yield history, history
     history[-1]["content"] = f"<b>LA2050 Navigator:</b><br> {answer}"
     yield history, history
-# Set up the Gradio interface
 with gr.Blocks(theme=green_theme) as block:
     gr.HTML('<div class="chat-header"><h1>LA2050 Navigator</h1></div>')
-    chatbot = gr.Chatbot(value=[{"role": "assistant", "content": "<b>LA2050 Navigator:</b><br> Welcome to the LA2050 ideas hub! How can I help you today?"}], type="messages")
     state = gr.State([])
-    message = gr.Textbox(placeholder="Type a message", scale=3, show_label=False)
-    message.submit(
         message_and_history,
-        inputs=[message, state],
         outputs=[chatbot, state]
     ).then(
-        lambda: "", inputs=[], outputs=message
     )
-block.launch(debug=True,share=True)

 from dotenv import load_dotenv
 from langchain_community.document_loaders import JSONLoader
 from pathlib import Path
 import re
 import json
+# Import Document from your LangChain module.
+# (If your version of LangChain uses a different path, update accordingly.)
+from langchain_core.documents import Document
+# Import additional libraries from LangChain
 from langchain_chroma import Chroma
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.retrievers import BM25Retriever
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.prompts import ChatPromptTemplate
# Load variables from a local .env file (if one exists) into os.environ.
load_dotenv()
# Start-up has always required both keys: they were previously copied onto
# themselves with os.environ[k] = os.getenv(k), which changes nothing when the
# key is set and raises an opaque TypeError when it is not (environment values
# must be str, not None). Check for presence instead and fail with a message
# that names what is missing.
_missing_env_keys = [
    key for key in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY") if key not in os.environ
]
if _missing_env_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_keys)
    )
+# -------------------------------
+# Utility Functions
+# -------------------------------
 def extract_metadata(text: str) -> dict:
+    """Extracts URLs and social handles from the given text."""
     metadata = {}
     urls = re.findall(r"(Website|Volunteer|Newsletter):\s*(https?://\S+)", text)
     for key, url in urls:
         metadata[f"{platform.lower()}_handle"] = f"https://{platform.lower()}.com/{handle}"
     return metadata
 def load_and_process_data(file_path: str):
+    """Loads JSON data from a file, extracts organization text and metadata, and returns a list of Documents."""
     try:
         data = json.loads(Path(file_path).read_text(encoding='utf-8'))
         docs = []
         print(f"Error loading JSON: {e}")
         return []
+# -------------------------------
+# Data Loading and Preprocessing
+# -------------------------------
+file_path = './2024data.json'  # Ensure this file is available in your environment.
 docs = load_and_process_data(file_path)
+# Use a text splitter to create chunks from the documents
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 text_splitter = RecursiveCharacterTextSplitter(
     chunk_size=1000,
 )
 all_splits = text_splitter.split_documents(docs)
+# -------------------------------
+# Set Up Retrievers
+# -------------------------------
# Dense index: embed every chunk with OpenAIEmbeddings and store the vectors
# in a Chroma collection under ./chroma_db.
# NOTE(review): this runs on every start-up with the same persist directory;
# verify whether re-running appends duplicate chunks to an existing store.
vectorstore = Chroma.from_documents(
    persist_directory="./chroma_db",
    embedding=OpenAIEmbeddings(),
    documents=all_splits,
)

# Keyword index: BM25 built over the same chunks.
bm25_retriever = BM25Retriever.from_documents(all_splits)

# Hybrid retrieval: the vector retriever (k=4 hits) is weighted 0.7 and the
# BM25 retriever 0.3; `retriever` is the name the rest of the script uses.
retriever = ensemble_retriever = EnsembleRetriever(
    weights=[0.7, 0.3],
    retrievers=[
        vectorstore.as_retriever(search_kwargs={"k": 4}),
        bm25_retriever,
    ],
)
+# -------------------------------
+# Prepare Retrieval and Generation Chain
+# -------------------------------
# Chat model used for answer generation; temperature 0 is passed explicitly.
llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")

# Prompt template downloaded from the LangChain hub at start-up.
# NOTE(review): confirm that the variables this template expects match the
# keys the retrieval chain forwards to it at invoke time.
prompt = hub.pull("rlm/rag-prompt")

# Answer step: feeds the retrieved documents and the prompt to the model.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: retrieve with `retriever`, then run the answer step.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+# -------------------------------
+# Define the Chat Callback Function
+# -------------------------------
def message_and_history(user_message, history):
    """Answer one chat turn and stream the updated conversation to Gradio.

    This is a generator callback. Every ``yield`` produces a
    ``(chatbot_messages, state)`` pair in which both items are the same list
    of ``{"role": ..., "content": ...}`` dicts.

    Args:
        user_message: Text submitted by the user. ``None`` or whitespace-only
            input is rejected with a request to enter a valid message; no
            user turn is recorded and the chain is not called.
        history: Conversation so far as a list of message dicts. An empty or
            ``None`` value starts a new conversation seeded with the welcome
            message. The list passed in is never modified; a copy is
            extended and yielded.

    Yields:
        ``(history, history)`` once for rejected input; otherwise once per
        character of the answer as it is revealed, plus a final time when the
        answer is complete.
    """
    bot_prefix = "<b>LA2050 Navigator:</b><br> "

    # Work on a copy so the caller's list is not mutated behind its back.
    if history:
        history = list(history)
    else:
        history = [{
            "role": "assistant",
            "content": bot_prefix + "Welcome to the LA2050 ideas hub! How can I help you today?",
        }]

    # Validate before recording the turn or paying the artificial delay, so a
    # blank submission does not leave an empty user bubble in the chat.
    if not (user_message or "").strip():
        history.append({"role": "assistant", "content": bot_prefix + "Please enter a valid message."})
        yield history, history
        return

    history.append({"role": "user", "content": user_message})

    # Brief pause before answering (kept from the original behaviour).
    time.sleep(1)

    try:
        # The retrieval chain is invoked with the query under "input". The
        # same text is also supplied as "question" because the hub prompt
        # used for the answer step (rlm/rag-prompt) is expected to template
        # on {question}; without it every call would end in the error branch.
        response = rag_chain.invoke({"input": user_message, "question": user_message})
        answer = response["answer"]
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing.
        answer = f"An error occurred: {e}"

    # Reveal the answer one character at a time for a typing effect. `reply`
    # is the same dict that sits at the end of `history`, so each yield shows
    # the text accumulated so far.
    reply = {"role": "assistant", "content": bot_prefix}
    history.append(reply)
    for character in answer:
        reply["content"] += character
        yield history, history

    # Final update; also guarantees one yield when the answer is empty.
    reply["content"] = f"{bot_prefix}{answer}"
    yield history, history
+# -------------------------------
+# Set Up the Gradio Interface
+# -------------------------------
# Custom theme: a Base theme whose primary hue is the hand-picked palette
# below (shades c50 through c950).
green_theme = gr.themes.Base(
    primary_hue=gr.themes.Color(
        c50="#00A168",
        c100="#57B485",
        c200="#D7ECE0",
        c300="#FFFFFF",
        c400="#EAE9E9",
        c500="#000000",
        c600="#3A905E",
        c700="#2A774A",
        c800="#1A5E36",
        c900="#0A4512",
        c950="#052A08",
    ),
)

with gr.Blocks(theme=green_theme) as block:
    # Page header.
    gr.HTML('<div class="chat-header"><h1>LA2050 Navigator</h1></div>')

    # Chat window in "messages" format, pre-populated with the greeting.
    chatbot = gr.Chatbot(
        type="messages",
        value=[
            {
                "role": "assistant",
                "content": "<b>LA2050 Navigator:</b><br> Welcome to the LA2050 ideas hub! How can I help you today?",
            }
        ],
    )

    # Conversation history carried between submissions; starts out empty.
    state = gr.State([])

    # Single-line input for the user's message.
    user_input_box = gr.Textbox(show_label=False, scale=3, placeholder="Type a message")

    # On submit: run the chat callback with the typed text and the stored
    # history, writing its output to the chat window and back into the state;
    # afterwards clear the textbox.
    user_input_box.submit(
        message_and_history,
        inputs=[user_input_box, state],
        outputs=[chatbot, state],
    ).then(lambda: "", inputs=[], outputs=user_input_box)

# NOTE(review): the page header shows this app hosted as a Hugging Face Space;
# confirm that share=True is actually needed in that environment.
block.launch(debug=True, share=True)