# Captured file listing — Spaces status: Sleeping.
# File size: 2,984 bytes; revision f8bf7df (79 lines in the original listing).
import gradio as gr
import os
import openai
import re
import html
from typing import List, Tuple
from langchain_chroma import Chroma  # ✅ Updated import
from utils.load_config import LoadConfig
# Load configuration
# Module-level settings object. The ChatBot class below reads persist_directory,
# embedding_model, k, number_of_q_a_pairs and llm_engine from it.
APPCFG = LoadConfig()
# OpenAI Client
# Constructed with no explicit arguments, so no credentials are passed here.
# NOTE(review): presumably the API key is taken from the environment (possibly
# prepared by LoadConfig above) — confirm before reordering these two statements.
client = openai.OpenAI()
class ChatBot:
    """
    Chatbot that only retrieves answers from a pre-uploaded document stored in ChromaDB.

    All methods are static; the class is used as a namespace. It relies on the
    module-level ``APPCFG`` (configuration) and ``client`` (OpenAI client).
    """

    @staticmethod
    def respond(chatbot: List, message: str) -> Tuple:
        """
        Answer ``message`` strictly from the pre-uploaded document.

        The question is matched against the persisted Chroma store; the retrieved
        passages, a window of recent history and the question are sent to the
        chat-completions endpoint. If the store is missing or nothing is
        retrieved, a warning is returned instead of calling the model.

        Args:
            chatbot: Conversation so far, a list of
                ``{"role": ..., "content": ...}`` dicts. Mutated in place: the
                user's message and the assistant's reply are both appended on
                every path.
            message: The user's question.

        Returns:
            ``("", chatbot)`` — an empty string (presumably used by the UI to
            clear the input box) and the updated history.
        """
        # Ensure the pre-processed document database exists.
        if not os.path.exists(APPCFG.persist_directory):
            return ChatBot._reply(
                chatbot,
                message,
                "⚠️ No document database found. Please ensure the First Aid PDF is preloaded.",
            )

        # Load ChromaDB with the stored document embeddings and fetch the top-k passages.
        vectordb = Chroma(
            persist_directory=APPCFG.persist_directory,
            embedding_function=APPCFG.embedding_model,
        )
        docs = vectordb.similarity_search(message, k=APPCFG.k)
        if not docs:
            return ChatBot._reply(
                chatbot,
                message,
                "⚠️ No relevant answer found in the document.",
            )

        # Extract retrieved content.
        retrieved_content = ChatBot.clean_references(docs)

        # Every Q&A pair occupies two history entries (one user dict and one
        # assistant dict), so the window is twice the configured pair count.
        # Guard the zero case explicitly: ``chatbot[-0:]`` would return the
        # whole history instead of nothing.
        history_len = 2 * APPCFG.number_of_q_a_pairs
        recent_turns = chatbot[-history_len:] if history_len > 0 else []
        chat_history = f"Chat history:\n {str(recent_turns)}\n\n"
        prompt = f"{chat_history}{retrieved_content}# User question:\n{message}"

        # Generate the response using the configured OpenAI chat model.
        response = client.chat.completions.create(
            model=APPCFG.llm_engine,
            messages=[
                {"role": "system", "content": "Answer only using the First Aid document. If unsure, say 'I don't know'."},
                {"role": "user", "content": prompt},
            ],
        )
        return ChatBot._reply(chatbot, message, response.choices[0].message.content)

    @staticmethod
    def _reply(chatbot: List, message: str, answer: str) -> Tuple:
        """Append the user turn and the assistant turn, then build the handler result."""
        chatbot.append({"role": "user", "content": message})  # User input
        chatbot.append({"role": "assistant", "content": answer})  # AI response
        return "", chatbot

    @staticmethod
    def clean_references(documents: List) -> str:
        """
        Extract and format relevant content from retrieved documents.

        Args:
            documents: Objects exposing ``page_content`` and ``metadata``
                (a dict, or ``None``/empty when no metadata is available).

        Returns:
            One formatted entry per document (page, content, source), joined
            with newlines; an empty string when ``documents`` is empty.
        """
        cleaned_content = []
        for doc in documents:
            # Handle missing metadata safely: a ``None`` value is treated as empty.
            metadata = doc.metadata or {}
            source = metadata.get('source', 'Unknown source')
            page_number = metadata.get('page', 'Unknown page')
            # NOTE(review): the leading glyph on each line looks like a
            # mis-encoded emoji; the original code point cannot be recovered
            # from this file, so the text is left exactly as found.
            cleaned_content.append(
                f"๐ Page {page_number}: {doc.page_content}\n๐ Source: {source}\n"
            )
        return "\n".join(cleaned_content)
|