Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import openai | |
| import re | |
| import html | |
| from typing import List, Tuple | |
| from langchain_chroma import Chroma # β Updated import | |
| from utils.load_config import LoadConfig | |
| # Load the application configuration once, at import time. The code below reads | |
| # persist_directory, embedding_model, k, number_of_q_a_pairs and llm_engine from it. | |
| APPCFG = LoadConfig() | |
| # Module-level OpenAI client reused by every request. It is built with no arguments, | |
| # so credentials presumably come from the environment (e.g. OPENAI_API_KEY) - confirm. | |
| client = openai.OpenAI() | |
class ChatBot:
    """
    Chatbot that only answers from a pre-uploaded document stored in ChromaDB.

    Both methods are static: they keep no per-instance state and rely on the
    module-level ``APPCFG`` configuration and ``client`` OpenAI client.
    """

    @staticmethod
    def respond(chatbot: List, message: str) -> Tuple:
        """
        Answer ``message`` strictly from the pre-uploaded document.

        Args:
            chatbot: Conversation so far, as a list of
                ``{"role": ..., "content": ...}`` dicts. It is mutated in
                place: the user's message and the assistant's reply (or a
                warning) are appended to it.
            message: The user's question.

        Returns:
            A tuple ``("", chatbot)``: an empty string followed by the updated
            conversation list.
        """
        # Snapshot the recent history BEFORE recording the new turn, so the
        # prompt does not contain the current question twice.
        # NOTE(review): this slice counts individual messages, not Q/A pairs,
        # despite the setting's name - confirm the intended window size.
        recent_history = chatbot[-APPCFG.number_of_q_a_pairs:]

        # Record the user's turn up front so it is kept on every path,
        # including the early returns below.
        chatbot.append({"role": "user", "content": message})

        # Ensure the pre-processed document database exists.
        if not os.path.exists(APPCFG.persist_directory):
            chatbot.append({
                "role": "assistant",
                "content": "⚠️ No document database found. Please ensure the First Aid PDF is preloaded.",
            })
            return "", chatbot

        # Load ChromaDB with the stored document embeddings and fetch the
        # k chunks most similar to the question.
        vectordb = Chroma(
            persist_directory=APPCFG.persist_directory,
            embedding_function=APPCFG.embedding_model,
        )
        docs = vectordb.similarity_search(message, k=APPCFG.k)
        if not docs:
            chatbot.append({
                "role": "assistant",
                "content": "⚠️ No relevant answer found in the document.",
            })
            return "", chatbot

        # Build the prompt: recent history, retrieved passages, then the question.
        retrieved_content = ChatBot.clean_references(docs)
        chat_history = f"Chat history:\n {str(recent_history)}\n\n"
        prompt = f"{chat_history}{retrieved_content}# User question:\n{message}"

        # Generate the response using the configured OpenAI chat model.
        response = client.chat.completions.create(
            model=APPCFG.llm_engine,
            messages=[
                {"role": "system", "content": "Answer only using the First Aid document. If unsure, say 'I don't know'."},
                {"role": "user", "content": prompt},
            ],
        )
        chatbot.append({"role": "assistant", "content": response.choices[0].message.content})
        return "", chatbot

    @staticmethod
    def clean_references(documents: List) -> str:
        """
        Format retrieved documents into a single text block for the prompt.

        Args:
            documents: Objects exposing ``page_content`` and ``metadata``
                attributes (``metadata`` may be a dict or ``None``).

        Returns:
            One "Page .../Source ..." section per document, joined by newlines;
            an empty string when ``documents`` is empty.
        """
        sections = []
        for doc in documents:
            # Tolerate documents that carry no metadata at all.
            metadata = doc.metadata or {}
            source = metadata.get("source", "Unknown source")
            page_number = metadata.get("page", "Unknown page")
            # Plain-text labels: the decorative markers previously embedded
            # here were mis-encoded bytes and only feed the LLM prompt.
            sections.append(
                f"Page {page_number}: {doc.page_content}\nSource: {source}\n"
            )
        return "\n".join(sections)