Spaces:
Build error
Build error
Update utils/database.py
Browse files - utils/database.py +34 -56
utils/database.py
CHANGED
|
@@ -401,13 +401,6 @@ def add_query(conn: sqlite3.Connection, query: str, response: str, document_id:
|
|
| 401 |
st.error(f"Error adding query: {e}")
|
| 402 |
return False
|
| 403 |
|
| 404 |
-
# Add to utils/database.py
|
| 405 |
-
|
| 406 |
-
import sqlite3
|
| 407 |
-
from typing import List, Dict, Optional
|
| 408 |
-
from datetime import datetime
|
| 409 |
-
from langchain_core.messages import HumanMessage, AIMessage
|
| 410 |
-
import streamlit as st
|
| 411 |
|
| 412 |
def create_chat_tables(conn: sqlite3.Connection) -> None:
|
| 413 |
"""Create necessary tables for chat management."""
|
|
@@ -1656,15 +1649,25 @@ def get_document_content(conn: sqlite3.Connection, document_id: int) -> Optional
|
|
| 1656 |
st.error(f"Error retrieving document content: {e}")
|
| 1657 |
return None
|
| 1658 |
|
| 1659 |
-
def initialize_qa_system(vector_store):
|
| 1660 |
-
"""
|
| 1661 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1662 |
|
| 1663 |
-
|
| 1664 |
-
|
| 1665 |
-
|
| 1666 |
-
|
| 1667 |
-
|
|
|
|
|
|
|
|
|
|
| 1668 |
try:
|
| 1669 |
llm = ChatOpenAI(
|
| 1670 |
temperature=0.5,
|
|
@@ -1673,60 +1676,35 @@ def initialize_qa_system(vector_store):
|
|
| 1673 |
api_key=os.environ.get("OPENAI_API_KEY")
|
| 1674 |
)
|
| 1675 |
|
| 1676 |
-
# Optimize retriever settings
|
| 1677 |
retriever = vector_store.as_retriever(
|
| 1678 |
search_kwargs={
|
| 1679 |
-
"k": 3,
|
| 1680 |
-
"fetch_k": 5,
|
| 1681 |
-
"include_metadata": True
|
| 1682 |
}
|
| 1683 |
)
|
| 1684 |
|
| 1685 |
-
# Create
|
| 1686 |
prompt = ChatPromptTemplate.from_messages([
|
| 1687 |
("system", """
|
| 1688 |
-
You are an expert consultant specializing in analyzing Request for Proposal (RFP) documents. Your goal is to assist users by providing clear, concise, and professional insights based on the content provided. Please adhere to the following guidelines:
|
| 1689 |
|
| 1690 |
Begin with a summary that highlights the key findings or answers the main query.
|
| 1691 |
-
|
| 1692 |
-
|
| 1693 |
-
|
| 1694 |
-
|
| 1695 |
-
|
| 1696 |
-
|
| 1697 |
-
|
| 1698 |
-
|
| 1699 |
-
|
| 1700 |
-
Use Markdown Syntax: Ensure the response is fully formatted using Markdown for optimal readability in the chat.
|
| 1701 |
-
|
| 1702 |
-
Focused Content: Keep your responses concise and directly related to the user's query, avoiding unnecessary information.
|
| 1703 |
-
|
| 1704 |
-
Scope Awareness: If a query falls outside the provided information or context, politely acknowledge this and suggest consulting the relevant sections or additional sources.
|
| 1705 |
-
|
| 1706 |
-
Confidentiality: Respect the confidentiality of the information provided and avoid sharing any sensitive data beyond the scope of the query.
|
| 1707 |
-
|
| 1708 |
-
Tone and Language: Use formal and professional language, ensuring clarity and precision in your responses.
|
| 1709 |
-
|
| 1710 |
-
Accuracy: Double-check all information for accuracy and completeness before providing it to the user.
|
| 1711 |
-
"""),
|
| 1712 |
MessagesPlaceholder(variable_name="chat_history"),
|
| 1713 |
("human", "{input}\n\nContext: {context}")
|
| 1714 |
])
|
| 1715 |
-
|
| 1716 |
-
def get_chat_history(inputs):
|
| 1717 |
-
chat_history = inputs.get("chat_history", [])
|
| 1718 |
-
if not isinstance(chat_history, list):
|
| 1719 |
-
return []
|
| 1720 |
-
return [msg for msg in chat_history if isinstance(msg, BaseMessage)]
|
| 1721 |
-
|
| 1722 |
-
def get_context(inputs):
|
| 1723 |
-
docs = retriever.get_relevant_documents(inputs["input"])
|
| 1724 |
-
context_parts = []
|
| 1725 |
-
for doc in docs:
|
| 1726 |
-
source = doc.metadata.get('source', 'Unknown source')
|
| 1727 |
-
context_parts.append(f"\nFrom {source}:\n{doc.page_content}")
|
| 1728 |
-
return "\n".join(context_parts)
|
| 1729 |
|
|
|
|
| 1730 |
chain = (
|
| 1731 |
{
|
| 1732 |
"context": lambda x: get_context_with_sources(retriever, x["input"]),
|
|
|
|
| 401 |
st.error(f"Error adding query: {e}")
|
| 402 |
return False
|
| 403 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
|
| 405 |
def create_chat_tables(conn: sqlite3.Connection) -> None:
|
| 406 |
"""Create necessary tables for chat management."""
|
|
|
|
| 1649 |
st.error(f"Error retrieving document content: {e}")
|
| 1650 |
return None
|
| 1651 |
|
| 1652 |
+
def get_context_with_sources(retriever, query):
|
| 1653 |
+
"""Get context with source documents."""
|
| 1654 |
+
docs = retriever.get_relevant_documents(query)
|
| 1655 |
+
formatted_docs = []
|
| 1656 |
+
|
| 1657 |
+
for doc in docs:
|
| 1658 |
+
source = doc.metadata.get('source', 'Unknown source')
|
| 1659 |
+
formatted_docs.append(f"\nFrom {source}:\n{doc.page_content}")
|
| 1660 |
+
|
| 1661 |
+
return "\n".join(formatted_docs)
|
| 1662 |
|
| 1663 |
+
def format_chat_history(chat_history):
|
| 1664 |
+
"""Format chat history for the prompt."""
|
| 1665 |
+
if not chat_history or not isinstance(chat_history, list):
|
| 1666 |
+
return []
|
| 1667 |
+
return [msg for msg in chat_history if isinstance(msg, (HumanMessage, AIMessage))]
|
| 1668 |
+
|
| 1669 |
+
def initialize_qa_system(vector_store):
|
| 1670 |
+
"""Initialize QA system with optimized retrieval."""
|
| 1671 |
try:
|
| 1672 |
llm = ChatOpenAI(
|
| 1673 |
temperature=0.5,
|
|
|
|
| 1676 |
api_key=os.environ.get("OPENAI_API_KEY")
|
| 1677 |
)
|
| 1678 |
|
| 1679 |
+
# Optimize retriever settings
|
| 1680 |
retriever = vector_store.as_retriever(
|
| 1681 |
search_kwargs={
|
| 1682 |
+
"k": 3,
|
| 1683 |
+
"fetch_k": 5,
|
| 1684 |
+
"include_metadata": True
|
| 1685 |
}
|
| 1686 |
)
|
| 1687 |
|
| 1688 |
+
# Create system prompt template
|
| 1689 |
prompt = ChatPromptTemplate.from_messages([
|
| 1690 |
("system", """
|
| 1691 |
+
You are an expert consultant specializing in analyzing Request for Proposal (RFP) documents. Your goal is to assist users by providing clear, concise, and professional insights based on the content provided. Please adhere to the following guidelines:
|
| 1692 |
|
| 1693 |
Begin with a summary that highlights the key findings or answers the main query.
|
| 1694 |
+
Use clear section headers to organize information logically.
|
| 1695 |
+
Utilize bullet points for lists or complex information.
|
| 1696 |
+
Cite specific sections or page numbers from the RFP document when referencing information.
|
| 1697 |
+
Maintain professional formatting using Markdown.
|
| 1698 |
+
Keep responses focused and directly related to the query.
|
| 1699 |
+
Acknowledge when information falls outside the provided context.
|
| 1700 |
+
Use formal and professional language.
|
| 1701 |
+
Ensure accuracy and completeness in responses.
|
| 1702 |
+
"""),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1703 |
MessagesPlaceholder(variable_name="chat_history"),
|
| 1704 |
("human", "{input}\n\nContext: {context}")
|
| 1705 |
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1706 |
|
| 1707 |
+
# Create the chain
|
| 1708 |
chain = (
|
| 1709 |
{
|
| 1710 |
"context": lambda x: get_context_with_sources(retriever, x["input"]),
|