cryogenic22 committed on
Commit
90959eb
·
verified ·
1 Parent(s): 0a67c2d

Update utils/database.py

Browse files
Files changed (1) hide show
  1. utils/database.py +34 -56
utils/database.py CHANGED
@@ -401,13 +401,6 @@ def add_query(conn: sqlite3.Connection, query: str, response: str, document_id:
401
  st.error(f"Error adding query: {e}")
402
  return False
403
 
404
- # Add to utils/database.py
405
-
406
- import sqlite3
407
- from typing import List, Dict, Optional
408
- from datetime import datetime
409
- from langchain_core.messages import HumanMessage, AIMessage
410
- import streamlit as st
411
 
412
  def create_chat_tables(conn: sqlite3.Connection) -> None:
413
  """Create necessary tables for chat management."""
@@ -1656,15 +1649,25 @@ def get_document_content(conn: sqlite3.Connection, document_id: int) -> Optional
1656
  st.error(f"Error retrieving document content: {e}")
1657
  return None
1658
 
1659
- def initialize_qa_system(vector_store):
1660
- """
1661
- Initialize QA system with optimized retrieval.
 
 
 
 
 
 
 
1662
 
1663
- Args:
1664
- vector_store (FAISS): FAISS vector store instance.
1665
- Returns:
1666
- dict: QA system chain or None if initialization fails.
1667
- """
 
 
 
1668
  try:
1669
  llm = ChatOpenAI(
1670
  temperature=0.5,
@@ -1673,60 +1676,35 @@ def initialize_qa_system(vector_store):
1673
  api_key=os.environ.get("OPENAI_API_KEY")
1674
  )
1675
 
1676
- # Optimize retriever settings and add source tracking
1677
  retriever = vector_store.as_retriever(
1678
  search_kwargs={
1679
- "k": 3, # Retrieve fewer, more relevant chunks
1680
- "fetch_k": 5, # Consider more candidates before selecting top k
1681
- "include_metadata": True # Enable source tracking
1682
  }
1683
  )
1684
 
1685
- # Create a template that enforces clean formatting
1686
  prompt = ChatPromptTemplate.from_messages([
1687
  ("system", """
1688
- You are an expert consultant specializing in analyzing Request for Proposal (RFP) documents. Your goal is to assist users by providing clear, concise, and professional insights based on the content provided. Please adhere to the following guidelines when crafting your responses:
1689
 
1690
  Begin with a summary that highlights the key findings or answers the main query.
1691
-
1692
- Structured Format: Use clear and descriptive section headers to organize the information logically.
1693
-
1694
- Bullet Points: Utilize bullet points for lists or complex information to enhance readability.
1695
-
1696
- Source Attribution: Cite specific sections or page numbers from the RFP document when referencing information.
1697
-
1698
- Professional Formatting: Maintain a clean and professional layout using Markdown formatting, such as headings, bullet points, bold, italics, and tables where appropriate.
1699
-
1700
- Use Markdown Syntax: Ensure the response is fully formatted using Markdown for optimal readability in the chat.
1701
-
1702
- Focused Content: Keep your responses concise and directly related to the user's query, avoiding unnecessary information.
1703
-
1704
- Scope Awareness: If a query falls outside the provided information or context, politely acknowledge this and suggest consulting the relevant sections or additional sources.
1705
-
1706
- Confidentiality: Respect the confidentiality of the information provided and avoid sharing any sensitive data beyond the scope of the query.
1707
-
1708
- Tone and Language: Use formal and professional language, ensuring clarity and precision in your responses.
1709
-
1710
- Accuracy: Double-check all information for accuracy and completeness before providing it to the user.
1711
- """),
1712
  MessagesPlaceholder(variable_name="chat_history"),
1713
  ("human", "{input}\n\nContext: {context}")
1714
  ])
1715
-
1716
- def get_chat_history(inputs):
1717
- chat_history = inputs.get("chat_history", [])
1718
- if not isinstance(chat_history, list):
1719
- return []
1720
- return [msg for msg in chat_history if isinstance(msg, BaseMessage)]
1721
-
1722
- def get_context(inputs):
1723
- docs = retriever.get_relevant_documents(inputs["input"])
1724
- context_parts = []
1725
- for doc in docs:
1726
- source = doc.metadata.get('source', 'Unknown source')
1727
- context_parts.append(f"\nFrom {source}:\n{doc.page_content}")
1728
- return "\n".join(context_parts)
1729
 
 
1730
  chain = (
1731
  {
1732
  "context": lambda x: get_context_with_sources(retriever, x["input"]),
 
401
  st.error(f"Error adding query: {e}")
402
  return False
403
 
 
 
 
 
 
 
 
404
 
405
  def create_chat_tables(conn: sqlite3.Connection) -> None:
406
  """Create necessary tables for chat management."""
 
1649
  st.error(f"Error retrieving document content: {e}")
1650
  return None
1651
 
1652
def get_context_with_sources(retriever, query):
    """Retrieve documents relevant to *query* and format them with source attribution.

    Args:
        retriever: Object exposing ``get_relevant_documents(query)`` whose
            results carry ``metadata`` (dict) and ``page_content`` (str).
            NOTE(review): ``get_relevant_documents`` is the legacy LangChain
            retriever API; newer releases prefer ``invoke`` — confirm the
            pinned langchain version before migrating.
        query (str): The user query to search for.

    Returns:
        str: Each document's content prefixed with ``"From <source>:"``,
        joined by newlines; an empty string when nothing is retrieved.
    """
    docs = retriever.get_relevant_documents(query)
    # Chunks without source metadata fall back to a readable placeholder.
    return "\n".join(
        f"\nFrom {doc.metadata.get('source', 'Unknown source')}:\n{doc.page_content}"
        for doc in docs
    )
1662
 
1663
def format_chat_history(chat_history):
    """Return the Human/AI messages from *chat_history* as a plain list.

    Any input that is not a non-empty list yields an empty history;
    entries that are not ``HumanMessage``/``AIMessage`` instances are
    silently dropped.
    """
    if isinstance(chat_history, list) and chat_history:
        kept = []
        for entry in chat_history:
            if isinstance(entry, (HumanMessage, AIMessage)):
                kept.append(entry)
        return kept
    return []
1668
+
1669
+ def initialize_qa_system(vector_store):
1670
+ """Initialize QA system with optimized retrieval."""
1671
  try:
1672
  llm = ChatOpenAI(
1673
  temperature=0.5,
 
1676
  api_key=os.environ.get("OPENAI_API_KEY")
1677
  )
1678
 
1679
+ # Optimize retriever settings
1680
  retriever = vector_store.as_retriever(
1681
  search_kwargs={
1682
+ "k": 3,
1683
+ "fetch_k": 5,
1684
+ "include_metadata": True
1685
  }
1686
  )
1687
 
1688
+ # Create system prompt template
1689
  prompt = ChatPromptTemplate.from_messages([
1690
  ("system", """
1691
+ You are an expert consultant specializing in analyzing Request for Proposal (RFP) documents. Your goal is to assist users by providing clear, concise, and professional insights based on the content provided. Please adhere to the following guidelines:
1692
 
1693
  Begin with a summary that highlights the key findings or answers the main query.
1694
+ Use clear section headers to organize information logically.
1695
+ Utilize bullet points for lists or complex information.
1696
+ Cite specific sections or page numbers from the RFP document when referencing information.
1697
+ Maintain professional formatting using Markdown.
1698
+ Keep responses focused and directly related to the query.
1699
+ Acknowledge when information falls outside the provided context.
1700
+ Use formal and professional language.
1701
+ Ensure accuracy and completeness in responses.
1702
+ """),
 
 
 
 
 
 
 
 
 
 
 
 
1703
  MessagesPlaceholder(variable_name="chat_history"),
1704
  ("human", "{input}\n\nContext: {context}")
1705
  ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1706
 
1707
+ # Create the chain
1708
  chain = (
1709
  {
1710
  "context": lambda x: get_context_with_sources(retriever, x["input"]),