37-AN
committed on
Commit
·
6c6cf17
1
Parent(s):
48a1a2b
Fix output keys error and file upload issues
Browse files- app/core/agent.py +68 -30
- app/core/memory.py +26 -7
- app/ui/streamlit_app.py +68 -35
- app/utils/helpers.py +111 -46
app/core/agent.py
CHANGED
|
@@ -1,8 +1,13 @@
|
|
| 1 |
import sys
|
| 2 |
import os
|
|
|
|
| 3 |
from typing import List, Dict, Any
|
| 4 |
from langchain.prompts import PromptTemplate
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
# Add project root to path for imports
|
| 7 |
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
| 8 |
from app.core.memory import MemoryManager
|
|
@@ -35,39 +40,72 @@ Assistant:"""
|
|
| 35 |
input_variables=["context", "chat_history", "question"],
|
| 36 |
template=self.system_template
|
| 37 |
)
|
|
|
|
|
|
|
| 38 |
|
| 39 |
def query(self, question: str) -> Dict[str, Any]:
|
| 40 |
"""Process a user query and return a response."""
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
"
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
"sources":
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
def add_conversation_to_memory(self, question: str, answer: str):
|
| 65 |
"""Add a conversation exchange to the memory for future context."""
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import sys
|
| 2 |
import os
|
| 3 |
+
import logging
|
| 4 |
from typing import List, Dict, Any
|
| 5 |
from langchain.prompts import PromptTemplate
|
| 6 |
|
| 7 |
+
# Configure logging
|
| 8 |
+
logging.basicConfig(level=logging.INFO)
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
# Add project root to path for imports
|
| 12 |
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
| 13 |
from app.core.memory import MemoryManager
|
|
|
|
| 40 |
input_variables=["context", "chat_history", "question"],
|
| 41 |
template=self.system_template
|
| 42 |
)
|
| 43 |
+
|
| 44 |
+
logger.info("AssistantAgent initialized successfully")
|
| 45 |
|
| 46 |
def query(self, question: str) -> Dict[str, Any]:
    """Process a user query and return a response.

    Calls ``self.rag_chain`` with ``{"question": question}`` and converts the
    result into a plain dictionary that the UI can render.

    Args:
        question: The user's question.

    Returns:
        A dict with two keys:
        ``"answer"`` - the chain's answer, or a fallback message when the
        chain returned no ``"answer"`` key or raised;
        ``"sources"`` - a list of dicts with ``content`` (at most 100
        characters plus ``"..."``), ``source``, ``file_name`` and ``page``.

    This method does not raise: any exception is logged and turned into a
    fallback response with an empty source list.
    """
    try:
        logger.info(f"Processing query: {question[:50]}...")

        # Use the RAG chain to get an answer
        response = self.rag_chain({"question": question})

        # Extract the answer and source documents
        logger.debug(f"RAG chain response keys: {response.keys()}")

        if "answer" not in response:
            logger.warning(f"Missing 'answer' key in response. Available keys: {response.keys()}")
            # Create a fallback answer if the expected key is missing
            answer = "I'm sorry, I encountered an issue processing your request. Let me try a simpler response."
        else:
            answer = response["answer"]

        # Handle different variations of source document keys
        source_docs = []
        if "source_documents" in response:
            source_docs = response["source_documents"]
        elif "sources" in response:
            source_docs = response["sources"]

        # Normalize the shape before iterating: a missing value becomes an
        # empty list, and a plain string is treated as ONE source instead of
        # being iterated character by character.
        if source_docs is None:
            source_docs = []
        elif isinstance(source_docs, str):
            source_docs = [source_docs] if source_docs.strip() else []

        # Format source documents for display
        sources = []
        for doc in source_docs:
            # ``metadata`` may be absent or explicitly None; both mean "no metadata".
            metadata = getattr(doc, 'metadata', None) or {}
            page_content = getattr(doc, 'page_content', str(doc)[:100])

            sources.append({
                "content": page_content[:100] + "..." if len(page_content) > 100 else page_content,
                "source": metadata.get("source", "Unknown"),
                "file_name": metadata.get("file_name", "Unknown"),
                # None when the document carries no page information.
                "page": metadata.get("page"),
            })

        logger.info(f"Query processed successfully with {len(sources)} sources")
        return {
            "answer": answer,
            "sources": sources
        }
    except Exception as e:
        # logger.exception keeps the traceback, which the plain message loses.
        logger.exception(f"Error in query method: {str(e)}")
        # Return a graceful fallback response
        return {
            "answer": f"I encountered an error while processing your question. Error details: {str(e)}",
            "sources": []
        }
|
| 96 |
|
| 97 |
def add_conversation_to_memory(self, question: str, answer: str):
    """Store one question/answer exchange in memory for future context.

    The answer text is what gets passed to ``self.memory_manager.add_texts``;
    the question travels alongside it as metadata, tagged with the type
    ``"conversation"``.

    Args:
        question: The user's question.
        answer: The assistant's answer to store.

    Returns:
        None. Failures are logged and swallowed on purpose, because losing a
        memory entry is not critical for the user experience.
    """
    try:
        # Describe the exchange so it can be told apart from ingested documents
        exchange_metadata = dict(type="conversation", question=question)

        logger.info("Adding conversation to memory")
        texts, metadatas = [answer], [exchange_metadata]
        self.memory_manager.add_texts(texts, metadatas)
    except Exception as exc:
        # Best-effort: report the problem and carry on
        logger.error(f"Error adding conversation to memory: {str(exc)}")
|
app/core/memory.py
CHANGED
|
@@ -116,13 +116,32 @@ class MemoryManager:
|
|
| 116 |
|
| 117 |
def create_rag_chain(self):
|
| 118 |
"""Create a RAG chain for question answering."""
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
def add_texts(self, texts, metadatas=None):
|
| 128 |
"""Add texts to the vector store."""
|
|
|
|
| 116 |
|
| 117 |
def create_rag_chain(self):
    """Build the retrieval chain used for question answering.

    Returns:
        The object produced by ``ConversationalRetrievalChain.from_llm`` using
        ``self.llm``, ``self.get_retriever()`` and ``self.memory``, configured
        to return source documents and not the generated question.
        If construction raises, a plain function is returned instead. It
        accepts the same ``{"question": ...}`` input and always answers with a
        fixed apology and an empty ``"source_documents"`` list.
    """
    def mock_chain(inputs):
        # Stand-in with the chain's call shape, used only when the real chain cannot be built
        logger.info(f"Mock chain received query: {inputs.get('question', '')}")
        return {
            "answer": "I'm having trouble accessing the knowledge base. I can only answer general questions right now.",
            "source_documents": []
        }

    try:
        logger.info("Creating ConversationalRetrievalChain")
        # Return keys are configured to match what agent.py expects
        return ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=self.get_retriever(),
            memory=self.memory,
            return_source_documents=True,
            return_generated_question=False,
        )
    except Exception as exc:
        logger.error(f"Error creating RAG chain: {exc}")
        logger.warning("Using fallback mock chain")
        return mock_chain
|
| 145 |
|
| 146 |
def add_texts(self, texts, metadatas=None):
|
| 147 |
"""Add texts to the vector store."""
|
app/ui/streamlit_app.py
CHANGED
|
@@ -18,14 +18,14 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
|
|
| 18 |
try:
|
| 19 |
from app.core.agent import AssistantAgent
|
| 20 |
from app.core.ingestion import DocumentProcessor
|
| 21 |
-
from app.utils.helpers import get_document_path, format_sources, save_conversation
|
| 22 |
from app.config import LLM_MODEL, EMBEDDING_MODEL
|
| 23 |
except ImportError:
|
| 24 |
# Fallback to direct imports if app is not recognized as a package
|
| 25 |
sys.path.append(os.path.abspath('.'))
|
| 26 |
from app.core.agent import AssistantAgent
|
| 27 |
from app.core.ingestion import DocumentProcessor
|
| 28 |
-
from app.utils.helpers import get_document_path, format_sources, save_conversation
|
| 29 |
from app.config import LLM_MODEL, EMBEDDING_MODEL
|
| 30 |
|
| 31 |
# Set page config
|
|
@@ -89,33 +89,56 @@ st.title("🤗 Personal AI Assistant (Hugging Face)")
|
|
| 89 |
# Create a sidebar for uploading documents and settings
|
| 90 |
with st.sidebar:
|
| 91 |
st.header("Upload Documents")
|
| 92 |
-
uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt", "csv"])
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
tmp.write(uploaded_file.getvalue())
|
| 98 |
-
tmp_path = tmp.name
|
| 99 |
|
| 100 |
-
if
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
st.header("Raw Text Input")
|
| 121 |
text_input = st.text_area("Enter text to add to the knowledge base")
|
|
@@ -135,6 +158,7 @@ with st.sidebar:
|
|
| 135 |
|
| 136 |
st.success("Text added to knowledge base successfully!")
|
| 137 |
except Exception as e:
|
|
|
|
| 138 |
st.error(f"Error adding text: {str(e)}")
|
| 139 |
|
| 140 |
# Display model information
|
|
@@ -166,8 +190,9 @@ for message in st.session_state.messages:
|
|
| 166 |
sources = message["sources"]
|
| 167 |
if sources:
|
| 168 |
for i, source in enumerate(sources, 1):
|
| 169 |
-
st.write(f"{i}. {source
|
| 170 |
-
|
|
|
|
| 171 |
else:
|
| 172 |
st.write("No specific sources used.")
|
| 173 |
|
|
@@ -197,8 +222,9 @@ if prompt := st.chat_input("Ask a question..."):
|
|
| 197 |
else:
|
| 198 |
raise
|
| 199 |
|
| 200 |
-
answer
|
| 201 |
-
|
|
|
|
| 202 |
|
| 203 |
# Display the response
|
| 204 |
st.write(answer)
|
|
@@ -207,13 +233,17 @@ if prompt := st.chat_input("Ask a question..."):
|
|
| 207 |
with st.expander("View Sources"):
|
| 208 |
if sources:
|
| 209 |
for i, source in enumerate(sources, 1):
|
| 210 |
-
st.write(f"{i}. {source
|
| 211 |
-
|
|
|
|
| 212 |
else:
|
| 213 |
st.write("No specific sources used.")
|
| 214 |
|
| 215 |
# Save conversation
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
# Add assistant response to chat history
|
| 219 |
st.session_state.messages.append({
|
|
@@ -223,7 +253,10 @@ if prompt := st.chat_input("Ask a question..."):
|
|
| 223 |
})
|
| 224 |
|
| 225 |
# Update the agent's memory
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
except Exception as e:
|
| 229 |
error_msg = f"Error generating response: {str(e)}"
|
|
|
|
| 18 |
try:
|
| 19 |
from app.core.agent import AssistantAgent
|
| 20 |
from app.core.ingestion import DocumentProcessor
|
| 21 |
+
from app.utils.helpers import get_document_path, format_sources, save_conversation, copy_uploaded_file
|
| 22 |
from app.config import LLM_MODEL, EMBEDDING_MODEL
|
| 23 |
except ImportError:
|
| 24 |
# Fallback to direct imports if app is not recognized as a package
|
| 25 |
sys.path.append(os.path.abspath('.'))
|
| 26 |
from app.core.agent import AssistantAgent
|
| 27 |
from app.core.ingestion import DocumentProcessor
|
| 28 |
+
from app.utils.helpers import get_document_path, format_sources, save_conversation, copy_uploaded_file
|
| 29 |
from app.config import LLM_MODEL, EMBEDDING_MODEL
|
| 30 |
|
| 31 |
# Set page config
|
|
|
|
| 89 |
# Create a sidebar for uploading documents and settings
|
| 90 |
with st.sidebar:
|
| 91 |
st.header("Upload Documents")
|
|
|
|
| 92 |
|
| 93 |
+
# Add file uploader with error handling
|
| 94 |
+
try:
|
| 95 |
+
uploaded_file = st.file_uploader("Choose a file", type=["pdf", "txt", "csv"])
|
|
|
|
|
|
|
| 96 |
|
| 97 |
+
if uploaded_file is not None:
|
| 98 |
+
# Handle the uploaded file
|
| 99 |
+
if st.button("Process Document"):
|
| 100 |
+
with st.spinner("Processing document..."):
|
| 101 |
+
try:
|
| 102 |
+
# Create a temporary file with proper error handling
|
| 103 |
+
temp_dir = tempfile.gettempdir()
|
| 104 |
+
temp_path = os.path.join(temp_dir, uploaded_file.name)
|
| 105 |
+
|
| 106 |
+
logger.info(f"Saving uploaded file to temporary path: {temp_path}")
|
| 107 |
+
|
| 108 |
+
# Write the file data to the temporary file
|
| 109 |
+
with open(temp_path, "wb") as temp_file:
|
| 110 |
+
temp_file.write(uploaded_file.getvalue())
|
| 111 |
+
|
| 112 |
+
# Get a path to store the document permanently
|
| 113 |
+
doc_path = get_document_path(uploaded_file.name)
|
| 114 |
+
|
| 115 |
+
# Copy the file to the documents directory
|
| 116 |
+
logger.info(f"Copying file to documents directory: {doc_path}")
|
| 117 |
+
copy_success = copy_uploaded_file(temp_path, doc_path)
|
| 118 |
+
|
| 119 |
+
if not copy_success:
|
| 120 |
+
logger.warning("Using temporary file path instead of documents directory")
|
| 121 |
+
doc_path = temp_path
|
| 122 |
+
|
| 123 |
+
# Ingest the document
|
| 124 |
+
logger.info("Ingesting document")
|
| 125 |
+
document_processor.ingest_file(temp_path, {"original_name": uploaded_file.name})
|
| 126 |
+
|
| 127 |
+
# Clean up the temporary file if different from doc_path
|
| 128 |
+
if temp_path != doc_path and os.path.exists(temp_path):
|
| 129 |
+
try:
|
| 130 |
+
os.unlink(temp_path)
|
| 131 |
+
logger.info(f"Temporary file removed: {temp_path}")
|
| 132 |
+
except Exception as e:
|
| 133 |
+
logger.warning(f"Could not remove temporary file: {e}")
|
| 134 |
+
|
| 135 |
+
st.success(f"Document {uploaded_file.name} processed successfully!")
|
| 136 |
+
except Exception as e:
|
| 137 |
+
logger.error(f"Error processing document: {str(e)}")
|
| 138 |
+
st.error(f"Error processing document: {str(e)}")
|
| 139 |
+
except Exception as e:
|
| 140 |
+
logger.error(f"File uploader error: {str(e)}")
|
| 141 |
+
st.error(f"File upload functionality is currently unavailable: {str(e)}")
|
| 142 |
|
| 143 |
st.header("Raw Text Input")
|
| 144 |
text_input = st.text_area("Enter text to add to the knowledge base")
|
|
|
|
| 158 |
|
| 159 |
st.success("Text added to knowledge base successfully!")
|
| 160 |
except Exception as e:
|
| 161 |
+
logger.error(f"Error adding text: {str(e)}")
|
| 162 |
st.error(f"Error adding text: {str(e)}")
|
| 163 |
|
| 164 |
# Display model information
|
|
|
|
| 190 |
sources = message["sources"]
|
| 191 |
if sources:
|
| 192 |
for i, source in enumerate(sources, 1):
|
| 193 |
+
st.write(f"{i}. {source.get('file_name', 'Unknown')}" +
|
| 194 |
+
(f" (Page {source['page']})" if source.get('page') else ""))
|
| 195 |
+
st.text(source.get('content', 'No content available'))
|
| 196 |
else:
|
| 197 |
st.write("No specific sources used.")
|
| 198 |
|
|
|
|
| 222 |
else:
|
| 223 |
raise
|
| 224 |
|
| 225 |
+
# Extract answer and sources, with fallbacks if missing
|
| 226 |
+
answer = response.get("answer", "I couldn't generate a proper response.")
|
| 227 |
+
sources = response.get("sources", [])
|
| 228 |
|
| 229 |
# Display the response
|
| 230 |
st.write(answer)
|
|
|
|
| 233 |
with st.expander("View Sources"):
|
| 234 |
if sources:
|
| 235 |
for i, source in enumerate(sources, 1):
|
| 236 |
+
st.write(f"{i}. {source.get('file_name', 'Unknown')}" +
|
| 237 |
+
(f" (Page {source['page']})" if source.get('page') else ""))
|
| 238 |
+
st.text(source.get('content', 'No content available'))
|
| 239 |
else:
|
| 240 |
st.write("No specific sources used.")
|
| 241 |
|
| 242 |
# Save conversation
|
| 243 |
+
try:
|
| 244 |
+
save_conversation(prompt, answer, sources)
|
| 245 |
+
except Exception as save_error:
|
| 246 |
+
logger.error(f"Error saving conversation: {save_error}")
|
| 247 |
|
| 248 |
# Add assistant response to chat history
|
| 249 |
st.session_state.messages.append({
|
|
|
|
| 253 |
})
|
| 254 |
|
| 255 |
# Update the agent's memory
|
| 256 |
+
try:
|
| 257 |
+
agent.add_conversation_to_memory(prompt, answer)
|
| 258 |
+
except Exception as memory_error:
|
| 259 |
+
logger.error(f"Error adding to memory: {memory_error}")
|
| 260 |
|
| 261 |
except Exception as e:
|
| 262 |
error_msg = f"Error generating response: {str(e)}"
|
app/utils/helpers.py
CHANGED
|
@@ -1,67 +1,132 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
|
|
|
|
|
|
| 3 |
from datetime import datetime
|
| 4 |
from typing import List, Dict, Any
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
def sanitize_filename(filename: str) -> str:
|
| 7 |
"""Sanitize a filename by removing invalid characters."""
|
| 8 |
# Replace invalid characters with underscores
|
| 9 |
invalid_chars = '<>:"/\\|?*'
|
| 10 |
for char in invalid_chars:
|
| 11 |
filename = filename.replace(char, '_')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
return filename
|
| 13 |
|
| 14 |
def get_document_path(filename: str) -> str:
|
| 15 |
"""Get the path to store a document."""
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def format_sources(sources: List[Dict[str, Any]]) -> str:
|
| 33 |
"""Format source documents for display."""
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
def save_conversation(question: str, answer: str, sources: List[Dict[str, Any]]) -> str:
|
| 47 |
"""Save a conversation to a file."""
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
+
import logging
|
| 4 |
+
import shutil
|
| 5 |
from datetime import datetime
|
| 6 |
from typing import List, Dict, Any
|
| 7 |
|
| 8 |
+
# Configure logging
|
| 9 |
+
logging.basicConfig(level=logging.INFO)
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
def sanitize_filename(filename: str) -> str:
    """Sanitize a filename by replacing invalid characters and capping its length.

    The characters ``<``, ``>``, ``:``, ``"``, ``/``, backslash, ``|``, ``?``
    and ``*`` are each replaced with an underscore. Names longer than 200
    characters are shortened by trimming the stem and keeping the extension;
    the result is never longer than 200 characters.

    Args:
        filename: The raw file name. Path separators are replaced, so pass a
            name rather than a full path.

    Returns:
        The sanitized file name.
    """
    max_length = 200
    max_stem = 195  # stem budget when the extension is short (5 chars or fewer)

    # Replace invalid characters with underscores in a single pass
    filename = filename.translate(str.maketrans({char: '_' for char in '<>:"/\\|?*'}))

    # Limit filename length to avoid issues
    if len(filename) > max_length:
        base, ext = os.path.splitext(filename)
        # A long extension eats into the stem budget so the total still fits;
        # the final slice covers extensions that alone exceed the limit.
        keep = max(0, min(max_stem, max_length - len(ext)))
        filename = (base[:keep] + ext)[:max_length]
    return filename
|
| 23 |
|
| 24 |
def get_document_path(filename: str) -> str:
    """Get the path to store a document.

    The preferred location is ``data/documents`` three directory levels above
    this module. If that directory cannot be created or written to, a
    ``documents`` directory under the temp location is used instead. The file
    name is sanitized and suffixed with a second-resolution timestamp.

    Args:
        filename: Original name of the uploaded file.

    Returns:
        The full path the document should be written to. If anything
        unexpected fails, a ``doc_<timestamp>`` path in the temp location is
        returned, keeping the original extension when it can be determined.
    """
    try:
        # Get the documents directory
        docs_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'data', 'documents')

        # Creating the directory is part of the writability probe: on a
        # read-only project tree makedirs itself fails, and that must lead to
        # the alternative location rather than to the last-resort fallback.
        try:
            os.makedirs(docs_dir, exist_ok=True)
            # Test file to check write permissions
            test_file = os.path.join(docs_dir, '.test_write_access')
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
        except Exception as e:
            logger.warning(f"Document directory may not be writable: {e}")
            # Try alternative location
            docs_dir = '/tmp/documents' if os.name != 'nt' else os.path.join(os.environ.get('TEMP', 'C:\\Temp'), 'documents')
            os.makedirs(docs_dir, exist_ok=True)

        # Sanitize the filename
        filename = sanitize_filename(filename)

        # Add a timestamp to make the filename unique
        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
        base, ext = os.path.splitext(filename)
        unique_filename = f"{base}_{timestamp}{ext}"

        filepath = os.path.join(docs_dir, unique_filename)
        logger.info(f"Document will be stored at: {filepath}")
        return filepath
    except Exception as e:
        logger.error(f"Error getting document path: {e}")
        # Fallback to a simple path in /tmp or temp directory
        fallback_dir = '/tmp' if os.name != 'nt' else os.environ.get('TEMP', 'C:\\Temp')
        os.makedirs(fallback_dir, exist_ok=True)
        # Keep the extension so the stored file still shows its type; an
        # unusable filename (e.g. not a string) simply yields no extension.
        try:
            ext = os.path.splitext(sanitize_filename(filename))[1]
        except (TypeError, AttributeError):
            ext = ""
        return os.path.join(fallback_dir, f"doc_{datetime.now().strftime('%Y%m%d%H%M%S')}{ext}")
|
| 63 |
+
|
| 64 |
+
def copy_uploaded_file(source_path: str, destination_path: str) -> bool:
    """Copy an uploaded file with proper error handling.

    Tries ``shutil.copy2`` first. If that fails, falls back to a manual
    streamed copy of the file contents.

    Args:
        source_path: Path of the file to copy.
        destination_path: Path to copy the file to.

    Returns:
        True when the file is available at ``destination_path`` afterwards
        (including the case where both paths already refer to the same file),
        False when every copy attempt failed. Never raises.
    """
    try:
        shutil.copy2(source_path, destination_path)
        logger.info(f"File copied from {source_path} to {destination_path}")
        return True
    except shutil.SameFileError:
        # Nothing to do, and the manual fallback must NOT run here: opening
        # the destination with 'wb' would truncate the very file being read.
        logger.info(f"Source and destination are the same file, nothing to copy: {source_path}")
        return True
    except Exception as e:
        logger.error(f"Error copying file: {e}")
        # Try alternate approach
        try:
            with open(source_path, 'rb') as src, open(destination_path, 'wb') as dst:
                # Stream in chunks instead of loading the whole file into memory
                shutil.copyfileobj(src, dst)
            logger.info(f"File copied using alternate method")
            return True
        except Exception as e2:
            logger.error(f"All methods of copying file failed: {e2}")
            return False
|
| 81 |
|
| 82 |
def format_sources(sources: List[Dict[str, Any]]) -> str:
    """Format source documents for display.

    Produces one numbered line per source: the ``file_name`` (or
    ``"Unknown Source"``), followed by ``(Page N)`` when the source carries a
    page value.

    Args:
        sources: Source dictionaries; the keys read are ``file_name`` and
            ``page``. May be empty or None.

    Returns:
        The lines joined with newlines, ``"No sources found."`` when there are
        no sources, or ``"Error displaying sources."`` if formatting fails.
    """
    try:
        if not sources:
            return "No sources found."

        formatted = []
        for i, source in enumerate(sources, 1):
            source_str = f"{i}. {source.get('file_name', 'Unknown Source')} "
            page = source.get('page')
            # Only None / empty string mean "no page"; 0 is a legitimate page
            # value (e.g. a 0-based page index) and must not be dropped.
            if page is not None and page != "":
                source_str += f"(Page {page}) "
            formatted.append(source_str)

        return "\n".join(formatted)
    except Exception as e:
        logger.error(f"Error formatting sources: {e}")
        return "Error displaying sources."
|
| 99 |
|
| 100 |
def save_conversation(question: str, answer: str, sources: List[Dict[str, Any]]) -> str:
    """Save a conversation to a file.

    Writes the question, the answer and the formatted sources to a text file
    in ``data/conversations`` three directory levels above this module, or in
    a ``conversations`` directory under the temp location if that directory
    cannot be created. The file name is a timestamp followed by the first five
    words of the question.

    Args:
        question: The user's question; an empty/None question is named
            ``empty_question`` in the file name.
        answer: The assistant's answer.
        sources: Source dictionaries, rendered with ``format_sources``.

    Returns:
        The path of the written file, or ``""`` if saving failed. Never raises.
    """
    try:
        # Create a directory for conversations
        conv_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'data', 'conversations')
        try:
            os.makedirs(conv_dir, exist_ok=True)
        except Exception as e:
            logger.warning(f"Could not create conversation directory: {e}")
            # Use alternative directory
            conv_dir = '/tmp/conversations' if os.name != 'nt' else os.path.join(os.environ.get('TEMP', 'C:\\Temp'), 'conversations')
            os.makedirs(conv_dir, exist_ok=True)

        # Create a filename based on the timestamp and first few words of the question
        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
        question_slug = "_".join((question or "empty_question").split()[:5]).lower()
        question_slug = sanitize_filename(question_slug)
        filename = f"{timestamp}_{question_slug}.txt"

        # Format the conversation
        formatted_sources = format_sources(sources)
        content = f"Question: {question}\n\nAnswer: {answer}\n\nSources:\n{formatted_sources}\n"

        # Save the conversation. The encoding is explicit because questions and
        # answers are free text: with the platform default, characters outside
        # the locale's code page would raise and the conversation would be lost.
        filepath = os.path.join(conv_dir, filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)

        logger.info(f"Conversation saved to {filepath}")
        return filepath
    except Exception as e:
        logger.error(f"Error saving conversation: {e}")
        return ""
|