# Source: RAGFintech / src/enhanced_rag_system.py
# (Hugging Face Space file; last updated in commit 293b891 by JanviMl)
import os
import tempfile
# Fix HuggingFace cache directory issue for HuggingFace Spaces:
# the default cache locations are not writable there, so point every cache
# env var at a fresh writable temp dir before the model libraries are
# imported (presumably they read these variables at import time — confirm).
# NOTE(review): TRANSFORMERS_CACHE is deprecated in newer transformers
# releases in favour of HF_HOME, which is also set below.
os.environ['TRANSFORMERS_CACHE'] = tempfile.mkdtemp()
os.environ['HF_HOME'] = tempfile.mkdtemp()
os.environ['SENTENCE_TRANSFORMERS_HOME'] = tempfile.mkdtemp()
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from typing import List, Tuple, Dict, Optional
from langchain.schema import Document
import re
import json
import warnings
# Suppress ALL warnings globally — deliberate (keeps Space logs clean), but
# note this also hides deprecation warnings.
warnings.filterwarnings('ignore')
# Import vector store components with better error handling.
# VECTOR_STORE_AVAILABLE gates every ChromaDB/embedding code path below;
# when False the system degrades to keyword search (_fallback_search).
try:
    import chromadb
    from chromadb.config import Settings
    from sentence_transformers import SentenceTransformer
    VECTOR_STORE_AVAILABLE = True
    print("βœ… ChromaDB and SentenceTransformers imported successfully")
except ImportError as e:
    VECTOR_STORE_AVAILABLE = False
    print(f"⚠️ Vector store import error: {e}")
except Exception as e:
    # Any other import-time failure is treated the same as a missing package.
    VECTOR_STORE_AVAILABLE = False
    print(f"⚠️ Vector store initialization error: {e}")
# Import LLM components: the LLM path is enabled only when BOTH the openai
# package is importable AND an API key is present in the environment.
try:
    import openai
    LLM_AVAILABLE = bool(os.getenv("OPENAI_API_KEY"))
    if LLM_AVAILABLE:
        # NOTE(review): module-level `openai.api_key` is the pre-1.0 SDK
        # style; the rest of this file (openai.ChatCompletion) matches it —
        # confirm the pinned openai version stays < 1.0.
        openai.api_key = os.getenv("OPENAI_API_KEY")
        print("βœ… OpenAI API key found and configured")
    else:
        print("⚠️ OpenAI API key not found in environment")
except ImportError:
    LLM_AVAILABLE = False
    print("⚠️ OpenAI library not available")
# Import our custom modules
from document_processor import DocumentProcessor
from auth_system import AuthSystem
class EnhancedRAGSystem:
"""Complete RAG system with Vector Store, LLM, and RBAC enforcement"""
def __init__(self):
    """Set up collaborators and default state; heavy setup happens in initialize_system()."""
    self.document_processor = DocumentProcessor()
    self.auth_system = AuthSystem()
    self.documents = []        # all Documents loaded from the processor
    self.initialized = False   # flipped True by initialize_system()
    self.query_feedback = {}   # feedback_id -> feedback record (in-memory only)
    # Vector Store Components
    self.chroma_client = None
    self.collection = None         # ChromaDB collection "finsolve_documents"
    self.embedding_model = None    # SentenceTransformer instance once loaded
    self.vector_store_initialized = False
    # LLM Components
    self.llm_client = None         # set to the `openai` module once verified
    self.llm_model = "gpt-3.5-turbo"
    self.llm_initialized = False
    # Intent classification keywords: domain -> trigger words used by
    # _classify_query_intent (substring matching, most hits wins).
    self.intent_keywords = {
        "finance": ["revenue", "profit", "cost", "budget", "financial", "expense", "income", "cash", "margin", "roi", "sales"],
        "marketing": ["campaign", "customer", "acquisition", "brand", "marketing", "advertising", "engagement", "conversion", "retention"],
        "hr": ["employee", "hr", "policy", "leave", "benefits", "salary", "attendance", "performance", "training", "recruitment"],
        "engineering": ["architecture", "technology", "system", "development", "technical", "infrastructure", "deployment", "security", "api"],
        "general": ["company", "about", "overview", "mission", "values", "policy", "contact", "help"]
    }
def initialize_system(self):
    """Initialize the complete RAG system with all components.

    Brings up the vector store and LLM (each optional), loads documents,
    and indexes them. Any failure degrades to template/keyword mode rather
    than raising: self.initialized is set True on BOTH paths by design.
    """
    try:
        print("πŸš€ Initializing Complete RAG System...")
        # Initialize Vector Store (ChromaDB); on failure keyword fallback is used.
        self._initialize_vector_store()
        # Initialize LLM; on failure template responses are used.
        self._initialize_llm()
        # Load documents
        self.documents = self.document_processor.get_all_documents()
        # Index into the vector store only if it actually came up.
        if self.vector_store_initialized:
            self._load_documents_to_vector_store()
        self.initialized = True
        # Print initialization status banner.
        self._print_initialization_status()
    except Exception as e:
        print(f"❌ Error initializing RAG system: {str(e)}")
        # Graceful fallback: mark initialized anyway so query() still answers
        # via templates/keyword search.
        self.initialized = True
        print("⚠️ Using fallback mode with template responses")
def _initialize_vector_store(self):
    """Initialize the ChromaDB vector store and embedding model.

    Tries ChromaDB client configurations newest-to-oldest for cross-version
    compatibility, then loads (or creates) the collection and a small
    sentence-transformer model. Sets self.vector_store_initialized on
    success; never raises — any failure leaves the keyword-search fallback.
    """
    if not VECTOR_STORE_AVAILABLE:
        print("⚠️ ChromaDB/SentenceTransformers not available, using in-memory search")
        return
    try:
        print("πŸ”§ Initializing ChromaDB...")
        # ChromaDB needs a writable directory (HF Spaces' default dirs are not).
        chroma_dir = tempfile.mkdtemp(prefix="chroma_")
        print(f"πŸ“ Using ChromaDB directory: {chroma_dir}")
        # Try different ChromaDB configurations for HuggingFace compatibility.
        try:
            # First try: PersistentClient (newer API)
            self.chroma_client = chromadb.PersistentClient(path=chroma_dir)
            print("βœ… Using ChromaDB PersistentClient")
        except Exception:
            try:
                # Second try: Client with settings (older API)
                self.chroma_client = chromadb.Client(Settings(
                    chroma_db_impl="duckdb+parquet",
                    persist_directory=chroma_dir
                ))
                print("βœ… Using ChromaDB Client with Settings")
            except Exception:
                # Last resort: purely in-memory client (no persistence).
                self.chroma_client = chromadb.Client()
                print("βœ… Using ChromaDB in-memory client")
        # Get or create collection. get_collection raises when the collection
        # does not exist yet; catch Exception (was a bare `except:`, which
        # would also swallow KeyboardInterrupt/SystemExit).
        collection_name = "finsolve_documents"
        try:
            self.collection = self.chroma_client.get_collection(collection_name)
            print(f"βœ… Loaded existing ChromaDB collection: {collection_name}")
        except Exception:
            self.collection = self.chroma_client.create_collection(
                name=collection_name,
                metadata={"description": "FinSolve documents with RBAC"}
            )
            print(f"βœ… Created new ChromaDB collection: {collection_name}")
        # Load a small embedding model; fall back to an even smaller one.
        try:
            # Cache dir must be writable on HF Spaces.
            cache_dir = tempfile.mkdtemp(prefix="sentence_transformers_")
            self.embedding_model = SentenceTransformer(
                "all-MiniLM-L6-v2",
                cache_folder=cache_dir
            )
            print("βœ… Loaded sentence transformer model: all-MiniLM-L6-v2")
        except Exception:
            try:
                cache_dir = tempfile.mkdtemp(prefix="sentence_transformers_fallback_")
                self.embedding_model = SentenceTransformer(
                    "paraphrase-MiniLM-L3-v2",
                    cache_folder=cache_dir
                )
                print("βœ… Loaded fallback sentence transformer model: paraphrase-MiniLM-L3-v2")
            except Exception as model_err:
                print(f"❌ Failed to load embedding model: {model_err}")
                # Bare `raise` preserves the original traceback (was `raise e2`).
                raise
        self.vector_store_initialized = True
    except Exception as e:
        print(f"⚠️ ChromaDB initialization failed: {str(e)}")
        print("⚠️ Falling back to in-memory search")
        self.vector_store_initialized = False
def _initialize_llm(self):
    """Verify the OpenAI connection and enable LLM-backed answers.

    Sends a tiny throwaway completion request as a smoke test; on success
    stores the openai module as self.llm_client and sets
    self.llm_initialized. Never raises — failures leave template mode.
    """
    if not LLM_AVAILABLE:
        print("⚠️ OpenAI API key not found, using template responses")
        return
    try:
        # Smoke-test the API; the reply itself is discarded (the previous
        # `response =` binding was never used).
        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface,
        # matching how this module configures `openai.api_key` at import
        # time — confirm the pinned openai version stays < 1.0.
        openai.ChatCompletion.create(
            model=self.llm_model,
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=10
        )
        self.llm_client = openai
        self.llm_initialized = True
        print(f"βœ… OpenAI LLM initialized: {self.llm_model}")
    except Exception as e:
        print(f"⚠️ OpenAI initialization failed: {str(e)}")
        print("⚠️ Using template-based responses")
def _load_documents_to_vector_store(self):
"""Load documents into ChromaDB vector store"""
if not self.vector_store_initialized or not self.embedding_model:
return
try:
# Check if documents already loaded
if self.collection.count() > 0:
print(f"βœ… ChromaDB already contains {self.collection.count()} documents")
return
print("πŸ“„ Loading documents into vector store...")
texts = []
metadatas = []
ids = []
for i, doc in enumerate(self.documents):
doc_id = f"doc_{i}_{hash(doc.page_content) % 10000}"
metadata = {
"content_type": doc.metadata.get("content_type", "general"),
"title": doc.metadata.get("title", "Document"),
"department": doc.metadata.get("department", "General"),
"type": doc.metadata.get("type", "Document"),
"chunk_id": str(doc.metadata.get("chunk_id", 0)),
"source": doc.metadata.get("source", "unknown")
}
texts.append(doc.page_content)
metadatas.append(metadata)
ids.append(doc_id)
# Generate embeddings in batches to avoid memory issues
batch_size = 10
for i in range(0, len(texts), batch_size):
batch_texts = texts[i:i+batch_size]
batch_metadatas = metadatas[i:i+batch_size]
batch_ids = ids[i:i+batch_size]
# Generate embeddings
embeddings = self.embedding_model.encode(batch_texts).tolist()
# Add to ChromaDB
self.collection.add(
embeddings=embeddings,
documents=batch_texts,
metadatas=batch_metadatas,
ids=batch_ids
)
print(f"βœ… Loaded {len(self.documents)} documents into ChromaDB")
except Exception as e:
print(f"⚠️ Error loading documents to vector store: {str(e)}")
def _print_initialization_status(self):
"""Print comprehensive initialization status"""
print("\n" + "="*50)
print("πŸ€– FINSOLVE RAG SYSTEM STATUS")
print("="*50)
print(f"βœ… Python: Core system initialized")
print(f"{'βœ…' if self.vector_store_initialized else '⚠️'} ChromaDB Vector Store: {'Ready' if self.vector_store_initialized else 'Fallback mode'}")
print(f"{'βœ…' if self.llm_initialized else '⚠️'} OpenAI LLM: {'OpenAI GPT' if self.llm_initialized else 'Template mode'}")
print(f"βœ… Streamlit: UI active")
print(f"πŸ”„ FastAPI: {'Real FastAPI' if self._check_fastapi_running() else 'Simulated API'}")
print(f"βœ… Authentication: JWT-style RBAC")
print(f"βœ… NLP: Intent classification + {'LLM' if self.llm_initialized else 'Templates'}")
print(f"βœ… RAG: Vector retrieval + context augmentation")
print(f"πŸ“Š Documents loaded: {len(self.documents)}")
print("="*50)
def _check_fastapi_running(self) -> bool:
"""Check if FastAPI server is running"""
try:
import requests
response = requests.get("http://localhost:8000/health", timeout=2)
return response.status_code == 200
except:
return False
def _vector_similarity_search(self, query: str, role: str, k: int = 5) -> List[Document]:
"""Perform vector similarity search with role-based filtering"""
if not self.vector_store_initialized:
return self._fallback_search(query, role, k)
try:
# Generate query embedding
query_embedding = self.embedding_model.encode([query]).tolist()[0]
# Build role-based filter
where_clause = self._build_role_filter(role)
# Perform vector search
results = self.collection.query(
query_embeddings=[query_embedding],
n_results=k,
where=where_clause,
include=["documents", "metadatas", "distances"]
)
# Convert to Document objects
documents = []
if results['documents'] and results['documents'][0]:
for i, (doc, metadata) in enumerate(zip(results['documents'][0], results['metadatas'][0])):
distance = results['distances'][0][i] if results['distances'] else 0
metadata['similarity_score'] = 1 - distance
documents.append(Document(
page_content=doc,
metadata=metadata
))
return documents
except Exception as e:
print(f"❌ Vector search error: {str(e)}")
return self._fallback_search(query, role, k)
def _build_role_filter(self, role: str) -> Dict:
"""Build ChromaDB filter based on user role"""
role_access = {
"Finance": ["financial_reports", "expense_data", "budget_info"],
"Marketing": ["marketing_reports", "campaign_data", "customer_metrics"],
"HR": ["employee_data", "hr_policies", "attendance_records"],
"Engineering": ["technical_docs", "architecture", "development_processes"],
"C-Level": ["financial_reports", "marketing_reports", "employee_data", "technical_docs", "all_data"],
"Employee": ["general_policies", "company_info", "benefits"]
}
accessible_types = role_access.get(role, ["general_policies"])
if len(accessible_types) == 1:
return {"content_type": {"$eq": accessible_types[0]}}
else:
return {"content_type": {"$in": accessible_types}}
def _fallback_search(self, query: str, role: str, k: int = 5) -> List[Document]:
"""Fallback search when vector store is not available"""
# Get role-specific documents
role_docs = self.document_processor.get_documents_for_role(role)
# Simple keyword matching
query_terms = query.lower().split()
scored_docs = []
for doc in role_docs:
content_lower = doc.page_content.lower()
score = 0
for term in query_terms:
score += content_lower.count(term)
if query.lower() in content_lower:
score += 10
if score > 0:
scored_docs.append((doc, score))
scored_docs.sort(key=lambda x: x[1], reverse=True)
return [doc for doc, score in scored_docs[:k]]
def _classify_query_intent(self, query: str) -> str:
"""Classify query intent using keyword matching"""
query_lower = query.lower()
intent_scores = {}
for intent, keywords in self.intent_keywords.items():
score = sum(1 for keyword in keywords if keyword in query_lower)
if score > 0:
intent_scores[intent] = score
if intent_scores:
return max(intent_scores, key=intent_scores.get)
return "general"
def _enforce_rbac_at_retrieval(self, query: str, role: str) -> Tuple[List[Document], bool]:
"""Enforce RBAC at retrieval level with intent validation"""
query_intent = self._classify_query_intent(query)
# Check if user role can access the queried domain
role_domain_access = {
"Finance": ["finance", "general"],
"Marketing": ["marketing", "general"],
"HR": ["hr", "general"],
"Engineering": ["engineering", "general"],
"C-Level": ["finance", "marketing", "hr", "engineering", "general"],
"Employee": ["general"]
}
allowed_domains = role_domain_access.get(role, ["general"])
if query_intent not in allowed_domains:
return [], False # Unauthorized access
# Get relevant documents using vector search or fallback
relevant_docs = self._vector_similarity_search(query, role)
return relevant_docs, True
async def _generate_llm_response(self, query: str, context: str, user_role: str, query_intent: str) -> str:
    """Generate response using OpenAI LLM.

    Falls back to the deterministic template answer (without context docs)
    when the LLM is not initialized or the API call fails.
    """
    if not self.llm_initialized:
        return self._generate_template_response(query, [], user_role, query_intent)
    try:
        # The system prompt embeds role, domain and the retrieved context so
        # the model answers within the caller's access boundary.
        system_prompt = f"""You are an AI assistant for FinSolve Technologies, a leading FinTech company.
You are responding to a {user_role} team member with access to {query_intent} information.
Guidelines:
- Provide accurate, concise, and role-appropriate responses
- Use the provided context to answer questions
- If information is not in the context, clearly state this
- Format responses professionally with clear structure
- Include relevant metrics and data when available
- Maintain confidentiality and data access boundaries
Context: {context}
User Role: {user_role}
Query Domain: {query_intent}"""
        user_prompt = f"Question: {query}\n\nPlease provide a comprehensive answer based on the context provided."
        # NOTE(review): this coroutine contains no awaits and calls the
        # blocking pre-1.0 `openai.ChatCompletion` API through self.llm_client
        # — confirm the pinned openai version; a plain sync method would do.
        response = self.llm_client.ChatCompletion.create(
            model=self.llm_model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            max_tokens=1000,
            temperature=0.7,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"❌ LLM error: {str(e)}")
        # Any API failure degrades to the template answer (context docs dropped).
        return self._generate_template_response(query, [], user_role, query_intent)
def _generate_template_response(self, query: str, context_docs: List[Document], user_role: str, query_intent: str) -> str:
"""Generate template-based response when LLM is not available"""
response_parts = []
response_parts.append(f"**Based on your {user_role} access level:**\n")
# Generate intent-specific responses
if query_intent == "finance":
response_parts.extend(self._generate_finance_insights(query, context_docs))
elif query_intent == "marketing":
response_parts.extend(self._generate_marketing_insights(query, context_docs))
elif query_intent == "hr":
response_parts.extend(self._generate_hr_insights(query, context_docs))
elif query_intent == "engineering":
response_parts.extend(self._generate_technical_insights(query, context_docs))
else:
response_parts.extend(self._generate_general_insights(query, context_docs))
return "\n".join(response_parts)
def _generate_finance_insights(self, query: str, context_docs: List[Document]) -> List[str]:
"""Generate finance-specific insights"""
insights = ["πŸ’° **Financial Insights:**", ""]
# Extract content for analysis
content = " ".join([doc.page_content for doc in context_docs])
if "revenue" in query.lower() or "2.6 billion" in content:
insights.extend([
"πŸ“ˆ **Revenue Performance:**",
"β€’ Q4 2024: $2.6 billion (35% YoY growth)",
"β€’ Annual 2024: $9.4 billion (28% YoY increase)",
"β€’ Strong growth trajectory maintained throughout the year",
""
])
if "margin" in query.lower() or "profit" in query.lower():
insights.extend([
"πŸ“Š **Profitability Metrics:**",
"β€’ Gross Margin: 64% (improved from 58% in Q1)",
"β€’ Net Income: $325M (18% YoY increase)",
"β€’ Operating Income: $650M",
""
])
if "cost" in query.lower() or "expense" in query.lower():
insights.extend([
"πŸ’Έ **Cost Analysis:**",
"β€’ Vendor Services: $30M (18% increase)",
"β€’ Software Subscriptions: $25M (22% increase)",
"β€’ Marketing Investment: $2.3B with strong ROI",
""
])
insights.append("🎯 **Key Takeaway:** Strong revenue growth with improving margins despite increased operational costs.")
return insights
def _generate_marketing_insights(self, query: str, context_docs: List[Document]) -> List[str]:
"""Generate marketing-specific insights"""
insights = ["πŸ“ˆ **Marketing Insights:**", ""]
insights.extend([
"🎯 **Campaign Performance:**",
"β€’ Customer Acquisition: 20% increase year-over-year",
"β€’ Digital Campaign ROI: 3.5x return on $5M investment",
"β€’ Q4 Results: 220,000 new customers (exceeded target)",
"",
"πŸ’° **ROI Analysis:**",
"β€’ Overall Marketing ROI: 4.5x",
"β€’ Digital Channels: 3.5x return",
"β€’ Event Marketing: 5.0x return",
"β€’ Email Marketing: 2.0x return",
"",
"πŸš€ **Key Takeaway:** Successful global expansion with strong ROI across all marketing channels."
])
return insights
def _generate_hr_insights(self, query: str, context_docs: List[Document]) -> List[str]:
"""Generate HR-specific insights"""
insights = ["πŸ‘₯ **HR Insights:**", ""]
if "benefits" in query.lower():
insights.extend([
"πŸ₯ **Employee Benefits:**",
"β€’ Health Insurance: Family floater policy",
"β€’ Provident Fund: 12% employer contribution",
"β€’ Maternity Leave: 26 weeks paid leave",
"β€’ Flexible Work: Up to 2 days/week WFH",
""
])
if "leave" in query.lower():
insights.extend([
"πŸ“… **Leave Policies:**",
"β€’ Annual Leave: 15-21 days/year",
"β€’ Sick Leave: 12 days/year",
"β€’ Casual Leave: 7 days/year",
"β€’ Emergency Leave: Available with manager approval",
""
])
insights.append("πŸ’‘ **Key Takeaway:** Comprehensive benefits package with competitive compensation and flexible work arrangements.")
return insights
def _generate_technical_insights(self, query: str, context_docs: List[Document]) -> List[str]:
"""Generate technical/engineering insights"""
insights = ["πŸ”§ **Technical Insights:**", ""]
if "architecture" in query.lower():
insights.extend([
"πŸ—οΈ **System Architecture:**",
"β€’ Microservices-based, cloud-native design",
"β€’ AWS infrastructure with Kubernetes orchestration",
"β€’ PostgreSQL, MongoDB, Redis for data storage",
"β€’ 99.99% uptime target with auto-scaling",
""
])
if "technology" in query.lower():
insights.extend([
"πŸ’» **Technology Stack:**",
"β€’ Frontend: React 18, TypeScript, Tailwind CSS",
"β€’ Backend: Node.js, Python, Go",
"β€’ Mobile: Swift (iOS), Kotlin (Android)",
"β€’ Infrastructure: AWS, Kubernetes, Docker",
""
])
insights.append("⚑ **Key Takeaway:** Modern, scalable architecture with strong security and compliance standards.")
return insights
def _generate_general_insights(self, query: str, context_docs: List[Document]) -> List[str]:
"""Generate general company insights"""
insights = ["🏒 **Company Information:**", ""]
insights.extend([
"πŸ“‹ **About FinSolve Technologies:**",
"β€’ Founded: 2018",
"β€’ Headquarters: Bangalore, India",
"β€’ Global presence: North America, Europe, Asia-Pacific",
"β€’ Services: Digital banking, payments, wealth management",
"",
"🎯 **Mission & Values:**",
"β€’ Mission: Empower financial freedom through technology",
"β€’ Core Values: Integrity, Innovation, Customer Focus",
"β€’ Commitment: Secure, scalable financial solutions",
])
return insights
def _generate_unauthorized_response(self, query: str, user_role: str, query_intent: str) -> str:
"""Generate graceful unauthorized access message"""
intent_role_map = {
"finance": "Finance and Executive",
"marketing": "Marketing and Executive",
"hr": "HR and Executive",
"engineering": "Engineering and Executive"
}
required_roles = intent_role_map.get(query_intent, "appropriate")
return f"""πŸ›‘οΈ **Access Restricted**
This information is restricted to **{required_roles}** roles only.
Your current role (**{user_role}**) does not have permission to access {query_intent} data.
**Available to you:**
{chr(10).join(['β€’ ' + doc.replace('_', ' ').title() for doc in self.auth_system.get_accessible_documents(user_role)])}
Please contact your administrator if you need access to additional information."""
def _extract_key_metrics(self, content: str, query_intent: str) -> Dict:
"""Extract key metrics for visualization"""
metrics = {}
if query_intent == "finance":
revenue_match = re.search(r'revenue[:\s]*\$?([\d.,]+)\s*(billion|million)', content.lower())
if revenue_match:
amount = revenue_match.group(1).replace(',', '')
unit = revenue_match.group(2)
multiplier = 1000 if unit == 'billion' else 1
metrics['revenue'] = float(amount) * multiplier
growth_match = re.search(r'(\d+)%\s*(yoy|growth)', content.lower())
if growth_match:
metrics['growth_rate'] = int(growth_match.group(1))
elif query_intent == "marketing":
acq_match = re.search(r'(\d+,?\d*)\s*new customers', content.lower())
if acq_match:
metrics['customer_acquisition'] = int(acq_match.group(1).replace(',', ''))
roi_match = re.search(r'(\d+\.?\d*)x\s*r[oe]i', content.lower())
if roi_match:
metrics['roi'] = float(roi_match.group(1))
return metrics
def _create_visualization(self, metrics: Dict, query_intent: str) -> Optional[str]:
"""Create visualizations for metrics"""
if not metrics:
return None
try:
if query_intent == "finance" and 'revenue' in metrics:
quarters = ['Q1', 'Q2', 'Q3', 'Q4']
revenues = [2100, 2300, 2400, 2600]
fig = px.bar(
x=quarters,
y=revenues,
title="Quarterly Revenue 2024 ($ Millions)",
labels={'x': 'Quarter', 'y': 'Revenue ($ Millions)'},
color=revenues,
color_continuous_scale="viridis"
)
fig.update_layout(height=400, showlegend=False)
return fig.to_html(include_plotlyjs='cdn', div_id="revenue_chart")
elif query_intent == "marketing" and 'customer_acquisition' in metrics:
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
acquisitions = [18000, 22000, 25000, 28000, 32000, 35000]
fig = px.line(
x=months,
y=acquisitions,
title="Customer Acquisition Trend 2024",
labels={'x': 'Month', 'y': 'New Customers'},
markers=True
)
fig.update_layout(height=400, showlegend=False)
return fig.to_html(include_plotlyjs='cdn', div_id="acquisition_chart")
return None
except Exception as e:
print(f"❌ Error creating visualization: {str(e)}")
return None
def _create_data_table(self, content: str, query_intent: str) -> Optional[str]:
"""Create data tables from content"""
try:
if query_intent == "finance":
data = {
'Metric': ['Q4 Revenue', 'Annual Revenue', 'Net Income', 'Gross Margin', 'ROI'],
'Value': ['$2.6B', '$9.4B', '$325M', '64%', '15%'],
'YoY Growth': ['+35%', '+28%', '+18%', '+6%', '+3%']
}
df = pd.DataFrame(data)
return df.to_html(index=False, classes='table table-striped', table_id='financial-metrics')
elif query_intent == "marketing":
data = {
'Campaign': ['Digital Ads', 'Influencer', 'Email', 'Events'],
'Spend': ['$5M', '$1.5M', '$0.2M', '$2M'],
'ROI': ['3.5x', '4.2x', '2.0x', '5.0x'],
'Leads': ['180K', '60K', '25K', '300']
}
df = pd.DataFrame(data)
return df.to_html(index=False, classes='table table-striped', table_id='marketing-metrics')
return None
except Exception as e:
print(f"❌ Error creating table: {str(e)}")
return None
def store_feedback(self, query: str, response: str, rating: int, role: str):
"""Store user feedback for system improvement"""
feedback_id = len(self.query_feedback)
self.query_feedback[feedback_id] = {
'query': query,
'response': response,
'rating': rating,
'role': role,
'timestamp': pd.Timestamp.now(),
'intent': self._classify_query_intent(query)
}
def query(self, query: str, user_role: str) -> Tuple[str, List[str], Optional[str], Optional[str]]:
    """Enhanced query method with complete RAG pipeline.

    Returns a 4-tuple: (answer_text, source_titles, chart_html_or_None,
    table_html_or_None). Errors are reported in the answer text rather
    than raised.
    """
    try:
        if not self.initialized:
            return "System not initialized. Please try again.", [], None, None
        # RBAC first: deny before any retrieval can leak restricted content.
        relevant_docs, authorized = self._enforce_rbac_at_retrieval(query, user_role)
        if not authorized:
            query_intent = self._classify_query_intent(query)
            unauthorized_msg = self._generate_unauthorized_response(query, user_role, query_intent)
            return unauthorized_msg, [], None, None
        if not relevant_docs:
            return f"No relevant information found in your accessible documents for: {query}", [], None, None
        # Generate response using LLM or templates
        query_intent = self._classify_query_intent(query)
        if self.llm_initialized:
            # Concatenate retrieved chunks as LLM context.
            context = "\n\n".join([doc.page_content for doc in relevant_docs])
            import asyncio
            try:
                # Try to get event loop, create one if it doesn't exist
                loop = asyncio.get_event_loop()
            except RuntimeError:
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
            # NOTE(review): run_until_complete raises if a loop is already
            # running in this thread (possible under some UI frameworks);
            # that case falls to the outer except and returns the apology
            # message — confirm this is acceptable.
            response = loop.run_until_complete(
                self._generate_llm_response(query, context, user_role, query_intent)
            )
        else:
            response = self._generate_template_response(query, relevant_docs, user_role, query_intent)
        # Collect unique source titles, preserving retrieval order.
        sources = []
        for doc in relevant_docs:
            source = doc.metadata.get('title', 'Company Documents')
            if source not in sources:
                sources.append(source)
        # Generate visualizations and tables from the retrieved text.
        context_content = " ".join([doc.page_content for doc in relevant_docs])
        metrics = self._extract_key_metrics(context_content, query_intent)
        visualization = self._create_visualization(metrics, query_intent)
        table = self._create_data_table(context_content, query_intent)
        return response, sources, visualization, table
    except Exception as e:
        error_response = f"I apologize, but I encountered an error while processing your query: {str(e)}"
        return error_response, [], None, None
def get_system_status(self) -> Dict:
"""Get comprehensive system status"""
return {
"documents_loaded": len(self.documents),
"system_initialized": self.initialized,
"vector_store_available": self.vector_store_initialized,
"llm_available": self.llm_initialized,
"feedback_entries": len(self.query_feedback),
"tech_stack": {
"python": "βœ… Active",
"streamlit": "βœ… Active",
"vector_store": "βœ… ChromaDB" if self.vector_store_initialized else "⚠️ Fallback",
"llm": f"βœ… {self.llm_model}" if self.llm_initialized else "⚠️ Templates",
"fastapi": "βœ… Real FastAPI" if self._check_fastapi_running() else "πŸ”„ Simulated",
"authentication": "βœ… JWT-style RBAC"
}
}
def get_available_documents_for_role(self, role: str) -> List[Dict]:
"""Get list of documents available for a specific role"""
accessible_docs = self.auth_system.get_accessible_documents(role)
doc_info = self.document_processor.get_document_info()
available = []
for doc_name in accessible_docs:
if doc_name in doc_info:
available.append({
"content_type": doc_name,
**doc_info[doc_name]
})
return available