"""A11y Expert - Main accessibility question-answering agent."""

from typing import Optional, Generator
from openai import OpenAI
from langdetect import detect, LangDetectException
from config import get_settings
from agent.prompts import get_system_prompt
from agent.tools import search_knowledge_base
from database.vector_store_client import VectorStoreClient
from loguru import logger


class A11yExpertAgent:
    """Accessibility expert agent using OpenAI with RAG."""

    def __init__(
        self,
        vector_store: VectorStoreClient,
        llm_client: OpenAI,
        language: str = "en",
        expertise: str = "general"
    ):
        """
        Initialize the A11y Expert agent.

        Args:
            vector_store: An instance of VectorStoreClient.
            llm_client: An instance of OpenAI client.
            language: 'pl' for Polish, 'en' for English.
            expertise: 'general', 'wcag', or 'aria'.
        """
        self.vector_store = vector_store
        self.llm_client = llm_client
        self.language = language
        self.expertise = expertise
        # Stateless agent - no internal conversation history
        settings = get_settings()
        self.model = settings.llm_model
        self.system_prompt = get_system_prompt(language, expertise)
        logger.info(f"A11yExpertAgent initialized (lang={language}, expertise={expertise}, stateless=True)")

    def close(self):
        """Close agent resources."""
        try:
            if self.vector_store:
                self.vector_store.close()
            if hasattr(self.llm_client, 'close'):
                self.llm_client.close()
            logger.info("A11yExpertAgent resources closed")
        except Exception as e:
            logger.warning(f"Error closing A11yExpertAgent: {e}")

    def ask(self, question: str) -> Generator[str, None, None]:
        """
        Ask a question and get a streaming answer with RAG.

        Args:
            question: Question about accessibility

        Yields:
            Answer chunks from the agent
        """
        logger.info(f"Question: {question}")
        try:
            detected_lang = detect(question)
            language = "pl" if detected_lang.startswith("pl") else "en"
        except LangDetectException:
            language = self.language
        logger.info(f"Detected language: {language}")

        # Dynamically update system prompt based on detected language
        current_system_prompt = get_system_prompt(language, self.expertise)

        logger.info("Searching knowledge base...")
        context, sources = search_knowledge_base(question, self.vector_store, language=language)

        messages = [
            {"role": "system", "content": current_system_prompt},
            # Stateless: no conversation history in context
            {"role": "user", "content": self._build_prompt_with_context(question, context, language)}
        ]

        full_answer = ""
        try:
            response_stream = self.llm_client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=0.3,
                max_tokens=1500,
                top_p=0.9,
                stream=True
            )
            for chunk in response_stream:
                content = chunk.choices[0].delta.content
                if content:
                    full_answer += content
                    yield content

            # Add sources at the end
            if sources:
                sources_text = self._format_sources(sources, language)
                full_answer += sources_text
                yield sources_text

            # Log Q&A pair for dataset collection
            self._log_qa_pair(question, full_answer, sources, language)
            logger.info(f"Answer generated ({len(full_answer)} chars)")
        except Exception as e:
            logger.error(f"OpenAI API error: {e}")
            yield f"Error during response generation: {e}"

    def _format_sources(self, sources: list, language: str) -> str:
        """Format source citations for display."""
        if not sources:
            return ""

        # Get unique sources
        unique_sources = {}
        for src in sources:
            source_name = src.get('source', 'unknown')
            doc_type = src.get('doc_type', 'document')
            key = f"{source_name}_{doc_type}"
            if key not in unique_sources:
                unique_sources[key] = {"source": source_name, "doc_type": doc_type}

        if language == "pl":
            header = "\n\n---\n📚 **Źródła:**\n"
        else:
            header = "\n\n---\n📚 **Sources:**\n"
        source_lines = [f"- {s['source']} ({s['doc_type']})" for s in unique_sources.values()]
        return header + "\n".join(source_lines)

    def _build_prompt_with_context(self, question: str, context: str, language: str) -> str:
        """Build the prompt with context and language-specific instructions."""
        if language == "pl":
            return f"""Na podstawie poniższego kontekstu z bazy wiedzy o dostępności, odpowiedz na pytanie PO POLSKU.
=== KONTEKST Z BAZY WIEDZY ===
{context}
=== PYTANIE ===
{question}
=== INSTRUKCJA ===
Odpowiedz na pytanie WYŁĄCZNIE PO POLSKU. Nawet jeśli kontekst jest po angielsku, tłumacz go i odpowiadaj po polsku.
Twoja odpowiedź:"""
        else:
            return f"""
Based on the following accessibility knowledge base context, answer the question.
=== KNOWLEDGE BASE CONTEXT ===
{context}
=== QUESTION ===
{question}
=== ANSWER ===
CRITICAL: Respond ONLY in ENGLISH. This question is in English, so your entire response MUST be in English.
Remember to:
- Answer ONLY in English (this is most important!)
- Cite specific criteria and sources
- Provide practical examples if relevant
- Be clear and concise
"""

    def _log_qa_pair(self, question: str, answer: str, sources: list, language: str):
        """
        Log Q&A pair to JSONL file for dataset collection.

        Args:
            question: User's question
            answer: Agent's answer (including sources)
            sources: List of source documents used
            language: Language of the conversation
        """
        try:
            import json
            from datetime import datetime

            qa_entry = {
                "timestamp": datetime.now().isoformat(),
                "question": question,
                "answer": answer,
                "language": language,
                "sources": [
                    {
                        "source": s.get("source", "unknown"),
                        "doc_type": s.get("doc_type", "document")
                    }
                    for s in sources
                ] if sources else [],
                "model": self.model
            }
            # Append to JSONL file (one JSON object per line)
            with open("qa_dataset.jsonl", "a", encoding="utf-8") as f:
                f.write(json.dumps(qa_entry, ensure_ascii=False) + "\n")
            logger.debug("Logged Q&A pair to qa_dataset.jsonl")
        except Exception as e:
            logger.warning(f"Failed to log Q&A pair: {e}")

    def clear_history(self):
        """No-op method for backward compatibility (agent is now stateless)."""
        logger.info("Agent is stateless - no history to clear")

    def batch_ask(self, questions: list[str]) -> list[dict]:
        """Ask multiple questions in sequence."""
        results = []
        for question in questions:
            try:
                answer = "".join(self.ask(question))
                results.append({"question": question, "answer": answer, "success": True})
            except Exception as e:
                logger.error(f"Failed to answer '{question}': {e}")
                results.append({"question": question, "answer": str(e), "success": False})
        return results
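

# --- Illustrative helper (a sketch, not part of the original module) ---
# One way to read back the dataset that _log_qa_pair appends to: qa_dataset.jsonl
# holds one JSON object per line, so it can be parsed line by line. The function
# name and signature are assumptions made for this example.
def load_qa_dataset(path: str = "qa_dataset.jsonl") -> list[dict]:
    import json

    entries = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                entries.append(json.loads(line))
    return entries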


def create_agent(language: Optional[str] = None) -> A11yExpertAgent:
    """Factory function to create and initialize agent."""
    language = language or "en"
    logger.info(f"Creating agent with language: {language}")
    settings = get_settings()

    # Create vector store with lazy connection (no DB access yet)
    logger.info("Initializing vector store client...")
    vector_store = VectorStoreClient(uri=settings.lancedb_uri)

    api_key = settings.openai_api_key
    logger.info("Initializing OpenAI client...")
    client_args = {"api_key": api_key}
    if settings.llm_base_url:
        client_args["base_url"] = settings.llm_base_url
    llm_client = OpenAI(**client_args)

    logger.info("Creating A11yExpertAgent instance...")
    agent = A11yExpertAgent(
        vector_store=vector_store,
        llm_client=llm_client,
        language=language
    )
    logger.info("Agent creation complete")
    return agent
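

# --- Usage sketch (illustrative, not part of the module's public API) ---
# A minimal way to exercise the factory and the streaming `ask` interface.
# Assumes the settings returned by get_settings() point at a reachable LanceDB
# store and a valid OpenAI-compatible endpoint; the sample question is made up.
if __name__ == "__main__":
    agent = create_agent(language="en")
    try:
        for chunk in agent.ask("What contrast ratio does WCAG require for normal text?"):
            print(chunk, end="", flush=True)
        print()
    finally:
        agent.close()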