from typing import List, Tuple

from src.config import Settings


def compose_answer_retrieval_only(query: str, hits: List[Tuple[str, float]], settings: Settings) -> str:
    """Return formatted excerpts without LLM processing."""
    lines = ["**Top relevant excerpts** (no model used):\n"]
    for i, (txt, score) in enumerate(hits, start=1):
        source = "unknown"
        body = txt
        # Split off the trailing "[Source: ...]" tag embedded in the chunk, if present
        if "[Source:" in txt:
            parts = txt.rsplit("[Source:", 1)
            body = parts[0].strip()
            source = "[Source:" + parts[1]
        lines.append(f"**{i}.** {body}\n\n*{source}* \n*similarity: {score:.3f}*")
    return "\n\n---\n\n".join(lines)


def compose_answer_with_llm(query: str, hits: List[Tuple[str, float]], settings: Settings) -> str:
    """Use the HF Inference API to generate a natural-language answer from retrieved documents."""
    try:
        from huggingface_hub import InferenceClient

        # Prepare context from retrieved documents, truncating to the configured budget
        chunks = [t for t, _ in hits]
        context = "\n\n---\n\n".join(chunks)
        if len(context) > settings.max_context_chars:
            context = context[:settings.max_context_chars] + "\n\n[Context truncated]"

        # System prompt for RAG: ground the model in the retrieved context only
        system_message = (
            "You are a knowledgeable assistant analyzing declassified KGB documents. "
            "Answer the user's question based ONLY on the provided context. "
            "Cite sources using [Source: ...] notation when referencing specific documents. "
            "If the context doesn't contain enough information to answer, say so clearly."
        )
        user_message = f"CONTEXT:\n{context}\n\n---\n\nQUESTION: {query}"

        # Initialize the HF Inference Client
        client = InferenceClient(token=settings.hf_token)

        # Call the model with streaming disabled for simplicity
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ]
        response = client.chat.completions.create(
            model="Qwen/Qwen2.5-7B-Instruct:fastest",
            messages=messages,
            max_tokens=512,
            temperature=0.3,  # Lower temperature for more focused answers
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Fall back to retrieval-only output if the LLM call fails
        return f"⚠️ LLM Error: {e}\n\n---\n\n" + compose_answer_retrieval_only(query, hits, settings)


def compose_answer(query: str, hits: List[Tuple[str, float]], settings: Settings) -> str:
    """Main entry point: route to the retrieval-only or LLM-based answer."""
    if settings.mode == "rag":
        return compose_answer_with_llm(query, hits, settings)
    # Default to retrieval-only for "retrieval" and any unrecognized mode
    return compose_answer_retrieval_only(query, hits, settings)
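

# --- Usage sketch ---
# A minimal example of how compose_answer might be invoked, under stated
# assumptions: the Settings constructor arguments below are hypothetical
# (this module only reads settings.mode, settings.hf_token, and
# settings.max_context_chars), and the demo hits mimic retriever output
# with "[Source: ...]" tags embedded in each chunk. Adapt to the actual
# src.config.Settings definition.
if __name__ == "__main__":
    demo_settings = Settings(  # hypothetical constructor signature
        mode="retrieval",
        hf_token="",
        max_context_chars=4000,
    )
    demo_hits = [
        ("Residency traffic increased in March. [Source: doc_1974_03.txt]", 0.87),
        ("Cipher routes were changed in April. [Source: doc_1974_04.txt]", 0.81),
    ]
    print(compose_answer("What changed in early 1974?", demo_hits, demo_settings))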