| from pathlib import Path |
| import os |
| import re |
| from dotenv import load_dotenv |
| from langchain_chroma import Chroma |
| from langchain_huggingface import HuggingFaceEmbeddings |
| from langchain_core.documents import Document |
| from huggingface_hub import InferenceClient |
# Load variables from .env before anything reads the environment;
# override=True lets .env values replace variables that are already set.
load_dotenv(override=True)

# --- Retrieval stack -------------------------------------------------------
# Chroma persistence directory: "vector_db" in the parent of this file's
# directory.
DB_NAME = str(Path(__file__).parent.parent / "vector_db")

embeddings = HuggingFaceEmbeddings(
    model_kwargs={"trust_remote_code": True},
    model_name="Qwen/Qwen3-Embedding-0.6B",
)

vectorstore = Chroma(
    collection_name="docs",
    embedding_function=embeddings,
    persist_directory=DB_NAME,
)

# MMR search: k documents are returned out of fetch_k fetched candidates;
# lambda_mult tunes the relevance/diversity trade-off.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 9, "fetch_k": 30, "lambda_mult": 0.7},
    search_type="mmr",
)

# --- Chat model ------------------------------------------------------------
# Hosted inference client; the token is None when HF_TOKEN is not set.
client = InferenceClient(
    token=os.environ.get("HF_TOKEN"),
    model="dnotitia/Qwen3-4B-Instruct-2507:featherless-ai",
)
# System prompt template for the chat model. "{context}" is the only format
# field; it is filled with the retrieved document text via str.format().
# The dashes below are real em dashes (U+2014); they had been corrupted into
# mojibake by a wrong-encoding round trip.
SYSTEM_PROMPT = """
You are Abot — a concise AI assistant on Anurag's portfolio website. Answer questions about Anurag Bhusare using only the context provided.
Visitors are typically recruiters or engineers wanting quick, clear answers.
Refer to Anurag in third person. Never talk about yourself.
RULES:
- Match your answer length to the question — a simple question gets a short answer, a broad question gets a broader one
- Default to concise — lead with the most important information first
- Never include deployment details, setup steps, config, or implementation specifics unless directly asked
- No headers or emoji sections for short answers
- If you don't know something, say: "I don't have that information."
Context:
{context}
"""
def combined_query(question: str, history: list[dict]) -> str:
    """Build the retrieval query from recent user turns plus the new question.

    Prepending earlier user messages lets follow-up questions (which often
    omit the subject) still retrieve relevant documents.

    Args:
        question: The current user question; always placed last.
        history: Chat messages as dicts with "role" and "content" keys.
            Only the last four entries are inspected.

    Returns:
        The selected user messages and the question joined by single spaces.
    """
    prior = [
        m["content"]
        for m in history[-4:]
        # Use .get() and a type check so malformed entries (missing keys,
        # non-string or blank content) are skipped instead of raising.
        if m.get("role") == "user"
        and isinstance(m.get("content"), str)
        and m["content"].strip()
    ]
    return " ".join([*prior, question])
|
|
| |
def build_context(docs: list[Document], max_chars: int = 6000) -> str:
    """Join retrieved chunks into one context string of at most max_chars.

    Chunks are taken in the order given. A chunk is skipped when it is blank
    or when its first 120 characters match a chunk already seen (treated as a
    duplicate/overlapping chunk). Collection stops at the first chunk that
    would push the joined text past max_chars.

    Args:
        docs: Objects exposing a ``page_content`` string.
        max_chars: Upper bound on the length of the returned string,
            including the blank-line separators between chunks.

    Returns:
        The kept chunks (stripped) joined by a blank line; "" when none fit.
    """
    separator = "\n\n"
    seen: set[str] = set()
    parts: list[str] = []
    total = 0
    for d in docs:
        content = d.page_content.strip()
        if not content:
            # A blank chunk adds nothing but a stray separator.
            continue
        fingerprint = content[:120]
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        # Count the separator too so the joined result really respects the cap.
        cost = len(content) + (len(separator) if parts else 0)
        if total + cost > max_chars:
            # Stop rather than skip: keeps the retriever's ranking order intact.
            break
        parts.append(content)
        total += cost
    return separator.join(parts)
|
|
| |
def clean_output(text: str) -> str:
    """Normalize raw model output before it is shown to the user.

    Steps, in order:
      1. Trim surrounding whitespace.
      2. Drop reasoning markup: keep only what follows the last "</think>";
         if there is an opening "<think>" with no closing tag, keep only what
         precedes it.
      3. Remove a leading "/no_think" marker.
      4. Replace bullet characters with "-" and collapse runs of three or
         more newlines into a single blank line.

    Args:
        text: The raw completion text.

    Returns:
        The cleaned text.
    """
    text = text.strip()

    if "</think>" in text:
        text = text.split("</think>")[-1].strip()
    elif "<think>" in text:
        # Unterminated reasoning block: everything after the tag is discarded.
        text = text.split("<think>")[0].strip()

    text = text.removeprefix("/no_think").strip()

    # U+2022 BULLET, written as an escape so an encoding round trip cannot
    # corrupt the literal again.
    text = text.replace("\u2022", "-")
    text = re.sub(r"\n{3,}", "\n\n", text)

    return text
|
|
| |
def answer_question(
    question: str,
    # String annotation keeps the signature valid on Python versions that
    # cannot evaluate "X | None" at definition time.
    history: "list[dict] | None" = None,
) -> tuple[str, list[Document]]:
    """Answer a question using retrieved context and recent chat history.

    Args:
        question: The current user question.
        history: Earlier chat messages as dicts with "role" and "content"
            keys. Entries whose role is not "user"/"assistant", or whose
            content is not a non-blank string, are ignored. Defaults to no
            history.

    Returns:
        A tuple of (cleaned answer text, documents returned by the retriever).
    """
    # None instead of a mutable [] default, which would be shared across calls.
    history = history or []

    # Validate once, up front, so both retrieval and the prompt see the same
    # well-formed turns and malformed entries cannot crash combined_query.
    valid_roles = {"user", "assistant"}
    clean_history = [
        m for m in history
        if m.get("role") in valid_roles
        and isinstance(m.get("content"), str)
        and m["content"].strip()
    ]

    docs = retriever.invoke(combined_query(question, clean_history))
    context = build_context(docs)

    messages = [{"role": "system", "content": SYSTEM_PROMPT.format(context=context)}]
    messages.extend(
        {"role": m["role"], "content": m["content"]} for m in clean_history[-4:]
    )
    messages.append({"role": "user", "content": question})
    # Trailing assistant turn carrying "/no_think"; clean_output strips the
    # marker if the model echoes it back.
    # NOTE(review): presumably meant to suppress reasoning output - confirm
    # the provider honors this marker in an assistant turn.
    messages.append({"role": "assistant", "content": "/no_think"})

    response = client.chat_completion(messages=messages, max_tokens=512)
    # The completion content can be None; treat that as an empty answer.
    return clean_output(response.choices[0].message.content or ""), docs