Spaces:

anuragbb
/

Abot

Sleeping

App Files Files Community

Abot / implementation /answer_qwen.py

anuragbb

Update implementation/answer_qwen.py

afc4d5a verified 28 days ago

raw

history blame contribute delete

3.86 kB

	from pathlib import Path
	import os
	import re
	from dotenv import load_dotenv
	from langchain_chroma import Chroma
	from langchain_huggingface import HuggingFaceEmbeddings
	from langchain_core.documents import Document
	from huggingface_hub import InferenceClient
	# Load variables from a .env file before anything reads the environment.
	# override=True is passed so .env values take precedence over variables
	# that are already set (python-dotenv semantics).
	load_dotenv(override=True)
	# Chat-completion client for the hosted model; the token comes from the
	# HF_TOKEN environment variable (os.getenv returns None when it is unset).
	client = InferenceClient(
	model="dnotitia/Qwen3-4B-Instruct-2507:featherless-ai",
	token=os.getenv("HF_TOKEN"),
	)
	# Chroma persistence directory: "vector_db" inside the parent of the
	# directory that contains this file.
	DB_NAME = str(Path(__file__).parent.parent / "vector_db")
	# Embedding function handed to Chroma for query embedding.
	# NOTE(review): presumably the same model that built the "docs" collection —
	# verify against the ingestion script.
	embeddings = HuggingFaceEmbeddings(
	model_name="Qwen/Qwen3-Embedding-0.6B",
	model_kwargs={"trust_remote_code": True},
	)
	# Handle on the persisted "docs" collection stored under DB_NAME.
	vectorstore = Chroma(
	persist_directory=DB_NAME,
	embedding_function=embeddings,
	collection_name="docs",
	)
	# MMR diversifies retrieved chunks — handles spelling drift and avoids
	# pulling many near-identical overlap chunks about the same topic.
	# fetch_k=30 candidates are fetched, then k=9 are picked to balance
	# relevance + diversity; lambda_mult=0.7 sets that trade-off.
	retriever = vectorstore.as_retriever(
	search_type="mmr",
	search_kwargs={"k": 9, "fetch_k": 30, "lambda_mult": 0.7},
	)
	# System prompt template for the chat model. `{context}` is the only
	# placeholder; answer_question fills it via str.format, so any literal
	# braces added to this text must be doubled ("{{" / "}}").
	SYSTEM_PROMPT = """
	You are Abot — a concise AI assistant on Anurag's portfolio website. Answer questions about Anurag Bhusare using only the context provided.
	Visitors are typically recruiters or engineers wanting quick, clear answers.
	Refer to Anurag in third person. Never talk about yourself.
	RULES:
	- Match your answer length to the question — a simple question gets a short answer, a broad question gets a broader one
	- Default to concise — lead with the most important information first
	- Never include deployment details, setup steps, config, or implementation specifics unless directly asked
	- No headers or emoji sections for short answers
	- If you don't know something, say: "I don't have that information."
	Context:
	{context}
	"""
	def combined_query(question: str, history: list[dict]) -> str:
	    """Build the retrieval query from the question plus recent user turns.

	    Follow-up questions ("and where did he work before that?") often lack
	    the keywords needed for retrieval, so the user messages found among the
	    last four history entries are prepended to the current question.

	    Args:
	        question: The current user question.
	        history: Chat history as dicts with "role" and "content" keys.
	            ``None`` or an empty list is treated as "no history".

	    Returns:
	        The prior user messages and the question joined by single spaces.
	    """
	    recent = (history or [])[-4:]
	    # Use .get and a type check so a malformed entry (missing key, None or
	    # blank content) is skipped instead of raising or padding the query.
	    prior = [
	        m["content"]
	        for m in recent
	        if m.get("role") == "user"
	        and isinstance(m.get("content"), str)
	        and m["content"].strip()
	    ]
	    return " ".join([*prior, question])


	def build_context(docs: list[Document], max_chars: int = 6000) -> str:
	    """Join retrieved chunks into one context string of at most ``max_chars``.

	    Chunks are taken in the order given. A chunk is dropped when it is blank
	    or when its first 120 characters match a chunk already kept (overlapping
	    chunks tend to share a prefix). Collection stops at the first chunk that
	    would push the result past ``max_chars``; the blank-line separators are
	    counted, so the returned string never exceeds the cap.

	    Args:
	        docs: Retrieved documents; only ``page_content`` is read.
	        max_chars: Upper bound on the length of the returned string.

	    Returns:
	        The kept chunks joined by blank lines, or "" when nothing fits.
	    """
	    separator = "\n\n"
	    seen: set[str] = set()
	    parts: list[str] = []
	    total = 0
	    for d in docs:
	        content = d.page_content.strip()
	        if not content:
	            # A blank chunk would only add a stray separator to the output.
	            continue
	        fingerprint = content[:120]
	        if fingerprint in seen:
	            continue
	        seen.add(fingerprint)
	        # Every chunk after the first also costs one separator.
	        cost = len(content) + (len(separator) if parts else 0)
	        if total + cost > max_chars:
	            break
	        parts.append(content)
	        total += cost
	    return separator.join(parts)


	def clean_output(text: str) -> str:
	    """Normalise raw model output into the text shown to the visitor.

	    Strips any reasoning block, a leading "/no_think" marker, converts
	    bullet characters to hyphens and collapses runs of blank lines.
	    """
	    cleaned = text.strip()

	    # Keep what follows the last closing tag; if only an opening tag is
	    # present (malformed block), keep what precedes the first one.
	    _, closing_tag, after_think = cleaned.rpartition("</think>")
	    if closing_tag:
	        cleaned = after_think.strip()
	    else:
	        before_think, opening_tag, _ = cleaned.partition("<think>")
	        if opening_tag:
	            cleaned = before_think.strip()

	    cleaned = cleaned.removeprefix("/no_think").strip()

	    # Bullets become hyphens, then three or more newlines shrink to two.
	    return re.sub(r"\n{3,}", "\n\n", cleaned.replace("•", "-"))


	# The history annotation is a string so the "| None" union is never
	# evaluated at definition time on interpreters older than 3.10.
	def answer_question(question: str, history: "list[dict] | None" = None) -> tuple[str, list[Document]]:
	    """Answer a question from retrieved context and recent chat history.

	    Args:
	        question: The current user question.
	        history: Prior chat turns as dicts with "role" and "content" keys.
	            Entries whose role is not "user"/"assistant" or whose content is
	            not a non-blank string are ignored. Defaults to no history.

	    Returns:
	        A tuple of the cleaned model reply and the retrieved documents.
	    """
	    # Filter first so retrieval and the prompt both see the same well-formed
	    # turns, and a malformed entry cannot raise further down.
	    valid_roles = {"user", "assistant"}
	    clean_history = [
	        m for m in (history or [])
	        if m.get("role") in valid_roles
	        and isinstance(m.get("content"), str)
	        and m["content"].strip()
	    ]

	    docs = retriever.invoke(combined_query(question, clean_history))
	    context = build_context(docs)

	    messages = [{"role": "system", "content": SYSTEM_PROMPT.format(context=context)}]
	    # Only the last four turns are replayed to the model.
	    messages.extend(
	        {"role": m["role"], "content": m["content"]} for m in clean_history[-4:]
	    )
	    messages.append({"role": "user", "content": question})
	    # NOTE(review): trailing assistant turn carrying "/no_think" — presumably
	    # meant to suppress a thinking block; confirm the provider honours it.
	    messages.append({"role": "assistant", "content": "/no_think"})

	    response = client.chat_completion(messages=messages, max_tokens=512)
	    # The message content may be None; fall back to "" so cleaning cannot fail.
	    reply = response.choices[0].message.content or ""
	    return clean_output(reply), docs