Maheen Saleh committed
Commit 4a3a2c0 · 1 Parent(s): e7a4534

updated proj structure
src/__pycache__/qa_prompts.cpython-311.pyc ADDED
Binary file (460 Bytes).
 
src/data_index/embeddings_model.txt ADDED
@@ -0,0 +1 @@
+ sentence-transformers/all-MiniLM-L6-v2
src/data_index/index.faiss ADDED
Binary file (7.73 kB).
 
src/data_index/index.pkl ADDED
Binary file (5.21 kB).
 
src/extra_qa_chains.py ADDED
@@ -0,0 +1,109 @@
+ import os
+
+ from transformers import (
+     AutoTokenizer,
+     AutoModelForCausalLM,
+     AutoModelForSeq2SeqLM,
+     pipeline,
+ )
+ from langchain_community.llms import HuggingFacePipeline
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import RetrievalQA
+
+ from qa_prompts import PROMPT_TMPL
+
+ # Mirrors the env-driven config used in ingest.py and qa_chain_cli.py.
+ LLM_MODEL_NAME = os.getenv("LLM_MODEL")
+
+
+ def build_chain(retriever, model_name: str = LLM_MODEL_NAME):
+     # Local HF pipeline for a seq2seq (encoder-decoder) model.
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+     gen = pipeline(
+         "text2text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=512,
+     )
+     llm = HuggingFacePipeline(pipeline=gen)
+
+     prompt = PromptTemplate(
+         input_variables=["context", "question"],
+         template=PROMPT_TMPL,
+     )
+
+     qa = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=retriever,
+         chain_type_kwargs={"prompt": prompt},
+         return_source_documents=True,
+     )
+     return qa
+
+
+ def build_chain_qwen(retriever, model_name: str = "Qwen/Qwen2.5-7B-Instruct"):
+     # Qwen2.5 is a causal LM (decoder-only), not seq2seq.
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     # Ensure a padding token exists (use EOS as pad for causal models if missing).
+     if tokenizer.pad_token_id is None:
+         tokenizer.pad_token_id = tokenizer.eos_token_id
+
+     model = AutoModelForCausalLM.from_pretrained(model_name)
+
+     gen = pipeline(
+         task="text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=512,
+         do_sample=False,         # deterministic for QA
+         truncation=True,         # avoid context overruns
+         return_full_text=False,  # only the generated answer
+         eos_token_id=tokenizer.eos_token_id,
+         pad_token_id=tokenizer.pad_token_id,
+     )
+     llm = HuggingFacePipeline(pipeline=gen)
+
+     prompt = PromptTemplate(
+         input_variables=["context", "question"],
+         template=PROMPT_TMPL,
+     )
+
+     qa = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",  # keep as in the original snippet
+         retriever=retriever,
+         chain_type_kwargs={"prompt": prompt},
+         return_source_documents=True,
+     )
+     return qa
+
+
+ def build_chain_gemma(retriever, model_name: str = "google/gemma-2-2b-it"):
+     # Gemma 2 is a causal LM (decoder-only).
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     if tokenizer.pad_token_id is None:
+         tokenizer.pad_token_id = tokenizer.eos_token_id
+
+     model = AutoModelForCausalLM.from_pretrained(model_name)
+
+     gen = pipeline(
+         task="text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=512,
+         do_sample=False,         # deterministic for QA
+         truncation=True,         # avoid context overruns
+         return_full_text=False,  # only the generated continuation
+         eos_token_id=tokenizer.eos_token_id,
+         pad_token_id=tokenizer.pad_token_id,
+     )
+     llm = HuggingFacePipeline(pipeline=gen)
+
+     prompt = PromptTemplate(
+         input_variables=["context", "question"],
+         template=PROMPT_TMPL,
+     )
+
+     qa = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",  # keep current behavior
+         retriever=retriever,
+         chain_type_kwargs={"prompt": prompt},
+         return_source_documents=True,
+     )
+     return qa
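
A minimal usage sketch for these builders (illustrative, not part of the commit): it assumes an index already built by src/ingest.py, loaded with the same sentence-transformers model recorded in embeddings_model.txt.

    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS
    from extra_qa_chains import build_chain_qwen

    # Load the FAISS index saved by ingest.py with matching embeddings.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vs = FAISS.load_local("src/data_index", embeddings, allow_dangerous_deserialization=True)
    retriever = vs.as_retriever(search_kwargs={"k": 4})

    qa = build_chain_qwen(retriever)  # or build_chain(...) / build_chain_gemma(...)
    res = qa.invoke({"query": "What are the candidate's key projects?"})
    print(res["result"])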
src/ingest.py ADDED
@@ -0,0 +1,72 @@
+ from pathlib import Path
+ import argparse
+ import sys
+ import os
+
+ from dotenv import load_dotenv
+ from langchain_community.document_loaders import TextLoader, PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+
+ load_dotenv()  # still works locally
+
+ HF_API_TOKEN = os.getenv("HUGGING_FACE_API_TOKEN")
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+
+ # Default mirrors streamlit_app.py so ingest and query time stay in sync.
+ EMBED_MODEL_NAME = os.getenv("HUGGING_FACE_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+ LLM_MODEL_NAME = os.getenv("LLM_MODEL")
+
+ ROOT_DIR = Path(__file__).parent
+ INDEX_DIR = ROOT_DIR / "data_index"
+ DATA_DIR = ROOT_DIR / "data"
+
+
+ def load_documents(data_dir: Path):
+     docs = []
+     for path in data_dir.rglob("*"):
+         if path.is_dir():
+             continue
+         try:
+             if path.suffix.lower() in [".txt", ".md"]:
+                 docs.extend(TextLoader(str(path), encoding="utf-8").load())
+             elif path.suffix.lower() == ".pdf":
+                 docs.extend(PyPDFLoader(str(path)).load())
+         except Exception as e:
+             print(f"[skip] {path.name}: {e}", file=sys.stderr)
+     if not docs:
+         raise RuntimeError(f"No documents found in {data_dir}. Put .txt/.md/.pdf files there.")
+     return docs
+
+
+ def build_vectorstore(docs):
+     splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=120)
+     chunks = splitter.split_documents(docs)
+     embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)
+     vs = FAISS.from_documents(chunks, embeddings)
+     return vs
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="Ingest documents and build FAISS index.")
+     parser.parse_args()
+
+     print(f"Loading documents from {DATA_DIR}")
+     docs = load_documents(DATA_DIR)
+     print(f"Loaded {len(docs)} documents. Building index…")
+
+     vs = build_vectorstore(docs)
+     INDEX_DIR.mkdir(parents=True, exist_ok=True)
+     vs.save_local(str(INDEX_DIR))
+
+     # Persist the embedding model name so query-time code can load the same model.
+     (INDEX_DIR / "embeddings_model.txt").write_text(EMBED_MODEL_NAME, encoding="utf-8")
+
+     print(f"Index saved to {INDEX_DIR.resolve()}")
+
+
+ if __name__ == "__main__":
+     main()
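
ingest.py is driven entirely by environment variables; a minimal .env sketch (values illustrative, matching the defaults used elsewhere in this commit; keep the embedding model identical at ingest and query time), then run `python src/ingest.py` from the repo root:

    HUGGING_FACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
    LLM_MODEL=gemini-1.5-flash
    GOOGLE_API_KEY=<your key>
    HUGGING_FACE_API_TOKEN=<your token>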
src/qa_chain_cli.py ADDED
@@ -0,0 +1,101 @@
+ import argparse
+ import textwrap
+ from pathlib import Path
+ import os
+ from dotenv import load_dotenv
+ from qa_prompts import PROMPT_TMPL
+
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import RetrievalQA
+ from langchain_google_genai import ChatGoogleGenerativeAI
+
+ load_dotenv()
+
+ HF_API_TOKEN = os.getenv("HUGGING_FACE_API_TOKEN")
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+
+ EMBED_MODEL_NAME = os.getenv("HUGGING_FACE_EMBEDDING_MODEL")
+ LLM_MODEL_NAME = os.getenv("LLM_MODEL")
+
+ ROOT_DIR = Path(__file__).parent
+ INDEX_DIR = ROOT_DIR / "data_index"
+
+
+ def load_retriever(index_dir: Path, k: int = 4):
+     # Ensure we use the same embedding model that was used during ingest.
+     embed_model_name_path = index_dir / "embeddings_model.txt"
+     if not embed_model_name_path.exists():
+         raise RuntimeError(f"Missing {embed_model_name_path}. Re-run ingest.py.")
+     embed_model_name = embed_model_name_path.read_text(encoding="utf-8").strip()
+
+     embeddings = HuggingFaceEmbeddings(model_name=embed_model_name)
+     vs = FAISS.load_local(str(index_dir), embeddings, allow_dangerous_deserialization=True)
+     return vs.as_retriever(search_kwargs={"k": k})
+
+
+ def build_chain_gemini(retriever):
+     if not GOOGLE_API_KEY:
+         raise RuntimeError("Set GOOGLE_API_KEY in your .env to use the Gemini inference endpoint.")
+
+     # Uses the Google Generative AI (Gemini) hosted inference endpoint.
+     llm = ChatGoogleGenerativeAI(
+         model=LLM_MODEL_NAME,
+         api_key=GOOGLE_API_KEY,
+         temperature=0.1,
+         max_output_tokens=512,
+         convert_system_message_to_human=True,
+     )
+
+     prompt = PromptTemplate(
+         input_variables=["context", "question"],
+         template=PROMPT_TMPL,
+     )
+
+     # "stuff" packs all retrieved chunks into one prompt; switch to
+     # "map_reduce" if the context outgrows a single call.
+     qa = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=retriever,
+         chain_type_kwargs={"prompt": prompt},
+         return_source_documents=True,
+     )
+     return qa
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="Run recruiter Q/A over a saved FAISS index.")
+     parser.parse_args()
+
+     retriever = load_retriever(INDEX_DIR)
+     chain = build_chain_gemini(retriever)
+
+     print("\nMy Profile Chatbot ready. Ask about me.")
+     print("Type 'exit' to quit.\n")
+
+     while True:
+         try:
+             q = input("You: ").strip()
+         except (EOFError, KeyboardInterrupt):
+             print("\nBye!")
+             break
+         if not q:
+             continue
+         if q.lower() in {"exit", "quit", "q"}:
+             print("Bye!")
+             break
+
+         try:
+             res = chain.invoke({"query": q})
+             answer = res["result"] if isinstance(res, dict) else str(res)
+         except Exception as e:
+             answer = f"[error] {e}"
+
+         print("\nMaheen:", textwrap.fill(answer, width=100))
+         print()
+
+
+ if __name__ == "__main__":
+     main()
src/qa_prompts.py ADDED
@@ -0,0 +1,9 @@
+ PROMPT_TMPL = """You are a helpful chatbot that answers questions about the candidate's profile for recruiters.
+ Use ONLY the provided context. If the answer is not in the context, say you don't know. Be concise and factual.
+
+ Context:
+ {context}
+
+ Question: {question}
+
+ Answer:"""
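
A quick sketch of how the chains in this commit render the template (the context and question values here are illustrative):

    from langchain.prompts import PromptTemplate
    from qa_prompts import PROMPT_TMPL

    prompt = PromptTemplate(input_variables=["context", "question"], template=PROMPT_TMPL)
    # Retrieved chunks are substituted for {context}, the user query for {question}.
    print(prompt.format(
        context="(retrieved profile chunks go here)",
        question="What are the candidate's key projects?",
    ))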
src/streamlit_app.py ADDED
@@ -0,0 +1,180 @@
+ import os
+ import sys
+ import subprocess
+ from pathlib import Path
+ from typing import List
+
+ import streamlit as st
+ from dotenv import load_dotenv
+ from qa_prompts import PROMPT_TMPL
+
+ from langchain_community.vectorstores import FAISS
+ from langchain.chains import RetrievalQA
+ from langchain.prompts import PromptTemplate
+ from langchain.embeddings.base import Embeddings
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from huggingface_hub import InferenceClient
+
+ load_dotenv()  # still works locally
+
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+ HF_API_TOKEN = os.getenv("HUGGING_FACE_API_TOKEN")
+
+ EMBED_MODEL_NAME = os.getenv("HUGGING_FACE_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+ LLM_MODEL_NAME = os.getenv("LLM_MODEL", "gemini-1.5-flash")
+
+ ROOT_DIR = Path(__file__).parent
+ INDEX_DIR = ROOT_DIR / "data_index"
+
+ # set_page_config must be the first Streamlit call on the page.
+ st.set_page_config(page_title="Maheen's Profile Chatbot", page_icon="💬", layout="centered")
+
+
+ ###### run ingest.py ######
+
+ if not INDEX_DIR.exists():
+     with st.spinner("Index not found. Building FAISS index (first run)…"):
+         # Ensure ingest.py reads the same env/secrets model and paths;
+         # resolve the script relative to this file so cwd doesn't matter.
+         proc = subprocess.run(
+             [sys.executable, str(ROOT_DIR / "ingest.py")],
+             capture_output=True,
+             text=True,
+         )
+         if proc.returncode != 0:
+             st.error(f"ingest.py failed:\n{proc.stderr}")
+             st.stop()
+
+
+ class HFAPIEmbeddings(Embeddings):
+     """Embeddings via the Hugging Face Inference API (no local model download)."""
+
+     def __init__(self, repo_id: str, token: str | None = None, timeout: float = 120.0):
+         self.client = InferenceClient(model=repo_id, token=token, timeout=timeout)
+
+     def embed_documents(self, texts: List[str]) -> List[List[float]]:
+         # One vector per input text.
+         return self.client.feature_extraction(texts)
+
+     def embed_query(self, text: str) -> List[float]:
+         vec = self.client.feature_extraction(text)
+         # The API may return a batch of one vector; unwrap it.
+         return vec[0] if (isinstance(vec, list) and vec and isinstance(vec[0], list)) else vec
+
+
+ def build_chain_gemini(retriever, _llm_repo, _max_new, _temp, _show_sources):
+     if not GOOGLE_API_KEY:
+         raise RuntimeError("Set GOOGLE_API_KEY in your .env to use the Gemini inference endpoint.")
+
+     # Uses the Google Generative AI (Gemini) hosted inference endpoint.
+     llm = ChatGoogleGenerativeAI(
+         model=_llm_repo,
+         api_key=GOOGLE_API_KEY,
+         temperature=_temp,
+         max_output_tokens=_max_new,
+         convert_system_message_to_human=True,
+     )
+
+     prompt = PromptTemplate(
+         input_variables=["context", "question"],
+         template=PROMPT_TMPL,
+     )
+
+     # "stuff" for now; "map_reduce" is an alternative for large contexts.
+     qa = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=retriever,
+         chain_type_kwargs={"prompt": prompt},
+         return_source_documents=_show_sources,
+     )
+     return qa
+
+
+ # ========================= Streamlit UI =========================
+ st.title("Maheen's Profile Chatbot")
+ st.caption("RAG over my profile docs using FAISS + Hugging Face Inference API embeddings + Gemini")
+
+ # Sidebar settings
+ st.sidebar.header("Settings")
+ hf_token = HF_API_TOKEN
+ if not hf_token:
+     st.sidebar.warning("HUGGING_FACE_API_TOKEN is not set. Set it in your shell before running the app.")
+
+ # store_dir = st.sidebar.text_input("FAISS store path", value=INDEX_DIR)
+ # llm_repo_id = st.sidebar.text_input("LLM repo (HF)", value=LLM_MODEL_NAME)
+ # embed_repo_id = st.sidebar.text_input("Embedding model (HF)", value=EMBED_MODEL_NAME)
+
+ # Display model names as text (read-only)
+ st.sidebar.markdown(f"**Embedding Model:** `{EMBED_MODEL_NAME}`")
+ st.sidebar.markdown(f"**Chat Model:** `{LLM_MODEL_NAME}`")
+
+ # Fixed settings; uncomment the sidebar widgets to make them adjustable.
+ # k = st.sidebar.number_input("Top-k retrieved chunks", min_value=1, max_value=20, value=4, step=1)
+ k = 4
+ # max_new_tokens = st.sidebar.number_input("Max new tokens", min_value=64, max_value=2048, value=512, step=64)
+ max_new_tokens = 512
+ # temperature = st.sidebar.slider("Temperature", min_value=0.0, max_value=1.0, value=0.1, step=0.05)
+ temperature = 0.1
+ # show_sources = st.sidebar.checkbox("Show sources", value=False)
+ show_sources = False
+
+
+ ###################
+
+ # Session state for chat history
+ if "history" not in st.session_state:
+     st.session_state.history = []  # list of (user, assistant, sources)
+
+ # Load vector store & chain lazily, cache across reruns
+ @st.cache_resource(show_spinner=True)
+ def _load_chain(_store_dir: str, _embed_repo: str, _llm_repo: str, _k: int, _max_new: int, _temp: float, _show_sources: bool):
+     if not Path(_store_dir).exists():
+         raise FileNotFoundError(f"FAISS store not found at '{_store_dir}'. Run ingest.py first.")
+     embeddings = HFAPIEmbeddings(repo_id=_embed_repo, token=hf_token)
+     vs = FAISS.load_local(
+         _store_dir,
+         embeddings,
+         allow_dangerous_deserialization=True,  # required by newer LC versions
+     )
+     retriever = vs.as_retriever(search_kwargs={"k": _k})
+     chain = build_chain_gemini(retriever, _llm_repo, _max_new, _temp, _show_sources)
+     return chain
+
+
+ # Prepare chain
+ with st.spinner("Preparing retriever & LLM…"):
+     chain = _load_chain(str(INDEX_DIR), EMBED_MODEL_NAME, LLM_MODEL_NAME, k, max_new_tokens, temperature, show_sources)
+
+
+ def render_sources(docs):
+     if not docs:
+         return
+     st.markdown("**Sources**")
+     for i, d in enumerate(docs, start=1):
+         src = d.metadata.get("source", "unknown")
+         page = d.metadata.get("page", None)
+         label = f"{Path(src).name}" + (f" (page {page+1})" if isinstance(page, int) else "")
+         with st.expander(f"{i}. {label}"):
+             st.write(d.page_content[:1500] + ("…" if len(d.page_content) > 1500 else ""))
+
+
+ # --- Chat input with Enter submit ---
+ with st.form("chat-form", clear_on_submit=True):
+     user_input = st.text_input(
+         "Ask about my profile:",
+         placeholder="e.g., What are your key projects?"
+     )
+     submitted = st.form_submit_button("Ask")
+
+ if submitted and user_input.strip():
+     with st.spinner("Thinking…"):
+         try:
+             res = chain.invoke({"query": user_input.strip()})
+             if isinstance(res, dict):
+                 answer = res.get("result", "")
+                 sources = res.get("source_documents", []) if show_sources else []
+             else:
+                 answer, sources = str(res), []
+         except Exception as e:
+             answer, sources = f"[error] {e}", []
+     st.session_state.history.append((user_input.strip(), answer, sources))
+
+ # Display history
+ for q, a, srcs in st.session_state.history:
+     st.markdown(f"**You:** {q}")
+     st.markdown(f"**Assistant:** {a}")
+     if show_sources:
+         render_sources(srcs)
+     st.markdown("---")
+
+ # Footer
+ # st.caption("Enter submits. Datastore path fixed from code/env. Models shown read-only.")
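
To try the app locally (assuming the dependencies are installed and src/data holds profile documents): run `streamlit run src/streamlit_app.py`. On first run the app shells out to ingest.py to build the FAISS index; the index can also be built ahead of time with `python src/ingest.py` and queried without the UI via `python src/qa_chain_cli.py`.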