Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import requests | |
# SQLite workaround (needed for Chroma on HF Spaces): when the optional
# pysqlite3 package can be imported, register it in sys.modules under the name
# "sqlite3" so later imports of sqlite3 resolve to it. This must run before
# anything imports sqlite3.
try:
    __import__("pysqlite3")
    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
except Exception as exc:
    # Best effort: keep the interpreter's own sqlite3, but say why the swap
    # did not happen instead of hiding the reason.
    print(f"pysqlite3 not used, keeping default sqlite3: {exc!r}", flush=True)
| import gradio as gr | |
| from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader | |
| from langchain_text_splitters import CharacterTextSplitter | |
| from langchain_chroma import Chroma | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| # ======================== | |
| # CONFIG | |
| # ======================== | |
# Folder scanned for source documents (the avatar images are looked up here
# too) and the directory handed to Chroma as its persist directory.
DOCS_DIR = "multiple_docs"
DB_DIR = "./db"

# DeepSeek chat-completions endpoint. The key comes from the environment and
# is None when the variable is not set.
DEEPSEEK_API_URL = "https://api.deepseek.com/v1/chat/completions"
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")

# Assistant message that opens a new or cleared conversation.
WELCOME_MESSAGE = (
    "Hello, I'm Thierry Decae's chatbot. "
    "You can ask me recruitment-related questions about my experience, "
    "skills, availability, work eligibility, projects, and background. "
    "You can chat with me in multiple languages."
)
| # ======================== | |
| # DEEPSEEK CALL | |
| # ======================== | |
def call_deepseek(messages, *, model="deepseek-chat", temperature=0.4,
                  max_tokens=700, timeout=60):
    """Send a chat-completion request to DeepSeek and return the reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts, sent as the
            ``messages`` field of the request payload.
        model: Model name placed in the payload.
        temperature: Sampling temperature placed in the payload.
        max_tokens: Completion length cap placed in the payload.
        timeout: Seconds passed to ``requests.post`` as its timeout.

    Returns:
        The content of the first choice with surrounding whitespace stripped,
        or the literal string ``"Missing DEEPSEEK_API_KEY."`` when no API key
        is configured (no request is made in that case).

    Raises:
        requests.HTTPError: If the API answers with an error status.
        KeyError, IndexError, AttributeError: If the response JSON does not
            have the ``choices[0].message.content`` string shape.
    """
    if not DEEPSEEK_API_KEY:
        # Reported as a normal reply so the chat shows the configuration
        # problem instead of failing.
        return "Missing DEEPSEEK_API_KEY."

    headers = {
        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    response = requests.post(
        DEEPSEEK_API_URL, headers=headers, json=payload, timeout=timeout
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"].strip()
| # ======================== | |
| # LOAD DOCS | |
| # ======================== | |
def load_documents():
    """Load every supported file in DOCS_DIR and split it into chunks.

    Supported files are recognised by extension (.pdf, .docx, .txt), matched
    case-insensitively so names such as ``CV.PDF`` are not skipped. Files are
    visited in sorted name order because ``os.listdir`` gives no ordering
    guarantee. A file that fails to load is reported and skipped.

    Returns:
        The chunks produced by ``CharacterTextSplitter`` (chunk_size=1000,
        chunk_overlap=100) over all loaded documents.

    Raises:
        ValueError: If no document could be loaded.
        OSError: If DOCS_DIR cannot be listed.
    """
    # Lambdas keep loader construction lazy: a loader is only touched when a
    # file of its type is actually present.
    loaders = {
        ".pdf": lambda path: PyPDFLoader(path).load(),
        ".docx": lambda path: Docx2txtLoader(path).load(),
        ".txt": lambda path: TextLoader(path, encoding="utf-8").load(),
    }

    docs = []
    for f in sorted(os.listdir(DOCS_DIR)):
        lowered = f.lower()
        load = next(
            (fn for ext, fn in loaders.items() if lowered.endswith(ext)),
            None,
        )
        if load is None:
            # Not a document type handled here (e.g. the avatar images).
            continue
        try:
            docs.extend(load(os.path.join(DOCS_DIR, f)))
        except Exception as e:
            # One unreadable file must not prevent the others from loading.
            print(f"Error loading {f}: {e}", flush=True)

    if not docs:
        raise ValueError("No documents found")

    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return splitter.split_documents(docs)
| # ======================== | |
| # VECTORSTORE | |
| # ======================== | |
def build_vectorstore():
    """Embed the document chunks and index them in a Chroma store at DB_DIR.

    The index is rebuilt from the documents on every call. Because DB_DIR is
    persisted, the collection left there by a previous run is dropped first;
    otherwise each restart would add the same chunks again and retrieval
    would return duplicates.

    Returns:
        The Chroma vector store built from the chunks of ``load_documents()``.

    Raises:
        ValueError: Propagated from ``load_documents()`` when there are no
            documents.
    """
    print("Loading embeddings...", flush=True)
    embedding = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Load before touching the store so a missing-documents error leaves any
    # existing index as it was.
    docs = load_documents()
    print(f"Loaded {len(docs)} chunks", flush=True)

    # Drop whatever an earlier run persisted under the default collection.
    Chroma(
        embedding_function=embedding,
        persist_directory=DB_DIR,
    ).delete_collection()

    return Chroma.from_documents(
        docs,
        embedding,
        persist_directory=DB_DIR,
    )


# Built once at import time; answer_question reuses this retriever, which
# returns the 6 best-matching chunks per query.
vectorstore = build_vectorstore()
retriever = vectorstore.as_retriever(search_kwargs={"k": 6})
| # ======================== | |
| # HISTORY FORMAT | |
| # ======================== | |
def _content_to_text(content):
    """Flatten a chat message's content into plain text.

    Strings are returned unchanged. A dict contributes its ``"text"`` value
    when that is a string and nothing otherwise. Lists and tuples are
    flattened item by item and joined with newlines. ``None`` becomes an
    empty string; any other value is converted with ``str``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, (list, tuple)):
        pieces = (_content_to_text(item) for item in content)
        return "\n".join(piece for piece in pieces if piece)
    return str(content)


def format_history(history):
    """Render the most recent chat messages as ``role: content`` lines.

    Args:
        history: List of message dicts with ``"role"`` and ``"content"``
            keys, or None/empty. Content may be a plain string or a
            structured value (for example a list of text blocks), which is
            flattened to text. Entries that are not dicts are skipped.

    Returns:
        One line per message for at most the last 8 messages, joined with
        newlines. Messages without a role or without any text are omitted.
        Returns an empty string when there is no history.
    """
    if not history:
        return ""

    lines = []
    for msg in history[-8:]:
        if not isinstance(msg, dict):
            continue
        role = msg.get("role")
        text = _content_to_text(msg.get("content"))
        if role and text:
            lines.append(f"{role}: {text}")
    return "\n".join(lines)
| # ======================== | |
| # MAIN QA FUNCTION | |
| # ======================== | |
def answer_question(query, history):
    """Answer one chat turn from retrieved document context.

    Args:
        query: Text typed by the user.
        history: List of chat message dicts, or None. None is replaced by a
            history holding only the welcome message.

    Returns:
        A ``("", history)`` tuple. The empty string is the new value for the
        first output (the input textbox); ``history`` is the same list that
        was passed in, extended in place with the user turn and the
        assistant reply. A blank query returns the history unchanged. Any
        failure during retrieval or the model call is printed and answered
        with "Error while answering.".
    """
    if history is None:
        history = [{"role": "assistant", "content": WELCOME_MESSAGE}]

    if query.strip() == "":
        # Nothing to ask; leave the conversation as it is.
        return "", history

    system_prompt = """
You are Thierry Decae's recruitment chatbot.
Answer questions about Thierry's experience, skills, and career.
Use only provided context.
If unsure, say "I'm not sure about that."
Always answer as Thierry ("I", "my").
"""

    try:
        matches = retriever.invoke(query)
        context = "\n\n".join(
            match.page_content for match in matches if match.page_content
        )
        history_text = format_history(history)
        user_prompt = f"""
Conversation:
{history_text}
Context:
{context}
Question:
{query}
Answer:
"""
        reply = call_deepseek(
            [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ]
        )
    except Exception as error:
        # Boundary of the UI callback: report and keep the chat alive.
        print(error, flush=True)
        reply = "Error while answering."

    history.extend(
        [
            {"role": "user", "content": query},
            {"role": "assistant", "content": reply},
        ]
    )
    return "", history
def clear_chat():
    """Return a new chat history that holds only the welcome message."""
    welcome_turn = dict(role="assistant", content=WELCOME_MESSAGE)
    return [welcome_turn]
| # ======================== | |
| # UI | |
| # ======================== | |
# Avatar pictures are optional: they are used only when both files exist in
# DOCS_DIR, otherwise the chatbot is given no avatars (None).
guest_img = os.path.join(DOCS_DIR, "Guest.jpg")
thierry_img = os.path.join(DOCS_DIR, "Thierry Picture.jpg")
avatars = (
    [guest_img, thierry_img]
    if all(os.path.exists(image) for image in (guest_img, thierry_img))
    else None
)
with gr.Blocks() as demo:
    gr.Markdown("# Thierry Decae's Personal Assistant")

    # The conversation opens with the welcome message already displayed.
    chatbot = gr.Chatbot(
        value=[{"role": "assistant", "content": WELCOME_MESSAGE}],
        avatar_images=avatars,
        height=500,
    )
    msg = gr.Textbox(placeholder="Ask a question...")
    clear = gr.Button("Clear")

    # Submitting sends the text and current history to answer_question, whose
    # two return values update the textbox and the chatbot respectively.
    msg.submit(
        fn=answer_question,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
    )
    # Clearing replaces the chatbot history with the value from clear_chat.
    clear.click(fn=clear_chat, inputs=None, outputs=chatbot)

# Serve on all interfaces; the port comes from PORT and defaults to 7860.
demo.launch(
    server_name="0.0.0.0",
    server_port=int(os.environ.get("PORT", 7860)),
)