"""Cameroon Penal Code RAG chatbot.

Loads prebuilt bilingual (EN/FR) FAISS indexes over the Cameroonian Penal
Code, answers questions with a quantized TinyLlama model served through
ctransformers, and exposes a Gradio chat UI.
"""

import os
import warnings

import gradio as gr
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langdetect import detect
from langdetect.lang_detect_exception import LangDetectException

warnings.filterwarnings("ignore")

# --- Paths to the prebuilt FAISS indexes, one per supported language ---
BASE_PATH = os.getcwd()
INDEX_PATHS = {
    "en": os.path.join(BASE_PATH, "faiss_index_en"),
    "fr": os.path.join(BASE_PATH, "faiss_index_fr"),
}

# Multilingual embedder so a single model serves both the EN and FR indexes.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# Quantized TinyLlama (GGUF, CPU-friendly) via ctransformers.
# NOTE(review): the original loaded the same GGUF file a second time through
# ctransformers.AutoModelForCausalLM into an unused variable (and that import
# shadowed transformers.AutoModelForCausalLM); the duplicate load is removed.
llm = CTransformers(
    model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    model_file="tinyllama-1.1b-chat-v1.0.Q4_0.gguf",
    model_type="llama",
    config={"max_new_tokens": 512, "temperature": 0.0, "context_length": 4096},
)

# Persona prompt — currently only used by the (commented-out)
# history-aware-retriever path; kept for backward compatibility.
system_template = (
    "You are Bot — an intelligent assistant trained on cameroon penal code data."
    "You exist to help individuals answer questions about the Cameroonian Penal Code."
    " You always provide the source penal code section or article number, clear, "
    "compliant, and factual answers grounded in official penal code documentation."
    "When given an law question and information, you explain all components."
    "If a query is ambiguous or unsupported, you politely defer or recommend "
    "reviewing the relevant penal code manually."
    "You do not speculate or make law interpretations — you clarify with precision and data."
)
condense_question_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_template),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
    ]
)

# Unused by the active chain; retained so existing importers keep working.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Prompt for the stuff-documents chain: retrieved context + history + question.
qa_prompt = PromptTemplate.from_template(
    "You are a legal assistant. Use and highlight the following penal or "
    "article code number and context and conversation history to answer "
    "the current question.\n\n"
    "Context:\n{context}\n\n"
    "Conversation History:\n{chat_history}\n\n"
    "Current Question:\n{input}\n"
    "Answer:"
)

# Built ONCE and shared across requests (the original rebuilt this chain on
# every call to rag_tool_func, plus an unused module-level copy).
llm_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Preload one retriever per language so requests don't reload the index.
# allow_dangerous_deserialization is acceptable here only because the indexes
# are built locally by us, not downloaded from an untrusted source.
retrievers = {
    lang: FAISS.load_local(
        folder_path=path,
        embeddings=embedding,
        allow_dangerous_deserialization=True,
    ).as_retriever(search_kwargs={"k": 2})
    for lang, path in INDEX_PATHS.items()
}


def truncate_history(chat_history, max_chars=1500):
    """Keep the most recent (question, answer) pairs totalling <= max_chars.

    Walks the history newest-first and stops at the first pair that would
    exceed the budget, so the kept pairs are always the most recent ones,
    in chronological order. Accepts None/empty input and returns [].
    """
    if not chat_history:  # guards the reversed(None) crash in the original
        return []
    total = 0
    trimmed = []
    for q, a in reversed(chat_history):
        pair_len = len(q) + len(a)
        if total + pair_len > max_chars:
            break
        trimmed.insert(0, (q, a))
        total += pair_len
    return trimmed


def rag_tool_func(input_question: str, chat_history: list = None) -> str:
    """Answer a question with the language-appropriate RAG chain.

    Detects the question's language (fr -> French index, anything else ->
    English index), folds the truncated chat history into the prompt, and
    returns the chain's answer string.
    """
    try:
        lang = detect(input_question)
    except LangDetectException:
        # detect() raises on empty/ambiguous text; default to English.
        lang = "en"
    lang = "fr" if lang == "fr" else "en"
    retriever = retrievers[lang]

    # Flatten the (bounded) history into the textual form the prompt expects.
    history_str = "".join(
        f"User: {q}\nAssistant: {a}\n" for q, a in truncate_history(chat_history)
    )

    rag_chain = create_retrieval_chain(retriever, llm_chain)
    result = rag_chain.invoke({"input": input_question, "chat_history": history_str})
    return result["answer"]


def chatbot_interface(user_input, history):
    """Gradio callback: answer user_input, append the turn, return UI state."""
    if not isinstance(history, list):
        history = []
    # rag_tool_func truncates internally; the full history is kept in state.
    answer = rag_tool_func(user_input, history)
    history.append((user_input, answer))
    return history, history  # (chatbot display, session state)


with gr.Blocks() as demo:
    gr.Markdown("# 🇨🇲 Cameroon Penal Code Chatbot")
    chatbot_ui = gr.Chatbot(label="Ask me anything about the Cameroon Penal Code")
    with gr.Row():
        question_box = gr.Textbox(
            placeholder="Ask a legal question...", label="Your question"
        )
        send_btn = gr.Button("Send")
    chat_state = gr.State([])
    send_btn.click(
        fn=chatbot_interface,
        inputs=[question_box, chat_state],
        outputs=[chatbot_ui, chat_state],
    )
    question_box.submit(
        fn=chatbot_interface,
        inputs=[question_box, chat_state],
        outputs=[chatbot_ui, chat_state],
    )

if __name__ == "__main__":
    demo.launch()