"""FastAPI RAG service: answers questions from a Chroma vector store via a Groq-hosted LLM."""

from fastapi import FastAPI
from pydantic import BaseModel
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
import os
import re

load_dotenv()

app = FastAPI()

CHROMA_PATH = "./chroma_db_wilson"

PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""

llm = ChatGroq(
    model="qwen/qwen3-32b",
    api_key=os.getenv("GROQ_API_KEY"),
)

# Load RAG components once at startup.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)


class PromptRequest(BaseModel):
    # The user's question, answered against the indexed documents.
    prompt: str


def clean_text(text: str) -> str:
    """Strip model reasoning tags and Markdown formatting, returning plain prose.

    Args:
        text: Raw LLM output, possibly containing <think> blocks and Markdown.

    Returns:
        A single-line, whitespace-normalized plain-text string.
    """
    # Remove <think>...</think> reasoning blocks emitted by the Qwen3 model.
    # BUG FIX: the original pattern was r'.*?' — its tag delimiters had been
    # lost, making the substitution a no-op (lazy empty matches replaced by
    # the empty string leave the text unchanged). Restore the tag markers.
    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    # Remove bold/italic markers (**text**, *text*, __text__).
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
    text = re.sub(r'\*(.+?)\*', r'\1', text)
    text = re.sub(r'__(.+?)__', r'\1', text)
    # Remove bullet points ("- item" or "* item").
    text = re.sub(r'^\s*[-*]\s+', '', text, flags=re.MULTILINE)
    # Remove headings (## Title).
    text = re.sub(r'#+\s+', '', text)
    # Collapse newlines into spaces.
    text = text.replace('\n', ' ')
    # Collapse runs of spaces left by the substitutions above.
    text = re.sub(r' {2,}', ' ', text)
    return text.strip()


@app.post("/generate")
def generate(request: PromptRequest):
    """Answer a user prompt via retrieval-augmented generation over Chroma.

    Returns a JSON object {"response": <answer>}; falls back to a canned
    message when no sufficiently relevant document is found.
    """
    # 1. Retrieve the most relevant documents from Chroma.
    results = db.similarity_search_with_relevance_scores(request.prompt, k=3)

    # 2. Relevance threshold check — bail out when nothing matches well enough.
    if not results or results[0][1] < 0.2:
        return {"response": "I’m not sure yet. For more information, please contact Wilson in the email below."}

    # 3. Join the retrieved chunks into a single context string.
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)

    # 4. Build the prompt with the retrieved context.
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text, question=request.prompt
    )

    # 5. Send to the LLM and clean its answer before returning.
    response = llm.invoke(prompt)
    clean_content = clean_text(response.content)
    return {"response": clean_content}