File size: 2,365 Bytes
033ecc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bcae4c
033ecc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d4fb7a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from fastapi import FastAPI
from pydantic import BaseModel
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
import os
import re

# Read environment variables (e.g. GROQ_API_KEY) from a local .env file.
load_dotenv()

app = FastAPI()

# Directory where the persisted Chroma vector store lives on disk.
CHROMA_PATH = "./chroma_db_wilson"

# Prompt sent to the LLM: retrieved context first, then the user question.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""

# Groq-hosted chat model; the API key comes from the environment loaded above.
llm = ChatGroq(
    model="qwen/qwen3-32b",
    api_key=os.getenv("GROQ_API_KEY")
)

# Load RAG components once at startup
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)

class PromptRequest(BaseModel):
    """Request body for /generate: the user's free-text question."""
    prompt: str


def clean_text(text: str) -> str:
    """Strip model reasoning tags and markdown formatting from LLM output.

    Removes ``<think>...</think>`` blocks, bold/italic markers, bullet
    prefixes, and markdown headings, then flattens all whitespace (including
    newlines) to single spaces.

    Args:
        text: Raw text returned by the LLM.

    Returns:
        A single-line plain-text string with surrounding whitespace stripped.
    """
    # Remove <think> reasoning blocks (DOTALL so they may span lines).
    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    # Unwrap bold/italic markers (**text**, *text*, __text__).
    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
    text = re.sub(r'\*(.+?)\*', r'\1', text)
    text = re.sub(r'__(.+?)__', r'\1', text)
    # Drop bullet-point prefixes (- item or * item) at line starts.
    text = re.sub(r'^\s*[-*]\s+', '', text, flags=re.MULTILINE)
    # Drop markdown heading markers (## Title). Anchored to line start so a
    # literal "# " in the middle of a sentence is not deleted (the unanchored
    # version stripped e.g. "item # 5" down to "item 5").
    text = re.sub(r'^#+\s+', '', text, flags=re.MULTILINE)
    # Flatten newlines, tabs, and repeated spaces into single spaces.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

@app.post("/generate")
def generate(request: PromptRequest):
    """Answer a question using retrieval-augmented generation.

    Retrieves the top-3 most relevant chunks from the Chroma store, bails
    out with a fallback message when nothing scores above the relevance
    threshold, otherwise builds a context-grounded prompt, queries the LLM,
    and returns the cleaned answer.
    """
    # Retrieve candidate documents together with their relevance scores.
    matches = db.similarity_search_with_relevance_scores(request.prompt, k=3)

    # Guard clause: no hits, or the best hit is below the 0.2 threshold.
    if not matches or matches[0][1] < 0.2:
        return {"response": "I’m not sure yet. For more information, please contact Wilson in the email below."}

    # Concatenate the retrieved chunks into one context string.
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in matches)

    # Fill the template with the retrieved context and the user question.
    rag_prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text,
        question=request.prompt,
    )

    # Query the model and strip markdown/reasoning artifacts from its reply.
    answer = llm.invoke(rag_prompt)
    return {"response": clean_text(answer.content)}