Spaces:

Ahmad-01
/

Knowledgebase_RAG_APP

Running

File size: 3,539 Bytes

ba765de

import os
import gradio as gr
import faiss
import numpy as np
import gdown

from groq import Groq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings


# ==============================
# 🔐 Load Groq API Key Securely
# ==============================
groq_api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)


# ==============================
# 📥 Download Knowledge Base
# ==============================
FILE_ID = "1ppfRoaQik3h1Gr9A15xSOLGVpNQtm8eH"
DOWNLOAD_URL = f"https://drive.google.com/uc?id={FILE_ID}"
PDF_PATH = "knowledge_base.pdf"

if not os.path.exists(PDF_PATH):
    gdown.download(DOWNLOAD_URL, PDF_PATH, quiet=False)


# ==============================
# 📚 Create Vector Database
# ==============================
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=150
)

chunks = text_splitter.split_documents(documents)
texts = [chunk.page_content for chunk in chunks]

embeddings = embedding_model.embed_documents(texts)
embeddings = np.array(embeddings).astype("float32")

dimension = embeddings.shape[1]
vector_store = faiss.IndexFlatL2(dimension)
vector_store.add(embeddings)

print("✅ Knowledge Base Loaded Successfully")


# ==============================
# 🤖 RAG Function
# ==============================
def ask_question(question):
    question_embedding = embedding_model.embed_query(question)
    question_embedding = np.array([question_embedding]).astype("float32")

    distances, indices = vector_store.search(question_embedding, k=4)

    retrieved_texts = [texts[i] for i in indices[0]]
    context = "\n\n".join(retrieved_texts)

    prompt = f"""
You are an expert assistant.

Use ONLY the context below to answer clearly.
Format with headings and bullet points if needed.

CONTEXT:
{context}

QUESTION:
{question}
"""

    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
    )

    answer = chat_completion.choices[0].message.content

    return f"""
## 📌 Answer

{answer}
"""


# ==============================
# 🎨 Professional Yellow UI
# ==============================
custom_css = """
body {
    background-color: #ffffff;
    font-family: Arial, sans-serif;
}

.gradio-container {
    background-color: #fffbea;
    border-radius: 15px;
    padding: 25px;
}

button {
    background-color: #ffc107 !important;
    color: black !important;
    font-weight: bold !important;
    border-radius: 10px !important;
}

textarea {
    border-radius: 10px !important;
}

.answer-box {
    background-color: white;
    border: 2px solid #ffc107;
    padding: 20px;
    border-radius: 12px;
    min-height: 250px;
}
"""


with gr.Blocks(css=custom_css) as app:

    gr.Markdown(
        """
        # 🟡 KnowledgeBase AI Assistant
        ### Ask questions from my curated knowledge base
        """
    )

    question_input = gr.Textbox(
        label="Enter Your Question",
        placeholder="Ask something from the knowledge base..."
    )

    ask_button = gr.Button("Get Answer")

    answer_output = gr.Markdown(elem_classes="answer-box")

    ask_button.click(ask_question, inputs=question_input, outputs=answer_output)

app.launch()