UPDATE
- .DS_Store +0 -0
- .gitattributes +1 -0
- Dockerfile +17 -0
- __pycache__/main.cpython-310.pyc +0 -0
- config.py +19 -0
- llm_chain.py +69 -0
- main.py +47 -0
- pyproject.toml +18 -0
- retriever.py +61 -0
- uv.lock +0 -0
- vectorStore/chroma.sqlite3 +3 -0
.DS_Store
ADDED
Binary file (6.15 kB)
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+vectorStore/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,17 @@
+FROM python:3.10-slim
+
+COPY . /app
+
+WORKDIR /app
+
+USER root
+
+RUN pip install uv
+
+RUN uv sync
+
+RUN chmod -R 777 /app
+
+EXPOSE 7860
+
+CMD ["uv", "run", "main.py"]
__pycache__/main.cpython-310.pyc
ADDED
Binary file (3.58 kB)
config.py
ADDED
@@ -0,0 +1,19 @@
+# config.py
+# This file stores all the configuration variables and constants for the application.
+
+# Embedding Model Configuration
+MODEL_NAME = "jinaai/jina-embeddings-v3-base-en"
+MODEL_KWARGS = {'device': 'cpu', 'trust_remote_code': True}
+ENCODE_KWARGS = {'normalize_embeddings': False}
+
+# Vector Store Configuration
+VECTOR_STORE_DIRECTORY = "vectorStore"
+
+# LLM Configuration
+LLM_MODEL = "meta-llama/llama-4-maverick-17b-128e-instruct"
+LLM_TEMPERATURE = 0.8
+
+# Retriever Configuration
+DENSE_RETRIEVER_K = 3
+KEYWORD_RETRIEVER_K = 3
+ENSEMBLE_WEIGHTS = [0.5, 0.5]
llm_chain.py
ADDED
@@ -0,0 +1,69 @@
+# llm_chain.py
+# This file configures the language model, prompt template, and the final processing chain.
+
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_groq import ChatGroq
+from config import LLM_MODEL, LLM_TEMPERATURE
+
+def get_llm():
+    """Initializes and returns the ChatGroq LLM."""
+    return ChatGroq(
+        model=LLM_MODEL,
+        temperature=LLM_TEMPERATURE
+    )
+
+def get_prompt_template():
+    """Creates and returns the ChatPromptTemplate for the RAG chain."""
+    prompt_text = """
+You are an AI assistant specialized exclusively in answering questions from **three mathematics books authored by Ice Venkatesh**.
+Your role is to provide **clear, precise, and complete answers strictly based on the retrieved context from those books**.
+
+---
+
+### 🔹 Core Rules
+
+1. **Strict Reliance on Context**: Only use the retrieved context (`context`) from Ice Venkatesh’s books. Never invent, guess, or rely on outside knowledge. If the context is insufficient, state this clearly.
+
+2. **No External Sources**: Do not use prior training knowledge or the internet. All answers, definitions, proofs, and examples must come **only from the provided context**.
+
+3. **Mathematical Rigor & Clarity**: Provide correct, step-by-step reasoning when solving problems. Keep explanations simple, precise, and mathematically flawless.
+
+4. **Handling Off-Topic Queries**: If the user asks something unrelated to the books or mathematics, politely decline. Example: "I can only help with questions related to the three mathematics books by Ice Venkatesh. Unfortunately, I cannot assist with topics outside that scope."
+
+5. **Style & Professionalism**: Always be polite, clear, and professional. Avoid filler or speculation.
+
+---
+
+### 🔹 Response Instructions
+
+* Use the `context` to extract the necessary information.
+* Answer the `query` directly, grounded **only in the retrieved book content**.
+* If insufficient context is found, respond with: "The books by Ice Venkatesh do not provide enough information to answer this question."
+
+---
+
+**Retrieved context:**
+{context}
+
+**User question:**
+{query}
+"""
+    return ChatPromptTemplate.from_template(prompt_text)
+
+def create_rag_chain(retriever):
+    """Creates and returns the full RAG chain, accepting a retriever as an argument."""
+    print("Creating RAG chain...")
+    llm = get_llm()
+    prompt = get_prompt_template()
+    output_parser = StrOutputParser()
+
+    chain = (
+        {"context": retriever, "query": RunnablePassthrough()}
+        | prompt
+        | llm
+        | output_parser
+    )
+    print("RAG chain created successfully.")
+    return chain
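
The dict at the head of the chain fans the incoming question out in parallel: the retriever fills `context` while RunnablePassthrough forwards the raw string as `query`. A minimal sketch with a stand-in retriever (the lambda below is hypothetical, not the project's retriever):

from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

fake_retriever = RunnableLambda(lambda q: f"[docs matching: {q}]")  # stand-in only
stage = RunnableParallel({"context": fake_retriever, "query": RunnablePassthrough()})
print(stage.invoke("What is a derivative?"))
# {'context': '[docs matching: What is a derivative?]', 'query': 'What is a derivative?'}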
main.py
ADDED
@@ -0,0 +1,47 @@
+# main.py
+# This is the main file that runs the Sanic web server.
+
+from sanic import Sanic, response
+from retriever import get_ensemble_retriever
+from llm_chain import create_rag_chain
+
+app = Sanic("VibbaBackend")
+
+@app.before_server_start
+async def setup_model(app_instance, loop):
+    """
+    Initializes the retriever and RAG chain and attaches them
+    to the application context before the server starts.
+    """
+    print("Server starting up... Initializing model pipeline.")
+    retriever = get_ensemble_retriever()
+    rag_chain = create_rag_chain(retriever)
+    app_instance.ctx.rag_chain = rag_chain
+    print("Model pipeline is ready.")
+
+@app.get("/getResponse")
+async def get_response_endpoint(request):
+    """
+    Endpoint to get an answer to a question using the RAG chain.
+    Expects a 'question' query parameter.
+    """
+    question = request.args.get("question")
+    if not question:
+        return response.json(
+            {"error": "Please provide a 'question' query parameter."},
+            status=400
+        )
+
+    try:
+        chain = request.app.ctx.rag_chain
+        result = chain.invoke(question)
+        return response.text(result)
+    except Exception as e:
+        print(f"An error occurred during invocation: {e}")
+        return response.json(
+            {"error": "An internal error occurred while processing your request."},
+            status=500
+        )
+
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)
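
Once the server is up, the endpoint can be exercised with a plain GET request. A stdlib-only client sketch (the question text is just an example):

from urllib.parse import urlencode
from urllib.request import urlopen

params = urlencode({"question": "What is the chain rule?"})
with urlopen(f"http://localhost:7860/getResponse?{params}") as resp:
    print(resp.read().decode())  # plain-text answer from the RAG chain

One design note: the handler calls the synchronous chain.invoke, so each request occupies the event loop until the answer is produced; the Runnable interface's ainvoke would be the async alternative if concurrent requests matter.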
pyproject.toml
ADDED
@@ -0,0 +1,18 @@
+[project]
+name = "bookai"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "einops>=0.8.1",
+    "langchain>=0.3.27",
+    "langchain-chroma>=0.2.6",
+    "langchain-community>=0.3.29",
+    "langchain-core>=0.3.76",
+    "langchain-groq>=0.3.8",
+    "langchain-huggingface>=0.3.1",
+    "rank-bm25>=0.2.2",
+    "sanic>=25.3.0",
+    "sentence-transformers>=5.1.1",
+]
retriever.py
ADDED
@@ -0,0 +1,61 @@
+# retriever.py
+# This file handles the setup of embeddings, vector stores, and the ensemble retriever.
+
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+from langchain_community.retrievers import BM25Retriever
+from langchain.retrievers import EnsembleRetriever
+from config import (
+    MODEL_NAME, MODEL_KWARGS, ENCODE_KWARGS, VECTOR_STORE_DIRECTORY,
+    DENSE_RETRIEVER_K, KEYWORD_RETRIEVER_K, ENSEMBLE_WEIGHTS
+)
+
+def get_embedding_function():
+    """Initializes and returns the HuggingFace embedding model."""
+    return HuggingFaceEmbeddings(
+        model_name=MODEL_NAME,
+        model_kwargs=MODEL_KWARGS,
+        encode_kwargs=ENCODE_KWARGS
+    )
+
+def get_vector_store(embedding_function):
+    """Initializes and returns the Chroma vector store."""
+    return Chroma(
+        embedding_function=embedding_function,
+        persist_directory=VECTOR_STORE_DIRECTORY
+    )
+
+def get_ensemble_retriever():
+    """
+    Creates and returns an ensemble retriever combining dense and keyword-based search.
+    """
+    print("Initializing embeddings and vector store...")
+    embeddings = get_embedding_function()
+    vector_store = get_vector_store(embeddings)
+
+    # Note: as_retriever ignores a bare k kwarg; it must go in search_kwargs.
+    dense_vector_retriever = vector_store.as_retriever(search_kwargs={"k": DENSE_RETRIEVER_K})
+
+    print("Loading documents for BM25 retriever...")
+    ids = vector_store.get().get("ids", [])
+
+    if not ids:
+        all_documents = []
+    else:
+        all_documents = vector_store.get_by_ids(ids)
+
+    keyword_search_retriever = BM25Retriever.from_documents(
+        documents=all_documents, k=KEYWORD_RETRIEVER_K
+    ) if all_documents else None
+
+    if keyword_search_retriever:
+        print("Creating ensemble retriever...")
+        ensemble_retriever = EnsembleRetriever(
+            retrievers=[dense_vector_retriever, keyword_search_retriever],
+            weights=ENSEMBLE_WEIGHTS
+        )
+    else:
+        print("Creating dense-only retriever...")
+        ensemble_retriever = dense_vector_retriever
+
+    print("Retriever setup complete.")
+    return ensemble_retriever
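
To sanity-check retrieval in isolation, the ensemble retriever can be invoked directly. A minimal sketch, assuming the persisted vector store under vectorStore/ is already populated (the query string is illustrative):

from retriever import get_ensemble_retriever

retriever = get_ensemble_retriever()
docs = retriever.invoke("Define the limit of a sequence")  # returns a list of Documents
for doc in docs:
    print(doc.page_content[:120])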
uv.lock
ADDED
The diff for this file is too large to render.
vectorStore/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7081f2a48908387f519e62c823ef7a9f3487b0e17a4f3a68d7ab81cd949a4a29
+size 5992448