Rauhan committed on
Commit 5b8afdb · 1 Parent(s): 5e3ddd8
.DS_Store ADDED
Binary file (6.15 kB)
 
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ vectorStore/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ FROM python:3.12-slim
+
+ COPY . /app
+
+ WORKDIR /app
+
+ USER root
+
+ RUN pip install uv
+
+ RUN uv sync
+
+ RUN chmod -R 777 /app
+
+ EXPOSE 7860
+
+ CMD ["uv", "run", "main.py"]
__pycache__/main.cpython-310.pyc ADDED
Binary file (3.58 kB)
 
config.py ADDED
@@ -0,0 +1,19 @@
+ # config.py
+ # This file stores all the configuration variables and constants for the application.
+
+ # Embedding Model Configuration
+ MODEL_NAME = "jinaai/jina-embeddings-v3"
+ MODEL_KWARGS = {'device': 'cpu', 'trust_remote_code': True}
+ ENCODE_KWARGS = {'normalize_embeddings': False}
+
+ # Vector Store Configuration
+ VECTOR_STORE_DIRECTORY = "vectorStore"
+
+ # LLM Configuration
+ LLM_MODEL = "meta-llama/llama-4-maverick-17b-128e-instruct"
+ LLM_TEMPERATURE = 0.8
+
+ # Retriever Configuration
+ DENSE_RETRIEVER_K = 3
+ KEYWORD_RETRIEVER_K = 3
+ ENSEMBLE_WEIGHTS = [0.5, 0.5]
llm_chain.py ADDED
@@ -0,0 +1,69 @@
+ # llm_chain.py
+ # This file configures the language model, prompt template, and the final processing chain.
+
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_groq import ChatGroq
+ from config import LLM_MODEL, LLM_TEMPERATURE
+
+ def get_llm():
+     """Initializes and returns the ChatGroq LLM."""
+     return ChatGroq(
+         model=LLM_MODEL,
+         temperature=LLM_TEMPERATURE
+     )
+
+ def get_prompt_template():
+     """Creates and returns the ChatPromptTemplate for the RAG chain."""
+     prompt_text = """
+     You are an AI assistant specialized exclusively in answering questions from **three mathematics books authored by Ice Venkatesh**.
+     Your role is to provide **clear, precise, and complete answers strictly based on the retrieved context from those books**.
+
+     ---
+
+     ### 🔹 Core Rules
+
+     1. **Strict Reliance on Context**: Only use the retrieved context (`context`) from Ice Venkatesh's books. Never invent, guess, or rely on outside knowledge. If the context is insufficient, state this clearly.
+
+     2. **No External Sources**: Do not use prior training knowledge or the internet. All answers, definitions, proofs, and examples must come **only from the provided context**.
+
+     3. **Mathematical Rigor & Clarity**: Provide correct, step-by-step reasoning when solving problems. Keep explanations simple, precise, and mathematically flawless.
+
+     4. **Handling Off-Topic Queries**: If the user asks something unrelated to the books or mathematics, politely decline. Example: "I can only help with questions related to the three mathematics books by Ice Venkatesh. Unfortunately, I cannot assist with topics outside that scope."
+
+     5. **Style & Professionalism**: Always be polite, clear, and professional. Avoid filler or speculation.
+
+     ---
+
+     ### 🔹 Response Instructions
+
+     * Use the `context` to extract the necessary information.
+     * Answer the `query` directly, grounded **only in the retrieved book content**.
+     * If insufficient context is found, respond with: "The books by Ice Venkatesh do not provide enough information to answer this question."
+
+     ---
+
+     **Retrieved context:**
+     {context}
+
+     **User question:**
+     {query}
+     """
+     return ChatPromptTemplate.from_template(prompt_text)
+
+ def create_rag_chain(retriever):
+     """Creates and returns the full RAG chain, accepting a retriever as an argument."""
+     print("Creating RAG chain...")
+     llm = get_llm()
+     prompt = get_prompt_template()
+     output_parser = StrOutputParser()
+
+     chain = (
+         {"context": retriever, "query": RunnablePassthrough()}
+         | prompt
+         | llm
+         | output_parser
+     )
+     print("RAG chain created successfully.")
+     return chain
main.py ADDED
@@ -0,0 +1,47 @@
+ # main.py
+ # This is the main file that runs the Sanic web server.
+
+ from sanic import Sanic, response
+ from retriever import get_ensemble_retriever
+ from llm_chain import create_rag_chain
+
+ app = Sanic("VibbaBackend")
+
+ @app.before_server_start
+ async def setup_model(app_instance, loop):
+     """
+     Initializes the retriever and RAG chain and attaches them
+     to the application context before the server starts.
+     """
+     print("Server starting up... Initializing model pipeline.")
+     retriever = get_ensemble_retriever()
+     rag_chain = create_rag_chain(retriever)
+     app_instance.ctx.rag_chain = rag_chain
+     print("Model pipeline is ready.")
+
+ @app.get("/getResponse")
+ async def get_response_endpoint(request):
+     """
+     Endpoint to get an answer to a question using the RAG chain.
+     Expects a 'question' query parameter.
+     """
+     question = request.args.get("question")
+     if not question:
+         return response.json(
+             {"error": "Please provide a 'question' query parameter."},
+             status=400
+         )
+
+     try:
+         chain = request.app.ctx.rag_chain
+         result = await chain.ainvoke(question)  # async invoke keeps the Sanic event loop free
+         return response.text(result)
+     except Exception as e:
+         print(f"An error occurred during invocation: {e}")
+         return response.json(
+             {"error": "An internal error occurred while processing your request."},
+             status=500
+         )
+
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=7860)
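For a quick manual check of this endpoint, a minimal client sketch using only the Python standard library (assumes the server is running locally on the port configured above; the question string is illustrative):

    import urllib.parse
    import urllib.request

    # Build the request against the /getResponse endpoint defined in main.py.
    question = urllib.parse.quote("What is the derivative of x^2?")
    url = f"http://localhost:7860/getResponse?question={question}"

    # The endpoint returns the chain's answer as plain text.
    with urllib.request.urlopen(url) as resp:
        print(resp.read().decode("utf-8"))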
pyproject.toml ADDED
@@ -0,0 +1,18 @@
+ [project]
+ name = "bookai"
+ version = "0.1.0"
+ description = "RAG backend that answers questions from three mathematics books by Ice Venkatesh"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+     "einops>=0.8.1",
+     "langchain>=0.3.27",
+     "langchain-chroma>=0.2.6",
+     "langchain-community>=0.3.29",
+     "langchain-core>=0.3.76",
+     "langchain-groq>=0.3.8",
+     "langchain-huggingface>=0.3.1",
+     "rank-bm25>=0.2.2",
+     "sanic>=25.3.0",
+     "sentence-transformers>=5.1.1",
+ ]
retriever.py ADDED
@@ -0,0 +1,61 @@
+ # retriever.py
+ # This file handles the setup of embeddings, vector stores, and the ensemble retriever.
+
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain_community.retrievers import BM25Retriever
+ from langchain.retrievers import EnsembleRetriever
+ from config import (
+     MODEL_NAME, MODEL_KWARGS, ENCODE_KWARGS, VECTOR_STORE_DIRECTORY,
+     DENSE_RETRIEVER_K, KEYWORD_RETRIEVER_K, ENSEMBLE_WEIGHTS
+ )
+
+ def get_embedding_function():
+     """Initializes and returns the HuggingFace embedding model."""
+     return HuggingFaceEmbeddings(
+         model_name=MODEL_NAME,
+         model_kwargs=MODEL_KWARGS,
+         encode_kwargs=ENCODE_KWARGS
+     )
+
+ def get_vector_store(embedding_function):
+     """Initializes and returns the Chroma vector store."""
+     return Chroma(
+         embedding_function=embedding_function,
+         persist_directory=VECTOR_STORE_DIRECTORY
+     )
+
+ def get_ensemble_retriever():
+     """
+     Creates and returns an ensemble retriever combining dense and keyword-based search.
+     """
+     print("Initializing embeddings and vector store...")
+     embeddings = get_embedding_function()
+     vector_store = get_vector_store(embeddings)
+
+     dense_vector_retriever = vector_store.as_retriever(search_kwargs={"k": DENSE_RETRIEVER_K})
+
+     print("Loading documents for BM25 retriever...")
+     ids = vector_store.get().get("ids", [])
+
+     if not ids:
+         all_documents = []
+     else:
+         all_documents = vector_store.get_by_ids(ids)
+
+     keyword_search_retriever = BM25Retriever.from_documents(
+         documents=all_documents, k=KEYWORD_RETRIEVER_K
+     ) if all_documents else None
+
+     if keyword_search_retriever:
+         print("Creating ensemble retriever...")
+         ensemble_retriever = EnsembleRetriever(
+             retrievers=[dense_vector_retriever, keyword_search_retriever],
+             weights=ENSEMBLE_WEIGHTS
+         )
+     else:
+         print("Creating dense-only retriever...")
+         ensemble_retriever = dense_vector_retriever
+
+     print("Retriever setup complete.")
+     return ensemble_retriever
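As a rough smoke test for this module, something like the following could be run from the repo root (assumes the persisted vectorStore/ directory is populated; the query is illustrative):

    from retriever import get_ensemble_retriever

    # Build the ensemble (or dense-only) retriever from the persisted store.
    retriever = get_ensemble_retriever()

    # Retrievers are LCEL Runnables, so .invoke() returns a list of Documents.
    docs = retriever.invoke("What is a derivative?")
    for doc in docs:
        print(doc.page_content[:120])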
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
vectorStore/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7081f2a48908387f519e62c823ef7a9f3487b0e17a4f3a68d7ab81cd949a4a29
+ size 5992448