Zubaish
committed on
Commit
·
3f76bb4
1
Parent(s):
2d4b429
Fix langchain_chroma error; pin stable deps
Browse files- Dockerfile +0 -6
- ingest.py +2 -1
- rag.py +4 -4
- requirements.txt +6 -6
Dockerfile
CHANGED
|
@@ -2,23 +2,17 @@ FROM python:3.10-slim
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
# System dependencies
|
| 6 |
RUN apt-get update && apt-get install -y \
|
| 7 |
git \
|
| 8 |
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
-
# Python dependencies
|
| 11 |
COPY requirements.txt .
|
| 12 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
|
| 14 |
-
# Application code
|
| 15 |
COPY app.py rag.py ingest.py guardrails.py config.py ./
|
| 16 |
|
| 17 |
-
# Create empty kb_docs directory (PDFs added later via HF UI)
|
| 18 |
RUN mkdir -p kb_docs
|
| 19 |
|
| 20 |
-
# Hugging Face Spaces port
|
| 21 |
EXPOSE 7860
|
| 22 |
|
| 23 |
-
# Start FastAPI
|
| 24 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
|
|
|
| 5 |
RUN apt-get update && apt-get install -y \
|
| 6 |
git \
|
| 7 |
&& rm -rf /var/lib/apt/lists/*
|
| 8 |
|
|
|
|
| 9 |
COPY requirements.txt .
|
| 10 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 11 |
|
|
|
|
| 12 |
COPY app.py rag.py ingest.py guardrails.py config.py ./
|
| 13 |
|
|
|
|
| 14 |
RUN mkdir -p kb_docs
|
| 15 |
|
|
|
|
| 16 |
EXPOSE 7860
|
| 17 |
|
|
|
|
| 18 |
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
ingest.py
CHANGED
|
@@ -11,8 +11,8 @@ def load_and_split_docs(path="kb_docs"):
|
|
| 11 |
glob="**/*.pdf",
|
| 12 |
loader_cls=PyPDFLoader
|
| 13 |
)
|
| 14 |
-
docs = loader.load()
|
| 15 |
|
|
|
|
| 16 |
if not docs:
|
| 17 |
return []
|
| 18 |
|
|
@@ -20,4 +20,5 @@ def load_and_split_docs(path="kb_docs"):
|
|
| 20 |
chunk_size=800,
|
| 21 |
chunk_overlap=100
|
| 22 |
)
|
|
|
|
| 23 |
return splitter.split_documents(docs)
|
|
|
|
| 11 |
glob="**/*.pdf",
|
| 12 |
loader_cls=PyPDFLoader
|
| 13 |
)
|
|
|
|
| 14 |
|
| 15 |
+
docs = loader.load()
|
| 16 |
if not docs:
|
| 17 |
return []
|
| 18 |
|
|
|
|
| 20 |
chunk_size=800,
|
| 21 |
chunk_overlap=100
|
| 22 |
)
|
| 23 |
+
|
| 24 |
return splitter.split_documents(docs)
|
rag.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
|
|
| 1 |
from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace
|
| 2 |
-
from langchain_chroma import Chroma
|
| 3 |
from langchain.schema import SystemMessage, HumanMessage
|
| 4 |
|
| 5 |
from ingest import load_and_split_docs
|
|
@@ -14,7 +14,7 @@ embeddings = HuggingFaceEmbeddings(
|
|
| 14 |
|
| 15 |
if documents:
|
| 16 |
vectorstore = Chroma.from_documents(
|
| 17 |
-
documents,
|
| 18 |
embedding=embeddings
|
| 19 |
)
|
| 20 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
|
|
@@ -32,14 +32,14 @@ def ask_rag_with_status(question: str):
|
|
| 32 |
if not retriever:
|
| 33 |
return {
|
| 34 |
"status": ["⚠️ No documents uploaded yet"],
|
| 35 |
-
"answer": "Please upload PDF files to the kb_docs folder."
|
| 36 |
}
|
| 37 |
|
| 38 |
docs = retriever.get_relevant_documents(question)
|
| 39 |
context = "\n\n".join(d.page_content for d in docs)
|
| 40 |
|
| 41 |
messages = [
|
| 42 |
-
SystemMessage(content="Answer using the provided context."),
|
| 43 |
HumanMessage(content=f"Context:\n{context}\n\nQuestion: {question}")
|
| 44 |
]
|
| 45 |
|
|
|
|
| 1 |
+
from langchain_community.vectorstores import Chroma
|
| 2 |
from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace
|
|
|
|
| 3 |
from langchain.schema import SystemMessage, HumanMessage
|
| 4 |
|
| 5 |
from ingest import load_and_split_docs
|
|
|
|
| 14 |
|
| 15 |
if documents:
|
| 16 |
vectorstore = Chroma.from_documents(
|
| 17 |
+
documents=documents,
|
| 18 |
embedding=embeddings
|
| 19 |
)
|
| 20 |
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
|
|
|
|
| 32 |
if not retriever:
|
| 33 |
return {
|
| 34 |
"status": ["⚠️ No documents uploaded yet"],
|
| 35 |
+
"answer": "Please upload PDF files to the kb_docs folder and restart the Space."
|
| 36 |
}
|
| 37 |
|
| 38 |
docs = retriever.get_relevant_documents(question)
|
| 39 |
context = "\n\n".join(d.page_content for d in docs)
|
| 40 |
|
| 41 |
messages = [
|
| 42 |
+
SystemMessage(content="Answer using only the provided context."),
|
| 43 |
HumanMessage(content=f"Context:\n{context}\n\nQuestion: {question}")
|
| 44 |
]
|
| 45 |
|
requirements.txt
CHANGED
|
@@ -3,12 +3,12 @@ uvicorn
|
|
| 3 |
pydantic
|
| 4 |
python-dotenv
|
| 5 |
|
| 6 |
-
langchain
|
| 7 |
-
langchain-community
|
| 8 |
-
langchain-chroma
|
| 9 |
-
langchain-huggingface
|
| 10 |
|
| 11 |
-
chromadb
|
| 12 |
sentence-transformers
|
|
|
|
| 13 |
pypdf
|
| 14 |
-
huggingface_hub
|
|
|
|
| 3 |
pydantic
|
| 4 |
python-dotenv
|
| 5 |
|
| 6 |
+
langchain==0.2.17
|
| 7 |
+
langchain-community==0.2.17
|
| 8 |
+
langchain-huggingface==0.1.0
|
|
|
|
| 9 |
|
| 10 |
+
chromadb==0.5.5
|
| 11 |
sentence-transformers
|
| 12 |
+
|
| 13 |
pypdf
|
| 14 |
+
huggingface_hub>=0.33.4,<1.0.0
|