update model

- .gitignore +3 -1
- Dockerfile +8 -10
- app.py +2 -2
- requirements.txt +1 -1
- src/helper.py +1 -1
- store_index.py +39 -0
.gitignore
CHANGED
@@ -210,4 +210,6 @@ __marimo__/
 # Virtual environments created by tools like venv, virtualenv, or conda.
 # These directories contain the Python interpreter and installed packages, which are not needed
 # in version control.
-
+medchatbot2/
+research/
+Data/
Dockerfile
CHANGED
@@ -4,25 +4,23 @@ FROM python:3.9-slim
 WORKDIR /app
 
 # Copy dependencies
-COPY requirements.txt
+COPY requirements.txt ./
 
-#
+# Install dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# cache directory
+# Hugging Face cache directory
 ENV HF_HOME=/app/.cache
-
-#
 RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
 
-# Pre-download model
+# Pre-download model to cache
 RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
 
-#
+# Copy rest of the project
 COPY . .
 
-#
-EXPOSE
+# Expose the port expected by HF Spaces
+EXPOSE 8080
 
-# Run app
+# Run app
 CMD ["python", "app.py"]
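The pre-download RUN step bakes the all-MiniLM-L6-v2 weights into the image under /app/.cache, so the Space never fetches them at startup. A minimal runtime check for this (not part of the commit; HF_HUB_OFFLINE is the standard Hugging Face Hub variable that forbids network fetches):

import os
os.environ["HF_HUB_OFFLINE"] = "1"  # force resolution from the baked-in /app/.cache, no downloads
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print(model.encode("ping").shape)  # (384,), matching dimension=384 in store_index.py

If this raises instead of printing, the cache layer was not built correctly and the Space would stall downloading the model on first request.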
app.py
CHANGED
@@ -33,7 +33,7 @@ docsearch = PineconeVectorStore.from_existing_index(
 retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})
 
 llm = ChatGoogleGenerativeAI(
-    model="gemini-2.
+    model="gemini-2.0-flash-lite",
     google_api_key=GEMINI_API_KEY,
     temperature=0.4,
     max_output_tokens=2048
@@ -63,4 +63,4 @@ def chat():
     return str(response["answer"])
 
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=
+    app.run(host="0.0.0.0", port= 8080, debug= True)
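The removed model string is cut off in this view; the new pin is gemini-2.0-flash-lite, and the server now binds port 8080 to match the EXPOSE line in the Dockerfile. A quick way to smoke-test the pinned model outside the app (a sketch, assuming GEMINI_API_KEY is exported; same constructor arguments as the diff):

import os
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-lite",
    google_api_key=os.environ["GEMINI_API_KEY"],
    temperature=0.4,
    max_output_tokens=2048,
)
print(llm.invoke("Reply with one word: ready?").content)

One caveat: debug=True enables the Werkzeug debugger, which is unsafe on a publicly reachable 0.0.0.0 binding such as a deployed Space.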
requirements.txt
CHANGED
@@ -7,7 +7,7 @@ pinecone[grpc]
 langchain-pinecone
 langchain_community
 langchain_openai
-langchain-huggingface
 langchain_experimental
 langchain_google_genai
+langchain-huggingface
 -e .
src/helper.py
CHANGED
@@ -1,4 +1,4 @@
-from langchain_community.document_loaders import
+from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 
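store_index.py below imports load_pdf_file, text_split, and download_hugging_face_embeddings from this module; their bodies fall outside the hunk. A plausible sketch consistent with the imports above and with the 384-dimensional index created in store_index.py (all three bodies are assumptions, not shown in the commit):

from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

def load_pdf_file(data):
    # Assumed body: load every PDF under the directory with PyPDFLoader
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return loader.load()

def text_split(extracted_data):
    # Assumed body: chunk sizes are illustrative, not taken from the commit
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return splitter.split_documents(extracted_data)

def download_hugging_face_embeddings():
    # all-MiniLM-L6-v2 yields 384-dim vectors, matching dimension=384 in store_index.py
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")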
store_index.py
ADDED
@@ -0,0 +1,39 @@
+from src.helper import load_pdf_file, text_split, download_hugging_face_embeddings
+from pinecone.grpc import PineconeGRPC as Pinecone
+from pinecone import ServerlessSpec
+from langchain_pinecone import PineconeVectorStore
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+
+PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')
+os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
+
+extracted_data=load_pdf_file(data='Data/')
+text_chunks=text_split(extracted_data)
+embeddings = download_hugging_face_embeddings()
+
+pc = Pinecone(api_key=PINECONE_API_KEY)
+
+index_name = "medchatbot"
+
+pc.create_index(
+    name=index_name,
+    dimension=384,
+    metric="cosine",
+    spec=ServerlessSpec(
+        cloud="aws",
+        region="us-east-1"
+    )
+)
+
+# Embed each chunk and upsert the embeddings into your Pinecone index.
+docsearch = PineconeVectorStore.from_documents(
+    documents=text_chunks,
+    index_name=index_name,
+    embedding=embeddings,
+)
+
+
+
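One caveat worth noting: pc.create_index raises an error when an index named medchatbot already exists, so re-running store_index.py fails at that call. A common guard (an assumption, not part of the commit; list_indexes().names() is the standard Pinecone client call):

# Guard against re-runs: only create the index if it does not exist yet (not in the committed script)
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # all-MiniLM-L6-v2 embedding size
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

With the guard in place, the script becomes idempotent: later runs skip index creation and only re-upsert the document chunks.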