tmt3103 committed
Commit 8d802f2 · 1 Parent(s): 26e329e

update model

Files changed (6)
  1. .gitignore +3 -1
  2. Dockerfile +8 -10
  3. app.py +2 -2
  4. requirements.txt +1 -1
  5. src/helper.py +1 -1
  6. store_index.py +39 -0
.gitignore CHANGED
@@ -210,4 +210,6 @@ __marimo__/
 # Virtual environments created by tools like venv, virtualenv, or conda.
 # These directories contain the Python interpreter and installed packages, which are not needed
 # in version control.
-medchatbot/
+medchatbot2/
+research/
+Data/
Dockerfile CHANGED
@@ -4,25 +4,23 @@ FROM python:3.9-slim
 WORKDIR /app
 
 # Copy dependencies
-COPY requirements.txt setup.py ./
+COPY requirements.txt ./
 
-# dependencies
+# Install dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# cache directory for Hugging Face
+# Hugging Face cache directory
 ENV HF_HOME=/app/.cache
-
-#
 RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
 
-# Pre-download model (sentence-transformers/all-MiniLM-L6-v2)
+# Pre-download model to cache
 RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
 
-#
+# Copy rest of the project
 COPY . .
 
-#
-EXPOSE 7860
+# Expose the port expected by HF Spaces
+EXPOSE 8080
 
 # Run app
 CMD ["python", "app.py"]
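
The build pre-downloads the sentence-transformers model into HF_HOME so the container does not fetch it at runtime. A minimal sketch, assuming the image layout above, to check that the cached model loads and that its dimension matches the Pinecone index created in store_index.py below:

import os
os.environ.setdefault("HF_HOME", "/app/.cache")  # same cache directory the Dockerfile sets
from sentence_transformers import SentenceTransformer

# Resolves from the cache populated during the docker build; no download needed at runtime.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print(model.get_sentence_embedding_dimension())  # 384, the dimension passed to pc.create_index
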
app.py CHANGED
@@ -33,7 +33,7 @@ docsearch = PineconeVectorStore.from_existing_index(
 retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})
 
 llm = ChatGoogleGenerativeAI(
-    model="gemini-2.5-pro",
+    model="gemini-2.0-flash-lite",
     google_api_key=GEMINI_API_KEY,
     temperature=0.4,
     max_output_tokens=2048
@@ -63,4 +63,4 @@ def chat():
     return str(response["answer"])
 
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port= 7860, debug= True)
+    app.run(host="0.0.0.0", port= 8080, debug= True)
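
The only model change is swapping gemini-2.5-pro for the lighter gemini-2.0-flash-lite; the retriever and chain stay as they were. For reference, a minimal sketch of how a retriever and this LLM are typically combined so that response["answer"] exists — the prompt text and chain construction here are assumptions, since app.py's chain setup is outside this hunk:

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# llm and retriever are the objects defined in the app.py lines shown above.
# Assumed prompt; the repo's actual system prompt may differ.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the medical question using only this retrieved context:\n\n{context}"),
    ("human", "{input}"),
])
rag_chain = create_retrieval_chain(retriever, create_stuff_documents_chain(llm, prompt))

response = rag_chain.invoke({"input": "What is acne?"})
print(response["answer"])  # the same key the /chat route returns
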
requirements.txt CHANGED
@@ -7,7 +7,7 @@ pinecone[grpc]
 langchain-pinecone
 langchain_community
 langchain_openai
-langchain-huggingface
 langchain_experimental
 langchain_google_genai
+langchain-huggingface
 -e .
src/helper.py CHANGED
@@ -1,4 +1,4 @@
-from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
+from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 
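
The import reorder above is the only change to src/helper.py, but the new store_index.py below relies on three helpers from this module. Their definitions are not part of the diff; a minimal sketch consistent with these imports (glob pattern and chunk sizes are assumptions):

from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

def load_pdf_file(data):
    # Load every PDF in the data directory (e.g. Data/).
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return loader.load()

def text_split(extracted_data):
    # Split documents into overlapping chunks small enough to embed.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return splitter.split_documents(extracted_data)

def download_hugging_face_embeddings():
    # 384-dimensional MiniLM embeddings, matching the index dimension in store_index.py.
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
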
store_index.py ADDED
@@ -0,0 +1,39 @@
+from src.helper import load_pdf_file, text_split, download_hugging_face_embeddings
+from pinecone.grpc import PineconeGRPC as Pinecone
+from pinecone import ServerlessSpec
+from langchain_pinecone import PineconeVectorStore
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+
+PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')
+os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
+
+extracted_data=load_pdf_file(data='Data/')
+text_chunks=text_split(extracted_data)
+embeddings = download_hugging_face_embeddings()
+
+pc = Pinecone(api_key=PINECONE_API_KEY)
+
+index_name = "medchatbot"
+
+pc.create_index(
+    name=index_name,
+    dimension=384,
+    metric="cosine",
+    spec=ServerlessSpec(
+        cloud="aws",
+        region="us-east-1"
+    )
+)
+
+# Embed each chunk and upsert the embeddings into your Pinecone index.
+docsearch = PineconeVectorStore.from_documents(
+    documents=text_chunks,
+    index_name=index_name,
+    embedding=embeddings,
+)
+
+
+
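
store_index.py is a one-off indexing script: it chunks the PDFs under Data/, creates the 384-dimension "medchatbot" index, and upserts the embeddings. The running app only reads from that index, as the from_existing_index call in the app.py hunk header shows. A minimal smoke-test sketch of that read path, assuming the index has already been populated:

from langchain_pinecone import PineconeVectorStore
from src.helper import download_hugging_face_embeddings

# Reconnect with the same embeddings and index name used above.
embeddings = download_hugging_face_embeddings()
docsearch = PineconeVectorStore.from_existing_index(
    index_name="medchatbot",
    embedding=embeddings,
)

# Return the 3 chunks most similar to the query, as app.py's retriever does.
for doc in docsearch.similarity_search("What is acne?", k=3):
    print(doc.page_content[:80])
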