tmt3103 committed
Commit 8d802f2 · 1 Parent(s): 26e329e

update model

Files changed (6)
  1. .gitignore +3 -1
  2. Dockerfile +8 -10
  3. app.py +2 -2
  4. requirements.txt +1 -1
  5. src/helper.py +1 -1
  6. store_index.py +39 -0
.gitignore CHANGED
@@ -210,4 +210,6 @@ __marimo__/
 # Virtual environments created by tools like venv, virtualenv, or conda.
 # These directories contain the Python interpreter and installed packages, which are not needed
 # in version control.
-medchatbot/
+medchatbot2/
+research/
+Data/
Dockerfile CHANGED
@@ -4,25 +4,23 @@ FROM python:3.9-slim
 WORKDIR /app
 
 # Copy dependencies
-COPY requirements.txt setup.py ./
+COPY requirements.txt ./
 
-# dependencies
+# Install dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# cache directory for Hugging Face
+# Hugging Face cache directory
 ENV HF_HOME=/app/.cache
-
-#
 RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
 
-# Pre-download model (sentence-transformers/all-MiniLM-L6-v2)
+# Pre-download model to cache
 RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
 
-#
+# Copy rest of the project
 COPY . .
 
-#
-EXPOSE 7860
+# Expose the port expected by HF Spaces
+EXPOSE 8080
 
 # Run app
 CMD ["python", "app.py"]
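
The build pre-downloads the sentence-transformers model into HF_HOME so the container does not fetch it at runtime. A minimal sketch, assuming the image layout above, to check that the cached model loads and that its dimension matches the Pinecone index created in store_index.py below:

import os
os.environ.setdefault("HF_HOME", "/app/.cache")  # same cache directory the Dockerfile sets
from sentence_transformers import SentenceTransformer

# Resolves from the cache populated during the docker build; no download needed at runtime.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print(model.get_sentence_embedding_dimension())  # 384, the dimension passed to pc.create_index
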
app.py CHANGED
@@ -33,7 +33,7 @@ docsearch = PineconeVectorStore.from_existing_index(
 retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})
 
 llm = ChatGoogleGenerativeAI(
-    model="gemini-2.5-pro",
+    model="gemini-2.0-flash-lite",
     google_api_key=GEMINI_API_KEY,
     temperature=0.4,
     max_output_tokens=2048
@@ -63,4 +63,4 @@ def chat():
     return str(response["answer"])
 
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port= 7860, debug= True)
+    app.run(host="0.0.0.0", port= 8080, debug= True)
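
The only model change is swapping gemini-2.5-pro for the lighter gemini-2.0-flash-lite; the retriever and chain stay as they were. For reference, a minimal sketch of how a retriever and this LLM are typically combined so that response["answer"] exists — the prompt text and chain construction here are assumptions, since app.py's chain setup is outside this hunk:

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# llm and retriever are the objects defined in the app.py lines shown above.
# Assumed prompt; the repo's actual system prompt may differ.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the medical question using only this retrieved context:\n\n{context}"),
    ("human", "{input}"),
])
rag_chain = create_retrieval_chain(retriever, create_stuff_documents_chain(llm, prompt))

response = rag_chain.invoke({"input": "What is acne?"})
print(response["answer"])  # the same key the /chat route returns
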
requirements.txt CHANGED
@@ -7,7 +7,7 @@ pinecone[grpc]
 langchain-pinecone
 langchain_community
 langchain_openai
-langchain-huggingface
 langchain_experimental
 langchain_google_genai
+langchain-huggingface
 -e .
src/helper.py CHANGED
@@ -1,4 +1,4 @@
-from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
+from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 
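
The import reorder above is the only change to src/helper.py, but the new store_index.py below relies on three helpers from this module. Their definitions are not part of the diff; a minimal sketch consistent with these imports (glob pattern and chunk sizes are assumptions):

from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

def load_pdf_file(data):
    # Load every PDF in the data directory (e.g. Data/).
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return loader.load()

def text_split(extracted_data):
    # Split documents into overlapping chunks small enough to embed.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return splitter.split_documents(extracted_data)

def download_hugging_face_embeddings():
    # 384-dimensional MiniLM embeddings, matching the index dimension in store_index.py.
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
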
store_index.py ADDED
@@ -0,0 +1,39 @@
+from src.helper import load_pdf_file, text_split, download_hugging_face_embeddings
+from pinecone.grpc import PineconeGRPC as Pinecone
+from pinecone import ServerlessSpec
+from langchain_pinecone import PineconeVectorStore
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+
+PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')
+os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
+
+extracted_data=load_pdf_file(data='Data/')
+text_chunks=text_split(extracted_data)
+embeddings = download_hugging_face_embeddings()
+
+pc = Pinecone(api_key=PINECONE_API_KEY)
+
+index_name = "medchatbot"
+
+pc.create_index(
+    name=index_name,
+    dimension=384,
+    metric="cosine",
+    spec=ServerlessSpec(
+        cloud="aws",
+        region="us-east-1"
+    )
+)
+
+# Embed each chunk and upsert the embeddings into your Pinecone index.
+docsearch = PineconeVectorStore.from_documents(
+    documents=text_chunks,
+    index_name=index_name,
+    embedding=embeddings,
+)
+
+
+
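
store_index.py is a one-off indexing script: it chunks the PDFs under Data/, creates the 384-dimension "medchatbot" index, and upserts the embeddings. The running app only reads from that index, as the from_existing_index call in the app.py hunk header shows. A minimal smoke-test sketch of that read path, assuming the index has already been populated:

from langchain_pinecone import PineconeVectorStore
from src.helper import download_hugging_face_embeddings

# Reconnect with the same embeddings and index name used above.
embeddings = download_hugging_face_embeddings()
docsearch = PineconeVectorStore.from_existing_index(
    index_name="medchatbot",
    embedding=embeddings,
)

# Return the 3 chunks most similar to the query, as app.py's retriever does.
for doc in docsearch.similarity_search("What is acne?", k=3):
    print(doc.page_content[:80])
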