deepak-cse-jha committed on
Commit
e66f178
·
0 Parent(s):

Clean deploy version for Hugging Face Space

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ data/
3
+ vectorstore/
4
+ venv/
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ COPY . .
10
+
11
+ EXPOSE 7860
12
+
13
+ CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md ADDED
File without changes
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from dotenv import load_dotenv
4
+
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain.chains import RetrievalQA
8
+ from langchain_core.prompts import PromptTemplate
9
+ from langchain_groq import ChatGroq
10
+
11
+ load_dotenv()
12
+
13
+ DB_FAISS_PATH = "vectorstore/db_faiss"
14
+
15
+ @st.cache_resource
16
+ def get_vectorstore():
17
+ if not os.path.exists(DB_FAISS_PATH):
18
+ st.error("FAISS vectorstore not found")
19
+ st.stop()
20
+
21
+ embeddings = HuggingFaceEmbeddings(
22
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
23
+ )
24
+
25
+ return FAISS.load_local(
26
+ DB_FAISS_PATH,
27
+ embeddings,
28
+ allow_dangerous_deserialization=True
29
+ )
30
+
31
+
32
+ def get_prompt():
33
+ return PromptTemplate(
34
+ template="""
35
+ Use the information in the context to answer the question.
36
+ If you do not know the answer, say you do not know.
37
+ Do not add anything outside the context.
38
+
39
+ Context:
40
+ {context}
41
+
42
+ Question:
43
+ {question}
44
+
45
+ Answer directly.
46
+ """,
47
+ input_variables=["context", "question"],
48
+ )
49
+
50
+
51
+ def main():
52
+ st.set_page_config(page_title="Medical Chatbot")
53
+ st.title("Medical Chatbot")
54
+
55
+ groq_api_key = os.getenv("GROQ_API_KEY")
56
+ if not groq_api_key:
57
+ st.error("GROQ_API_KEY not set")
58
+ st.stop()
59
+
60
+ if "messages" not in st.session_state:
61
+ st.session_state.messages = []
62
+
63
+ for msg in st.session_state.messages:
64
+ st.chat_message(msg["role"]).markdown(msg["content"])
65
+
66
+ user_input = st.chat_input("Ask your question")
67
+
68
+ if user_input:
69
+ st.chat_message("user").markdown(user_input)
70
+ st.session_state.messages.append(
71
+ {"role": "user", "content": user_input}
72
+ )
73
+
74
+ vectorstore = get_vectorstore()
75
+
76
+ qa_chain = RetrievalQA.from_chain_type(
77
+ llm=ChatGroq(
78
+ model_name="meta-llama/llama-4-maverick-17b-128e-instruct",
79
+ temperature=0.0,
80
+ groq_api_key=groq_api_key,
81
+ ),
82
+ chain_type="stuff",
83
+ retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
84
+ return_source_documents=True,
85
+ chain_type_kwargs={"prompt": get_prompt()},
86
+ )
87
+
88
+ response = qa_chain.invoke({"query": user_input})
89
+
90
+ answer = response["result"]
91
+ sources = response["source_documents"]
92
+
93
+ st.chat_message("assistant").markdown(answer)
94
+ st.session_state.messages.append(
95
+ {"role": "assistant", "content": answer}
96
+ )
97
+
98
+ st.chat_message("assistant").markdown(
99
+ "Source Docs:\n\n" + str(sources)
100
+ )
101
+ st.session_state.messages.append(
102
+ {"role": "assistant", "content": str(sources)}
103
+ )
104
+
105
+
106
+ if __name__ == "__main__":
107
+ main()
requirements.txt ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -i https://pypi.org/simple
2
+ aiohappyeyeballs==2.6.1; python_version >= '3.9'
3
+ aiohttp==3.12.14; python_version >= '3.9'
4
+ aiosignal==1.4.0; python_version >= '3.9'
5
+ altair==5.5.0; python_version >= '3.9'
6
+ annotated-types==0.7.0; python_version >= '3.8'
7
+ anyio==4.9.0; python_version >= '3.9'
8
+ attrs==25.3.0; python_version >= '3.8'
9
+ blinker==1.9.0; python_version >= '3.9'
10
+ cachetools==6.1.0; python_version >= '3.9'
11
+ certifi==2025.7.9; python_version >= '3.7'
12
+ charset-normalizer==3.4.2; python_version >= '3.7'
13
+ click==8.2.1; python_version >= '3.10'
14
+ dataclasses-json==0.6.7; python_version >= '3.7' and python_version < '4.0'
15
+ distro==1.9.0; python_version >= '3.6'
16
+ faiss-cpu==1.11.0; python_version >= '3.9'
17
+ filelock==3.18.0; python_version >= '3.9'
18
+ frozenlist==1.7.0; python_version >= '3.9'
19
+ fsspec==2025.5.1; python_version >= '3.9'
20
+ gitdb==4.0.12; python_version >= '3.7'
21
+ gitpython==3.1.44; python_version >= '3.7'
22
+ groq==0.29.0; python_version >= '3.8'
23
+ h11==0.16.0; python_version >= '3.8'
24
+ hf-xet==1.1.5; platform_machine == 'x86_64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'aarch64'
25
+ httpcore==1.0.9; python_version >= '3.8'
26
+ httpx==0.28.1; python_version >= '3.8'
27
+ httpx-sse==0.4.1; python_version >= '3.9'
28
+ huggingface-hub==0.33.2; python_full_version >= '3.8.0'
29
+ idna==3.10; python_version >= '3.6'
30
+ jinja2==3.1.6; python_version >= '3.7'
31
+ joblib==1.5.1; python_version >= '3.9'
32
+ jsonpatch==1.33; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
33
+ jsonpointer==3.0.0; python_version >= '3.7'
34
+ jsonschema==4.24.0; python_version >= '3.9'
35
+ jsonschema-specifications==2025.4.1; python_version >= '3.9'
36
+ langchain==0.3.26; python_version >= '3.9'
37
+ langchain-community==0.3.27; python_version >= '3.9'
38
+ langchain-core==0.3.68; python_version >= '3.9'
39
+ langchain-groq==0.3.5; python_version >= '3.9'
40
+ langchain-huggingface==0.3.0; python_version >= '3.9'
41
+ langchain-text-splitters==0.3.8; python_version >= '3.9' and python_version < '4.0'
42
+ langsmith==0.4.4; python_version >= '3.9'
43
+ markupsafe==3.0.2; python_version >= '3.9'
44
+ marshmallow==3.26.1; python_version >= '3.9'
45
+ mpmath==1.3.0
46
+ multidict==6.6.3; python_version >= '3.9'
47
+ mypy-extensions==1.1.0; python_version >= '3.8'
48
+ narwhals==1.46.0; python_version >= '3.9'
49
+ networkx==3.5; python_version >= '3.11'
50
+ numpy==2.3.1; python_version < '3.13'
51
+ orjson==3.10.18; platform_python_implementation != 'PyPy'
52
+ packaging==24.2; python_version >= '3.8'
53
+ pandas==2.3.1; python_version >= '3.9'
54
+ pillow==11.3.0; python_version >= '3.9'
55
+ propcache==0.3.2; python_version >= '3.9'
56
+ protobuf==6.31.1; python_version >= '3.9'
57
+ pyarrow==20.0.0; python_version >= '3.9'
58
+ pydantic==2.11.7; python_version >= '3.9'
59
+ pydantic-core==2.33.2; python_version >= '3.9'
60
+ pydantic-settings==2.10.1; python_version >= '3.9'
61
+ pydeck==0.9.1; python_version >= '3.8'
62
+ pypdf==5.7.0; python_version >= '3.8'
63
+ python-dateutil==2.9.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
64
+ python-dotenv==1.1.1; python_version >= '3.9'
65
+ pytz==2025.2
66
+ pyyaml==6.0.2; python_version >= '3.8'
67
+ referencing==0.36.2; python_version >= '3.9'
68
+ regex==2024.11.6; python_version >= '3.8'
69
+ requests==2.32.4; python_version >= '3.8'
70
+ requests-toolbelt==1.0.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
71
+ rpds-py==0.26.0; python_version >= '3.9'
72
+ safetensors==0.5.3; python_version >= '3.7'
73
+ scikit-learn==1.7.0; python_version >= '3.10'
74
+ scipy==1.16.0; python_version >= '3.11'
75
+ sentence-transformers==5.0.0; python_version >= '3.9'
76
+ setuptools==80.9.0; python_version >= '3.12'
77
+ six==1.17.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
78
+ smmap==5.0.2; python_version >= '3.7'
79
+ sniffio==1.3.1; python_version >= '3.7'
80
+ sqlalchemy==2.0.41; python_version >= '3.7'
81
+ streamlit==1.46.1; python_version >= '3.9' and python_full_version != '3.9.7'
82
+ sympy==1.14.0; python_version >= '3.9'
83
+ tenacity==9.1.2; python_version >= '3.9'
84
+ threadpoolctl==3.6.0; python_version >= '3.9'
85
+ tokenizers==0.21.2; python_version >= '3.9'
86
+ toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
87
+ torch==2.7.1; python_full_version >= '3.9.0'
88
+ tornado==6.5.1; python_version >= '3.9'
89
+ tqdm==4.67.1; python_version >= '3.7'
90
+ transformers==4.53.1; python_full_version >= '3.9.0'
91
+ typing-extensions==4.14.1; python_version >= '3.9'
92
+ typing-inspect==0.9.0
93
+ typing-inspection==0.4.1; python_version >= '3.9'
94
+ tzdata==2025.2; python_version >= '2'
95
+ urllib3==2.5.0; python_version >= '3.9'
96
+ yarl==1.20.1; python_version >= '3.9'
97
+ zstandard==0.23.0; python_version >= '3.8'
utils/connect_memory_with_llm.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from langchain_huggingface import HuggingFaceEndpoint
4
+ from langchain_core.prompts import PromptTemplate
5
+ from langchain.chains import RetrievalQA
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+
9
+ from dotenv import load_dotenv, find_dotenv
10
+ load_dotenv(find_dotenv())
11
+
12
+
13
+ HF_TOKEN=os.environ.get("HF_TOKEN")
14
+ HUGGINGFACE_REPO_ID="mistralai/Mistral-7B-Instruct-v0.3"
15
+
16
+ def load_llm(huggingface_repo_id):
17
+ llm=HuggingFaceEndpoint(
18
+ repo_id=huggingface_repo_id,
19
+ temperature=0.5,
20
+ model_kwargs={"token":HF_TOKEN,
21
+ "max_length":"512"}
22
+ )
23
+ return llm
24
+
25
+
26
+ CUSTOM_PROMPT_TEMPLATE = """
27
+ Use the pieces of information provided in the context to answer user's question.
28
+ If you dont know the answer, just say that you dont know, dont try to make up an answer.
29
+ Dont provide anything out of the given context
30
+
31
+ Context: {context}
32
+ Question: {question}
33
+
34
+ Start the answer directly. No small talk please.
35
+ """
36
+
37
+ def set_custom_prompt(custom_prompt_template):
38
+ prompt=PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])
39
+ return prompt
40
+
41
+ DB_FAISS_PATH= "../vectorstore/db_faiss"
42
+ embedding_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
43
+ db=FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
44
+
45
+ qa_chain=RetrievalQA.from_chain_type(
46
+ llm=load_llm(HUGGINGFACE_REPO_ID),
47
+ chain_type="stuff",
48
+ retriever=db.as_retriever(search_kwargs={'k':3}),
49
+ return_source_documents=True,
50
+ chain_type_kwargs={'prompt':set_custom_prompt(CUSTOM_PROMPT_TEMPLATE)}
51
+ )
52
+
53
+ user_query=input("Write Query Here: ")
54
+ response=qa_chain.invoke({'query': user_query})
55
+ print("RESULT: ", response["result"])
56
+ print("SOURCE DOCUMENTS: ", response["source_documents"])
utils/create_memory_for_llm.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain_huggingface import HuggingFaceEmbeddings
4
+ from langchain_community.vectorstores import FAISS
5
+
6
+ from dotenv import load_dotenv, find_dotenv
7
+ load_dotenv(find_dotenv())
8
+
9
+
10
+ DATA_PATH= "../data/"
11
+ def load_pdf_files(data):
12
+ loader = DirectoryLoader(data,
13
+ glob='*.pdf',
14
+ loader_cls=PyPDFLoader)
15
+
16
+ documents=loader.load()
17
+ return documents
18
+
19
+ documents=load_pdf_files(data=DATA_PATH)
20
+
21
+
22
+ def create_chunks(extracted_data):
23
+ text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,
24
+ chunk_overlap=50)
25
+ text_chunks=text_splitter.split_documents(extracted_data)
26
+ return text_chunks
27
+
28
+ text_chunks=create_chunks(extracted_data=documents)
29
+
30
+
31
+ def get_embedding_model():
32
+ embedding_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
33
+ return embedding_model
34
+
35
+ embedding_model=get_embedding_model()
36
+
37
+ DB_FAISS_PATH= "../vectorstore/db_faiss"
38
+ db=FAISS.from_documents(text_chunks, embedding_model)
39
+ db.save_local(DB_FAISS_PATH)