deepak-cse-jha committed on
Commit
e66f178
·
0 Parent(s):

Clean deploy version for Hugging Face Space

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ data/
3
+ vectorstore/
4
+ venv/
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ COPY . .
10
+
11
+ EXPOSE 7860
12
+
13
+ CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md ADDED
File without changes
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from dotenv import load_dotenv
4
+
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain.chains import RetrievalQA
8
+ from langchain_core.prompts import PromptTemplate
9
+ from langchain_groq import ChatGroq
10
+
11
+ load_dotenv()
12
+
13
+ DB_FAISS_PATH = "vectorstore/db_faiss"
14
+
15
+ @st.cache_resource
16
+ def get_vectorstore():
17
+ if not os.path.exists(DB_FAISS_PATH):
18
+ st.error("FAISS vectorstore not found")
19
+ st.stop()
20
+
21
+ embeddings = HuggingFaceEmbeddings(
22
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
23
+ )
24
+
25
+ return FAISS.load_local(
26
+ DB_FAISS_PATH,
27
+ embeddings,
28
+ allow_dangerous_deserialization=True
29
+ )
30
+
31
+
32
+ def get_prompt():
33
+ return PromptTemplate(
34
+ template="""
35
+ Use the information in the context to answer the question.
36
+ If you do not know the answer, say you do not know.
37
+ Do not add anything outside the context.
38
+
39
+ Context:
40
+ {context}
41
+
42
+ Question:
43
+ {question}
44
+
45
+ Answer directly.
46
+ """,
47
+ input_variables=["context", "question"],
48
+ )
49
+
50
+
51
+ def main():
52
+ st.set_page_config(page_title="Medical Chatbot")
53
+ st.title("Medical Chatbot")
54
+
55
+ groq_api_key = os.getenv("GROQ_API_KEY")
56
+ if not groq_api_key:
57
+ st.error("GROQ_API_KEY not set")
58
+ st.stop()
59
+
60
+ if "messages" not in st.session_state:
61
+ st.session_state.messages = []
62
+
63
+ for msg in st.session_state.messages:
64
+ st.chat_message(msg["role"]).markdown(msg["content"])
65
+
66
+ user_input = st.chat_input("Ask your question")
67
+
68
+ if user_input:
69
+ st.chat_message("user").markdown(user_input)
70
+ st.session_state.messages.append(
71
+ {"role": "user", "content": user_input}
72
+ )
73
+
74
+ vectorstore = get_vectorstore()
75
+
76
+ qa_chain = RetrievalQA.from_chain_type(
77
+ llm=ChatGroq(
78
+ model_name="meta-llama/llama-4-maverick-17b-128e-instruct",
79
+ temperature=0.0,
80
+ groq_api_key=groq_api_key,
81
+ ),
82
+ chain_type="stuff",
83
+ retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
84
+ return_source_documents=True,
85
+ chain_type_kwargs={"prompt": get_prompt()},
86
+ )
87
+
88
+ response = qa_chain.invoke({"query": user_input})
89
+
90
+ answer = response["result"]
91
+ sources = response["source_documents"]
92
+
93
+ st.chat_message("assistant").markdown(answer)
94
+ st.session_state.messages.append(
95
+ {"role": "assistant", "content": answer}
96
+ )
97
+
98
+ st.chat_message("assistant").markdown(
99
+ "Source Docs:\n\n" + str(sources)
100
+ )
101
+ st.session_state.messages.append(
102
+ {"role": "assistant", "content": str(sources)}
103
+ )
104
+
105
+
106
+ if __name__ == "__main__":
107
+ main()
requirements.txt ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -i https://pypi.org/simple
2
+ aiohappyeyeballs==2.6.1; python_version >= '3.9'
3
+ aiohttp==3.12.14; python_version >= '3.9'
4
+ aiosignal==1.4.0; python_version >= '3.9'
5
+ altair==5.5.0; python_version >= '3.9'
6
+ annotated-types==0.7.0; python_version >= '3.8'
7
+ anyio==4.9.0; python_version >= '3.9'
8
+ attrs==25.3.0; python_version >= '3.8'
9
+ blinker==1.9.0; python_version >= '3.9'
10
+ cachetools==6.1.0; python_version >= '3.9'
11
+ certifi==2025.7.9; python_version >= '3.7'
12
+ charset-normalizer==3.4.2; python_version >= '3.7'
13
+ click==8.2.1; python_version >= '3.10'
14
+ dataclasses-json==0.6.7; python_version >= '3.7' and python_version < '4.0'
15
+ distro==1.9.0; python_version >= '3.6'
16
+ faiss-cpu==1.11.0; python_version >= '3.9'
17
+ filelock==3.18.0; python_version >= '3.9'
18
+ frozenlist==1.7.0; python_version >= '3.9'
19
+ fsspec==2025.5.1; python_version >= '3.9'
20
+ gitdb==4.0.12; python_version >= '3.7'
21
+ gitpython==3.1.44; python_version >= '3.7'
22
+ groq==0.29.0; python_version >= '3.8'
23
+ h11==0.16.0; python_version >= '3.8'
24
+ hf-xet==1.1.5; platform_machine == 'x86_64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'aarch64'
25
+ httpcore==1.0.9; python_version >= '3.8'
26
+ httpx==0.28.1; python_version >= '3.8'
27
+ httpx-sse==0.4.1; python_version >= '3.9'
28
+ huggingface-hub==0.33.2; python_full_version >= '3.8.0'
29
+ idna==3.10; python_version >= '3.6'
30
+ jinja2==3.1.6; python_version >= '3.7'
31
+ joblib==1.5.1; python_version >= '3.9'
32
+ jsonpatch==1.33; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
33
+ jsonpointer==3.0.0; python_version >= '3.7'
34
+ jsonschema==4.24.0; python_version >= '3.9'
35
+ jsonschema-specifications==2025.4.1; python_version >= '3.9'
36
+ langchain==0.3.26; python_version >= '3.9'
37
+ langchain-community==0.3.27; python_version >= '3.9'
38
+ langchain-core==0.3.68; python_version >= '3.9'
39
+ langchain-groq==0.3.5; python_version >= '3.9'
40
+ langchain-huggingface==0.3.0; python_version >= '3.9'
41
+ langchain-text-splitters==0.3.8; python_version >= '3.9' and python_version < '4.0'
42
+ langsmith==0.4.4; python_version >= '3.9'
43
+ markupsafe==3.0.2; python_version >= '3.9'
44
+ marshmallow==3.26.1; python_version >= '3.9'
45
+ mpmath==1.3.0
46
+ multidict==6.6.3; python_version >= '3.9'
47
+ mypy-extensions==1.1.0; python_version >= '3.8'
48
+ narwhals==1.46.0; python_version >= '3.9'
49
+ networkx==3.5; python_version >= '3.11'
50
+ numpy==2.3.1; python_version < '3.13'
51
+ orjson==3.10.18; platform_python_implementation != 'PyPy'
52
+ packaging==24.2; python_version >= '3.8'
53
+ pandas==2.3.1; python_version >= '3.9'
54
+ pillow==11.3.0; python_version >= '3.9'
55
+ propcache==0.3.2; python_version >= '3.9'
56
+ protobuf==6.31.1; python_version >= '3.9'
57
+ pyarrow==20.0.0; python_version >= '3.9'
58
+ pydantic==2.11.7; python_version >= '3.9'
59
+ pydantic-core==2.33.2; python_version >= '3.9'
60
+ pydantic-settings==2.10.1; python_version >= '3.9'
61
+ pydeck==0.9.1; python_version >= '3.8'
62
+ pypdf==5.7.0; python_version >= '3.8'
63
+ python-dateutil==2.9.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
64
+ python-dotenv==1.1.1; python_version >= '3.9'
65
+ pytz==2025.2
66
+ pyyaml==6.0.2; python_version >= '3.8'
67
+ referencing==0.36.2; python_version >= '3.9'
68
+ regex==2024.11.6; python_version >= '3.8'
69
+ requests==2.32.4; python_version >= '3.8'
70
+ requests-toolbelt==1.0.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
71
+ rpds-py==0.26.0; python_version >= '3.9'
72
+ safetensors==0.5.3; python_version >= '3.7'
73
+ scikit-learn==1.7.0; python_version >= '3.10'
74
+ scipy==1.16.0; python_version >= '3.11'
75
+ sentence-transformers==5.0.0; python_version >= '3.9'
76
+ setuptools==80.9.0; python_version >= '3.12'
77
+ six==1.17.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
78
+ smmap==5.0.2; python_version >= '3.7'
79
+ sniffio==1.3.1; python_version >= '3.7'
80
+ sqlalchemy==2.0.41; python_version >= '3.7'
81
+ streamlit==1.46.1; python_version >= '3.9' and python_full_version != '3.9.7'
82
+ sympy==1.14.0; python_version >= '3.9'
83
+ tenacity==9.1.2; python_version >= '3.9'
84
+ threadpoolctl==3.6.0; python_version >= '3.9'
85
+ tokenizers==0.21.2; python_version >= '3.9'
86
+ toml==0.10.2; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'
87
+ torch==2.7.1; python_full_version >= '3.9.0'
88
+ tornado==6.5.1; python_version >= '3.9'
89
+ tqdm==4.67.1; python_version >= '3.7'
90
+ transformers==4.53.1; python_full_version >= '3.9.0'
91
+ typing-extensions==4.14.1; python_version >= '3.9'
92
+ typing-inspect==0.9.0
93
+ typing-inspection==0.4.1; python_version >= '3.9'
94
+ tzdata==2025.2; python_version >= '2'
95
+ urllib3==2.5.0; python_version >= '3.9'
96
+ yarl==1.20.1; python_version >= '3.9'
97
+ zstandard==0.23.0; python_version >= '3.8'
utils/connect_memory_with_llm.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from langchain_huggingface import HuggingFaceEndpoint
4
+ from langchain_core.prompts import PromptTemplate
5
+ from langchain.chains import RetrievalQA
6
+ from langchain_huggingface import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+
9
+ from dotenv import load_dotenv, find_dotenv
10
+ load_dotenv(find_dotenv())
11
+
12
+
13
+ HF_TOKEN=os.environ.get("HF_TOKEN")
14
+ HUGGINGFACE_REPO_ID="mistralai/Mistral-7B-Instruct-v0.3"
15
+
16
+ def load_llm(huggingface_repo_id):
17
+ llm=HuggingFaceEndpoint(
18
+ repo_id=huggingface_repo_id,
19
+ temperature=0.5,
20
+ model_kwargs={"token":HF_TOKEN,
21
+ "max_length":"512"}
22
+ )
23
+ return llm
24
+
25
+
26
+ CUSTOM_PROMPT_TEMPLATE = """
27
+ Use the pieces of information provided in the context to answer user's question.
28
+ If you dont know the answer, just say that you dont know, dont try to make up an answer.
29
+ Dont provide anything out of the given context
30
+
31
+ Context: {context}
32
+ Question: {question}
33
+
34
+ Start the answer directly. No small talk please.
35
+ """
36
+
37
+ def set_custom_prompt(custom_prompt_template):
38
+ prompt=PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])
39
+ return prompt
40
+
41
+ DB_FAISS_PATH= "../vectorstore/db_faiss"
42
+ embedding_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
43
+ db=FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
44
+
45
+ qa_chain=RetrievalQA.from_chain_type(
46
+ llm=load_llm(HUGGINGFACE_REPO_ID),
47
+ chain_type="stuff",
48
+ retriever=db.as_retriever(search_kwargs={'k':3}),
49
+ return_source_documents=True,
50
+ chain_type_kwargs={'prompt':set_custom_prompt(CUSTOM_PROMPT_TEMPLATE)}
51
+ )
52
+
53
+ user_query=input("Write Query Here: ")
54
+ response=qa_chain.invoke({'query': user_query})
55
+ print("RESULT: ", response["result"])
56
+ print("SOURCE DOCUMENTS: ", response["source_documents"])
utils/create_memory_for_llm.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain_huggingface import HuggingFaceEmbeddings
4
+ from langchain_community.vectorstores import FAISS
5
+
6
+ from dotenv import load_dotenv, find_dotenv
7
+ load_dotenv(find_dotenv())
8
+
9
+
10
+ DATA_PATH= "../data/"
11
+ def load_pdf_files(data):
12
+ loader = DirectoryLoader(data,
13
+ glob='*.pdf',
14
+ loader_cls=PyPDFLoader)
15
+
16
+ documents=loader.load()
17
+ return documents
18
+
19
+ documents=load_pdf_files(data=DATA_PATH)
20
+
21
+
22
+ def create_chunks(extracted_data):
23
+ text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,
24
+ chunk_overlap=50)
25
+ text_chunks=text_splitter.split_documents(extracted_data)
26
+ return text_chunks
27
+
28
+ text_chunks=create_chunks(extracted_data=documents)
29
+
30
+
31
+ def get_embedding_model():
32
+ embedding_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
33
+ return embedding_model
34
+
35
+ embedding_model=get_embedding_model()
36
+
37
+ DB_FAISS_PATH= "../vectorstore/db_faiss"
38
+ db=FAISS.from_documents(text_chunks, embedding_model)
39
+ db.save_local(DB_FAISS_PATH)