YASHMANIC commited on
Commit
699a338
·
1 Parent(s): 9dd2937

Final commit

Browse files
test.py ADDED
File without changes
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Single-stage image for the MediBot Flask app.
# NOTE: the previous version declared "AS builder" but had no second stage,
# so the multi-stage scaffolding was dead weight and has been removed.
FROM python:3.11-slim-bullseye

WORKDIR /app

# Copy only the dependency manifests first to leverage Docker layer caching;
# setup.py is needed because requirements.txt contains "-e .".
COPY requirements.txt setup.py ./

# Install and clean caches in the SAME layer — a later "RUN rm -rf" cannot
# shrink an image because earlier layers are immutable.
RUN pip install --upgrade pip \
 && pip install --no-cache-dir -r requirements.txt \
 && rm -rf /root/.cache

# Copy the application source.
COPY . .

# app.py binds 0.0.0.0:8080.
CMD ["python", "app.py"]
Generative_AI_Project.egg-info/PKG-INFO ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.1
2
+ Name: Generative-AI-Project
3
+ Version: 0.0.0
4
+ Author: Yaswanth
5
+ Author-email: yashmanic96@gmail.com
6
+ License-File: LICENSE
Generative_AI_Project.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ LICENSE
2
+ README.md
3
+ setup.py
4
+ Generative_AI_Project.egg-info/PKG-INFO
5
+ Generative_AI_Project.egg-info/SOURCES.txt
6
+ Generative_AI_Project.egg-info/dependency_links.txt
7
+ Generative_AI_Project.egg-info/top_level.txt
8
+ src/__init__.py
9
+ src/helper.py
10
+ src/prompt.py
Generative_AI_Project.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
Generative_AI_Project.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ src
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 yaswanth
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,10 +1 @@
1
- ---
2
- title: MediBot
3
- emoji: 📊
4
- colorFrom: purple
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # MediBot
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Flask entry point for the MediBot RAG chatbot.

Wires a Pinecone similarity retriever and a Groq-hosted LLM into a
retrieval-augmented generation chain and serves it behind a minimal chat UI.
"""
from flask import Flask, render_template, request
from langchain_pinecone import PineconeVectorStore
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from dotenv import load_dotenv
import os

from src.helper import download_hugging_face_embeddings
from src.prompt import system_prompt  # explicit import instead of wildcard

app = Flask(__name__)

load_dotenv()

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Fail fast with a clear message; `os.environ[...] = None` would otherwise
# raise an opaque TypeError when a key is missing.
if not PINECONE_API_KEY or not GROQ_API_KEY:
    raise RuntimeError("PINECONE_API_KEY and GROQ_API_KEY must be set (see .env)")

os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

embeddings = download_hugging_face_embeddings()

index_name = "medicalbot"

# Connect to the already-populated Pinecone index (built by store_index.py).
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})

llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    temperature=0.7,
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


@app.route("/")
def index():
    """Serve the chat UI."""
    return render_template("chat.html")


@app.route("/get", methods=["GET", "POST"])
def chat():
    """Answer one chat message from the UI.

    Expects a form field ``msg``; returns the chain's answer as plain text.
    """
    # .get() avoids Flask's 400 KeyError page when the field is absent
    # (e.g. a bare GET); also no longer shadows the builtin `input`.
    msg = request.form.get("msg", "")
    response = rag_chain.invoke({"input": msg})
    app.logger.info("Response: %s", response["answer"])
    return str(response["answer"])


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8080)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sentence-transformers == 3.3.1
2
+ gunicorn>=20.0.4
3
+ langchain
4
+ flask
5
+ pypdf
6
+ python-dotenv
7
+ pinecone[grpc]
8
+ langchain-pinecone
9
+ langchain_community
10
+ langchain_experimental
11
+ langchain-groq
12
+ -e .
research/trials.ipynb ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/plain": [
11
+ "'/home/yaswanth/Yaswanth/Ai/project/MediBot'"
12
+ ]
13
+ },
14
+ "execution_count": 1,
15
+ "metadata": {},
16
+ "output_type": "execute_result"
17
+ }
18
+ ],
19
+ "source": [
20
+ "import os\n",
21
+ "os.chdir(\"../\")\n",
22
+ "%pwd"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 2,
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader\n",
32
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 3,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "#Extract Data From the PDF File\n",
42
+ "def load_pdf_file(data):\n",
43
+ " loader= DirectoryLoader(data,\n",
44
+ " glob=\"*.pdf\",\n",
45
+ " loader_cls=PyPDFLoader)\n",
46
+ "\n",
47
+ " documents=loader.load()\n",
48
+ "\n",
49
+ " return documents"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 4,
55
+ "metadata": {},
56
+ "outputs": [],
57
+ "source": [
58
+ "extracted_data=load_pdf_file(\"Data/\")"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": 5,
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "# extracted_data"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": 6,
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "#Split the Data into Text Chunks\n",
77
+ "def text_split(extracted_data):\n",
78
+ " text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)\n",
79
+ " text_chunks=text_splitter.split_documents(extracted_data)\n",
80
+ " return text_chunks"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 7,
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "text_chunks=text_split(extracted_data)\n",
90
+ "# print(\"Length of Text Chunks\", len(text_chunks))"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 8,
96
+ "metadata": {},
97
+ "outputs": [],
98
+ "source": [
99
+ "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
100
+ "from langchain_huggingface import HuggingFaceEmbeddings"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": 9,
106
+ "metadata": {},
107
+ "outputs": [],
108
+ "source": [
109
+ "#Download the Embeddings from Hugging Face\n",
110
+ "def download_hugging_face_embeddings():\n",
111
+ " embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')\n",
112
+ " return embeddings"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 10,
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "name": "stderr",
122
+ "output_type": "stream",
123
+ "text": [
124
+ "/tmp/ipykernel_5549/2661704553.py:3: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-huggingface package and should be used instead. To use it run `pip install -U :class:`~langchain-huggingface` and import as `from :class:`~langchain_huggingface import HuggingFaceEmbeddings``.\n",
125
+ " embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')\n",
126
+ "/home/yaswanth/anaconda3/envs/MediBot/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
127
+ " from .autonotebook import tqdm as notebook_tqdm\n"
128
+ ]
129
+ }
130
+ ],
131
+ "source": [
132
+ "embeddings = download_hugging_face_embeddings()"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 11,
138
+ "metadata": {},
139
+ "outputs": [
140
+ {
141
+ "data": {
142
+ "text/plain": [
143
+ "True"
144
+ ]
145
+ },
146
+ "execution_count": 11,
147
+ "metadata": {},
148
+ "output_type": "execute_result"
149
+ }
150
+ ],
151
+ "source": [
152
+ "from dotenv import load_dotenv\n",
153
+ "load_dotenv()"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": 12,
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
+ "PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')\n",
163
+ "GROQ_API_KEY=os.environ.get('GROQ_API_KEY')"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": 13,
169
+ "metadata": {},
170
+ "outputs": [],
171
+ "source": [
172
+ "from pinecone.grpc import PineconeGRPC as Pinecone\n",
173
+ "from pinecone import ServerlessSpec\n",
174
+ "import os\n",
175
+ "\n",
176
+ "pc = Pinecone(api_key=PINECONE_API_KEY)\n",
177
+ "\n",
178
+ "index_name = \"medicalbot\"\n",
179
+ "\n",
180
+ "\n",
181
+ "pc.create_index(\n",
182
+ " name=index_name,\n",
183
+ " dimension=384, \n",
184
+ " metric=\"cosine\", \n",
185
+ " spec=ServerlessSpec(\n",
186
+ " cloud=\"aws\", \n",
187
+ " region=\"us-east-1\"\n",
188
+ " ) \n",
189
+ ") \n"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": 14,
195
+ "metadata": {},
196
+ "outputs": [],
197
+ "source": [
198
+ "import os\n",
199
+ "os.environ[\"PINECONE_API_KEY\"] = PINECONE_API_KEY\n",
200
+ "os.environ[\"GROQ_API_KEY\"] = GROQ_API_KEY"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 15,
206
+ "metadata": {},
207
+ "outputs": [],
208
+ "source": [
209
+ "# Embed each chunk and upsert the embeddings into your Pinecone index.\n",
210
+ "from langchain_pinecone import PineconeVectorStore\n",
211
+ "\n",
212
+ "docsearch = PineconeVectorStore.from_documents(\n",
213
+ " documents=text_chunks,\n",
214
+ " index_name=index_name,\n",
215
+ " embedding=embeddings, \n",
216
+ ")"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": 16,
222
+ "metadata": {},
223
+ "outputs": [],
224
+ "source": [
225
+ "from langchain_pinecone import PineconeVectorStore\n",
226
+ "# Embed each chunk and upsert the embeddings into your Pinecone index.\n",
227
+ "docsearch = PineconeVectorStore.from_existing_index(\n",
228
+ " index_name=index_name,\n",
229
+ " embedding=embeddings\n",
230
+ ")"
231
+ ]
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": 17,
236
+ "metadata": {},
237
+ "outputs": [
238
+ {
239
+ "data": {
240
+ "text/plain": [
241
+ "[Document(id='455231a4-b774-4555-9236-d54f10fa02d2', metadata={'page': 274.0, 'source': 'Data/Medical_book.pdf'}, page_content='Antidiabetic drugs\\nGEM - 0001 to 0432 - A 10/22/03 1:42 PM Page 261'),\n",
242
+ " Document(id='7a7a7635-5026-4c9f-9b66-a4abc3b803cb', metadata={'page': 274.0, 'source': 'Data/Medical_book.pdf'}, page_content='with a physician or pharmacist before combining tri-\\ncyclic antidepressants with any other prescription or non-\\nprescription (over-the-counter) medicine.\\nNancy Ross-Flanigan\\nAntidiabetic drugs\\nDefinition\\nAntidiabetic drugs are medicines that help control\\nblood sugar levels in people with diabetes mellitus\\n(sugar diabetes).\\nPurpose\\nDiabetes may be divided into type I and type II, for-\\nmerly termed juvenile onset or insulin-dependent, and\\nGALE ENCYCLOPEDIA OF MEDICINE 2 261\\nAntidiabetic drugs'),\n",
243
+ " Document(id='567f7646-9628-4890-93d2-23cc548e096f', metadata={'page': 543.0, 'source': 'Data/Medical_book.pdf'}, page_content='National Institute of Diabetes and Digestive and Kidney Dis-\\neases (NIDDK). Building 31, Room 9A04, 31 Center\\nDrive, MSC 2560, Bethesda, MD 208792-2560. (301)\\n496-3583. <http://www.niddk.nih.gov>.\\nNancy J. Nordenson\\nBlood thinners see Anticoagulant and\\nantiplatelet drugs\\nBlood transfusion see Transfusion\\nGALE ENCYCLOPEDIA OF MEDICINE 2530\\nBlood sugar tests\\nGEM -0433 to 0624 - B 10/22/03 6:08 PM Page 530')]"
244
+ ]
245
+ },
246
+ "execution_count": 17,
247
+ "metadata": {},
248
+ "output_type": "execute_result"
249
+ }
250
+ ],
251
+ "source": [
252
+ "retriever = docsearch.as_retriever(search_type=\"similarity\", search_kwargs={\"k\":3})\n",
253
+ "retriever_docs = retriever.invoke(\"What is the best treatment for diabetes?\")\n",
254
+ "retriever_docs"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "code",
259
+ "execution_count": 18,
260
+ "metadata": {},
261
+ "outputs": [],
262
+ "source": [
263
+ "from langchain_groq import ChatGroq\n",
264
+ "from langchain_core.prompts import ChatPromptTemplate\n",
265
+ "from langchain_core.output_parsers import JsonOutputParser\n",
266
+ "import json\n",
267
+ "\n",
268
+ "# Initialize Groq LLM\n",
269
+ "llm = ChatGroq(\n",
270
+ " model_name=\"llama-3.3-70b-versatile\",\n",
271
+ " temperature=0.7\n",
272
+ ")"
273
+ ]
274
+ },
275
+ {
276
+ "cell_type": "code",
277
+ "execution_count": 19,
278
+ "metadata": {},
279
+ "outputs": [],
280
+ "source": [
281
+ "from langchain.chains import create_retrieval_chain\n",
282
+ "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
283
+ "from langchain_core.prompts import ChatPromptTemplate\n",
284
+ "\n",
285
+ "\n",
286
+ "system_prompt = (\n",
287
+ " \"You are an assistant for question-answering tasks. \"\n",
288
+ " \"Use the following pieces of retrieved context to answer \"\n",
289
+ " \"the question. If you don't know the answer, say that you \"\n",
290
+ " \"don't know. Use three sentences maximum and keep the \"\n",
291
+ " \"answer concise.\"\n",
292
+ " \"\\n\\n\"\n",
293
+ " \"{context}\"\n",
294
+ ")\n",
295
+ "\n",
296
+ "\n",
297
+ "prompt = ChatPromptTemplate.from_messages(\n",
298
+ " [\n",
299
+ " (\"system\", system_prompt),\n",
300
+ " (\"human\", \"{input}\"),\n",
301
+ " ]\n",
302
+ ")"
303
+ ]
304
+ },
305
+ {
306
+ "cell_type": "code",
307
+ "execution_count": 20,
308
+ "metadata": {},
309
+ "outputs": [],
310
+ "source": [
311
+ "question_answer_chain = create_stuff_documents_chain(llm, prompt)\n",
312
+ "rag_chain = create_retrieval_chain(retriever, question_answer_chain)"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": 23,
318
+ "metadata": {},
319
+ "outputs": [
320
+ {
321
+ "name": "stdout",
322
+ "output_type": "stream",
323
+ "text": [
324
+ "Diabetes mellitus is a disorder of carbohydrate metabolism brought on by a combination of hereditary and environmental factors. It occurs when a person either does not make enough insulin or makes insulin that does not work properly, resulting in high blood sugar, a condition called hyperglycemia. This can lead to damage or failure of various body organs if left untreated.\n"
325
+ ]
326
+ }
327
+ ],
328
+ "source": [
329
+ "response = rag_chain.invoke({\"input\": \"what is Diabetes?\"})\n",
330
+ "print(response[\"answer\"])\n"
331
+ ]
332
+ }
333
+ ],
334
+ "metadata": {
335
+ "kernelspec": {
336
+ "display_name": "base",
337
+ "language": "python",
338
+ "name": "python3"
339
+ },
340
+ "language_info": {
341
+ "codemirror_mode": {
342
+ "name": "ipython",
343
+ "version": 3
344
+ },
345
+ "file_extension": ".py",
346
+ "mimetype": "text/x-python",
347
+ "name": "python",
348
+ "nbconvert_exporter": "python",
349
+ "pygments_lexer": "ipython3",
350
+ "version": "3.12.7"
351
+ }
352
+ },
353
+ "nbformat": 4,
354
+ "nbformat_minor": 2
355
+ }
setup.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Packaging metadata for the MediBot project (exposes the `src` package)."""
from setuptools import find_packages, setup

setup(
    # Hyphenated name: spaces are not valid in a PEP 508 distribution name,
    # and setuptools already normalised the old "Generative AI Project" to
    # this exact value (see Generative_AI_Project.egg-info/PKG-INFO).
    name="Generative-AI-Project",
    version="0.0.0",
    author="Yaswanth",
    author_email="yashmanic96@gmail.com",
    # Discovers the `src` package via src/__init__.py.
    packages=find_packages(),
    # Runtime dependencies are managed in requirements.txt (installed via "-e .").
    install_requires=[],
)
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (151 Bytes). View file
 
src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (155 Bytes). View file
 
src/__pycache__/helper.cpython-310.pyc ADDED
Binary file (1.02 kB). View file
 
src/__pycache__/helper.cpython-312.pyc ADDED
Binary file (1.22 kB). View file
 
src/__pycache__/prompt.cpython-310.pyc ADDED
Binary file (412 Bytes). View file
 
src/helper.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain_community.embeddings import HuggingFaceEmbeddings
4
+
5
+
6
+ #Extract Data From the PDF File
7
+ def load_pdf_file(data):
8
+ loader= DirectoryLoader(data,
9
+ glob="*.pdf",
10
+ loader_cls=PyPDFLoader)
11
+
12
+ documents=loader.load()
13
+
14
+ return documents
15
+
16
+
17
+
18
+ #Split the Data into Text Chunks
19
+ def text_split(extracted_data):
20
+ text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
21
+ text_chunks=text_splitter.split_documents(extracted_data)
22
+ return text_chunks
23
+
24
+
25
+
26
+ #Download the Embeddings from HuggingFace
27
+ def download_hugging_face_embeddings():
28
+ embeddings=HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') #this model return 384 dimensions
29
+ return embeddings
src/prompt.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Core instruction text for the RAG question-answering chain.
_INSTRUCTIONS = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
)

# `{context}` is substituted with the retrieved chunks by
# create_stuff_documents_chain at invocation time.
system_prompt = _INSTRUCTIONS + "\n\n{context}"
static/style.css ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
body, html {
  height: 100%;
  margin: 0;
  /* Solid-colour fallback for browsers with no gradient support. */
  background: rgb(44, 47, 59);
  /* Legacy WebKit prefixed syntax uses a from-side keyword ("left"), not
     "to right"; colour stops are now kept in sync with the standard
     declaration below (previously the two gradients declared different
     colours, so prefixed browsers rendered a different background). */
  background: -webkit-linear-gradient(left, rgb(38, 51, 61), rgb(50, 55, 65), rgb(33, 33, 78));
  background: linear-gradient(to right, rgb(38, 51, 61), rgb(50, 55, 65), rgb(33, 33, 78));
}
8
+
9
+ .chat{
10
+ margin-top: auto;
11
+ margin-bottom: auto;
12
+ }
13
+ .card{
14
+ height: 500px;
15
+ border-radius: 15px !important;
16
+ background-color: rgba(0,0,0,0.4) !important;
17
+ }
18
+ .contacts_body{
19
+ padding: 0.75rem 0 !important;
20
+ overflow-y: auto;
21
+ white-space: nowrap;
22
+ }
23
+ .msg_card_body{
24
+ overflow-y: auto;
25
+ }
26
+ .card-header{
27
+ border-radius: 15px 15px 0 0 !important;
28
+ border-bottom: 0 !important;
29
+ }
30
+ .card-footer{
31
+ border-radius: 0 0 15px 15px !important;
32
+ border-top: 0 !important;
33
+ }
34
+ .container{
35
+ align-content: center;
36
+ }
37
+ .search{
38
+ border-radius: 15px 0 0 15px !important;
39
+ background-color: rgba(0,0,0,0.3) !important;
40
+ border:0 !important;
41
+ color:white !important;
42
+ }
43
+ .search:focus{
44
+ box-shadow:none !important;
45
+ outline:0px !important;
46
+ }
47
+ .type_msg{
48
+ background-color: rgba(0,0,0,0.3) !important;
49
+ border:0 !important;
50
+ color:white !important;
51
+ height: 60px !important;
52
+ overflow-y: auto;
53
+ }
54
+ .type_msg:focus{
55
+ box-shadow:none !important;
56
+ outline:0px !important;
57
+ }
58
+ .attach_btn{
59
+ border-radius: 15px 0 0 15px !important;
60
+ background-color: rgba(0,0,0,0.3) !important;
61
+ border:0 !important;
62
+ color: white !important;
63
+ cursor: pointer;
64
+ }
65
+ .send_btn{
66
+ border-radius: 0 15px 15px 0 !important;
67
+ background-color: rgba(0,0,0,0.3) !important;
68
+ border:0 !important;
69
+ color: white !important;
70
+ cursor: pointer;
71
+ }
72
+ .search_btn{
73
+ border-radius: 0 15px 15px 0 !important;
74
+ background-color: rgba(0,0,0,0.3) !important;
75
+ border:0 !important;
76
+ color: white !important;
77
+ cursor: pointer;
78
+ }
79
+ .contacts{
80
+ list-style: none;
81
+ padding: 0;
82
+ }
83
+ .contacts li{
84
+ width: 100% !important;
85
+ padding: 5px 10px;
86
+ margin-bottom: 15px !important;
87
+ }
88
+ .active{
89
+ background-color: rgba(0,0,0,0.3);
90
+ }
91
+ .user_img{
92
+ height: 70px;
93
+ width: 70px;
94
+ border:1.5px solid #f5f6fa;
95
+
96
+ }
97
+ .user_img_msg{
98
+ height: 40px;
99
+ width: 40px;
100
+ border:1.5px solid #f5f6fa;
101
+
102
+ }
103
+ .img_cont{
104
+ position: relative;
105
+ height: 70px;
106
+ width: 70px;
107
+ }
108
+ .img_cont_msg{
109
+ height: 40px;
110
+ width: 40px;
111
+ }
112
+ .online_icon{
113
+ position: absolute;
114
+ height: 15px;
115
+ width:15px;
116
+ background-color: #4cd137;
117
+ border-radius: 50%;
118
+ bottom: 0.2em;
119
+ right: 0.4em;
120
+ border:1.5px solid white;
121
+ }
122
+ .offline{
123
+ background-color: #c23616 !important;
124
+ }
125
+ .user_info{
126
+ margin-top: auto;
127
+ margin-bottom: auto;
128
+ margin-left: 15px;
129
+ }
130
+ .user_info span{
131
+ font-size: 20px;
132
+ color: white;
133
+ }
134
+ .user_info p{
135
+ font-size: 10px;
136
+ color: rgba(255,255,255,0.6);
137
+ }
138
+ .video_cam{
139
+ margin-left: 50px;
140
+ margin-top: 5px;
141
+ }
142
+ .video_cam span{
143
+ color: white;
144
+ font-size: 20px;
145
+ cursor: pointer;
146
+ margin-right: 20px;
147
+ }
148
+ .msg_cotainer{
149
+ margin-top: auto;
150
+ margin-bottom: auto;
151
+ margin-left: 10px;
152
+ border-radius: 25px;
153
+ background-color: rgb(82, 172, 255);
154
+ padding: 10px;
155
+ position: relative;
156
+ }
157
+ .msg_cotainer_send{
158
+ margin-top: auto;
159
+ margin-bottom: auto;
160
+ margin-right: 10px;
161
+ border-radius: 25px;
162
+ background-color: #58cc71;
163
+ padding: 10px;
164
+ position: relative;
165
+ }
166
+ .msg_time{
167
+ position: absolute;
168
+ left: 0;
169
+ bottom: -15px;
170
+ color: rgba(255,255,255,0.5);
171
+ font-size: 10px;
172
+ }
173
+ .msg_time_send{
174
+ position: absolute;
175
+ right:0;
176
+ bottom: -15px;
177
+ color: rgba(255,255,255,0.5);
178
+ font-size: 10px;
179
+ }
180
+ .msg_head{
181
+ position: relative;
182
+ }
183
+ #action_menu_btn{
184
+ position: absolute;
185
+ right: 10px;
186
+ top: 10px;
187
+ color: white;
188
+ cursor: pointer;
189
+ font-size: 20px;
190
+ }
191
+ .action_menu{
192
+ z-index: 1;
193
+ position: absolute;
194
+ padding: 15px 0;
195
+ background-color: rgba(0,0,0,0.5);
196
+ color: white;
197
+ border-radius: 15px;
198
+ top: 30px;
199
+ right: 15px;
200
+ display: none;
201
+ }
202
+ .action_menu ul{
203
+ list-style: none;
204
+ padding: 0;
205
+ margin: 0;
206
+ }
207
+ .action_menu ul li{
208
+ width: 100%;
209
+ padding: 10px 15px;
210
+ margin-bottom: 5px;
211
+ }
212
+ .action_menu ul li i{
213
+ padding-right: 10px;
214
+ }
215
+ .action_menu ul li:hover{
216
+ cursor: pointer;
217
+ background-color: rgba(0,0,0,0.2);
218
+ }
219
+ @media(max-width: 576px){
220
+ .contacts_card{
221
+ margin-bottom: 15px !important;
222
+ }
223
+ }
store_index.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""One-off indexing script: chunk the PDFs in Data/ and upsert their
embeddings into the "medicalbot" Pinecone serverless index."""
from src.helper import load_pdf_file, text_split, download_hugging_face_embeddings
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from dotenv import load_dotenv
import os

load_dotenv()

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
# Fail fast with a clear message; assigning None into os.environ would
# otherwise raise an opaque TypeError.
if not PINECONE_API_KEY:
    raise RuntimeError("PINECONE_API_KEY must be set (see .env)")
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

extracted_data = load_pdf_file(data="Data/")
text_chunks = text_split(extracted_data)
embeddings = download_hugging_face_embeddings()

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "medicalbot"

# create_index raises if the index already exists, which made this script
# non-rerunnable; skip creation when the index is already there.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,  # output size of all-MiniLM-L6-v2 (see src/helper.py)
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Embed each chunk and upsert the embeddings into the Pinecone index.
docsearch = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings,
)
template.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Project scaffolding script: create the directory/file skeleton listed in
``list_of_files`` without clobbering existing non-empty files."""
import os
from pathlib import Path
import logging

logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s:')


list_of_files = [
    "src/__init__.py",
    "src/helper.py",
    "src/prompt.py",
    ".env",
    "setup.py",
    "app.py",
    "research/trials.ipynb",
    # was " test.py" — the leading space created a file literally named
    # " test.py" instead of "test.py".
    "test.py",
]


for filepath in list_of_files:
    filepath = Path(filepath)
    filedir, filename = os.path.split(filepath)

    if filedir != "":
        os.makedirs(filedir, exist_ok=True)
        # Restored f-string placeholders (the message previously logged a
        # literal placeholder instead of the actual names).
        logging.info(f"Creating directory: {filedir} for the file: {filename}")

    # Only (re)create when missing or empty so existing content is preserved.
    if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
        with open(filepath, "w"):
            pass
        logging.info(f"Creating empty file: {filepath}")
    else:
        logging.info(f"{filename} already exists")
templates/chat.html ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html>
	<head>
		<title>Chatbot</title>
		<!-- Nothing may precede <!DOCTYPE html>: the stray CDN tags that used to
		     sit above it pushed browsers into quirks mode. jQuery is now loaded
		     exactly once (it was loaded twice, 3.2.1 and 3.3.1) and BEFORE
		     bootstrap.min.js, which depends on it. -->
		<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">
		<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.5.0/css/all.css" integrity="sha384-B4dIYHKNBt8Bc12p+WXckhzcICo0wtJAoU8YZTY5qE0Id1GSseTk6S+L3BlXeVIU" crossorigin="anonymous">
		<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
		<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.1.1/js/bootstrap.min.js"></script>
		<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css')}}"/>
	</head>


	<body>
		<div class="container-fluid h-100">
			<div class="row justify-content-center h-100">
				<div class="col-md-8 col-xl-6 chat">
					<div class="card">
						<div class="card-header msg_head">
							<div class="d-flex bd-highlight">
								<div class="img_cont">
									<img src="https://cdn-icons-png.flaticon.com/512/387/387569.png" class="rounded-circle user_img">
									<span class="online_icon"></span>
								</div>
								<div class="user_info">
									<span>Medical Chatbot</span>
									<p>Ask me anything!</p>
								</div>
							</div>
						</div>
						<div id="messageFormeight" class="card-body msg_card_body">


						</div>
						<div class="card-footer">
							<form id="messageArea" class="input-group">
								<input type="text" id="text" name="msg" placeholder="Type your message..." autocomplete="off" class="form-control type_msg" required/>
								<div class="input-group-append">
									<button type="submit" id="send" class="input-group-text send_btn"><i class="fas fa-location-arrow"></i></button>
								</div>
							</form>
						</div>
					</div>
				</div>
			</div>
		</div>

		<script>
			$(document).ready(function() {
				$("#messageArea").on("submit", function(event) {
					const date = new Date();
					// Zero-pad so 9:05 is not rendered as "9:5".
					const str_time = date.getHours() + ":" + String(date.getMinutes()).padStart(2, "0");
					var rawText = $("#text").val();

					var userHtml = '<div class="d-flex justify-content-end mb-4"><div class="msg_cotainer_send">' + rawText + '<span class="msg_time_send">'+ str_time + '</span></div><div class="img_cont_msg"><img src="https://i.ibb.co/d5b84Xw/Untitled-design.png" class="rounded-circle user_img_msg"></div></div>';

					$("#text").val("");
					$("#messageFormeight").append(userHtml);

					$.ajax({
						data: {
							msg: rawText,
						},
						type: "POST",
						url: "/get",
					}).done(function(data) {
						var botHtml = '<div class="d-flex justify-content-start mb-4"><div class="img_cont_msg"><img src="https://cdn-icons-png.flaticon.com/512/387/387569.png" class="rounded-circle user_img_msg"></div><div class="msg_cotainer">' + data + '<span class="msg_time">' + str_time + '</span></div></div>';
						$("#messageFormeight").append($.parseHTML(botHtml));
					});
					event.preventDefault();
				});
			});
		</script>

	</body>
</html>