EdgarDataScientist committed on
Commit
05d0065
·
verified ·
1 Parent(s): a5dd441

Create app.py

Files changed (1)
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
+ # -*- coding: utf-8 -*-
+ """Customer_Care_ChatbotV1.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1ICE_-Hl23GdbEO6Tt5VjvJ8XMyYC3Z3b
+ """
+
+ import torch
+ import fitz  # PyMuPDF for PDF text extraction
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from sentence_transformers import SentenceTransformer
+ import faiss
+ import gradio as gr
+ import os
+ from huggingface_hub import login
+
+ # Paths to the PDF documents used as the knowledge base (Colab file paths)
+ pdf_path1 = '/content/Chrono 1.pdf'
+ pdf_path2 = '/content/Chrono 2.pdf'
+ pdf_path3 = '/content/Chrono 3.pdf'
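+ # Note: these /content/... paths come from the original Colab session; when this file
+ # runs as a Space, the PDFs need to be uploaded to the repo and the paths updated.
+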
+ # Load the Mistral 7B model and tokenizer
+ model_name = 'mistralai/Mistral-7B-Instruct-v0.3'
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
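+ # Note: this checkpoint is gated on the Hugging Face Hub, so loading it typically
+ # requires accepting its terms and authenticating first (e.g. login(), imported above,
+ # or an HF_TOKEN secret); the float16 weights also need a GPU with roughly 15 GB of memory.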
+
+ # Load sentence transformer for embedding and similarity search
+ embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+
+ # Function to extract text from PDFs
+ def extract_text_from_pdf(pdf_file_path):
+     doc = fitz.open(pdf_file_path)
+     text = ""
+     for page in doc:
+         text += page.get_text("text")
+     return text
+
+ # Placeholder PDF knowledge base (extracted content from the PDFs)
+ pdf_knowledge_base = []
+ pdf_files = [pdf_path1, pdf_path2, pdf_path3]  # Add the actual paths to your PDF files
+
+ for pdf_file in pdf_files:
+     pdf_text = extract_text_from_pdf(pdf_file)
+     pdf_knowledge_base.append({"document": pdf_file, "content": pdf_text})
+
+ # Company-specific Q&A used alongside the extracted PDF content
+ knowledge_base = [
+     {"question": "How does DiabeTrek ensure data privacy and security?",
+      "answer": ("DiabeTrek ensures data privacy through multiple layers of protection, including data encryption during "
+                 "transit and at rest. We comply with regulations like HIPAA and GDPR to safeguard your personal data.")},
+     {"question": "What are DiabeTrek's emergency guidelines?",
+      "answer": "DiabeTrek advises users to seek immediate medical attention in case of diabetes-related emergencies. This chatbot is not for emergency use."},
+     {"question": "What are DiabeTrek's mission, vision, and values?",
+      "answer": "DiabeTrek's mission is to improve the lives of people with diabetes through innovative AI-driven solutions. Our vision is a world where diabetes care is seamless, proactive, and accessible."},
+     # Additional items can be added here following the CEO's instructions
+ ]
+
+ # Create a FAISS index for efficient retrieval
+ embedding_dim = 384  # Output dimension of the MiniLM model
+ index = faiss.IndexFlatL2(embedding_dim)
+
+ # Keep a parallel list of entries so FAISS row indices map back to their source entry
+ indexed_entries = []
+
+ # Create a list of embeddings for the Q&A entries and index them
+ knowledge_embeddings = []
+ for entry in knowledge_base:
+     embedding = embedder.encode(entry['question'], convert_to_tensor=False)
+     knowledge_embeddings.append(embedding)
+     index.add(embedding.reshape(1, -1))
+     indexed_entries.append(entry)
+
+ # Create embeddings for PDF content and index them
+ for pdf_entry in pdf_knowledge_base:
+     embedding = embedder.encode(pdf_entry['content'], convert_to_tensor=False)
+     knowledge_embeddings.append(embedding)
+     index.add(embedding.reshape(1, -1))
+     indexed_entries.append({"question": pdf_entry["document"], "answer": pdf_entry["content"]})
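+ # Note: each PDF is embedded as a single vector; all-MiniLM-L6-v2 truncates its input
+ # at about 256 word pieces, so splitting the PDF text into chunks before indexing would
+ # likely let retrieval see more of each document.
+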
+ # RAG retrieval function
+ def retrieve_knowledge(question, top_k=1):
+     question_embedding = embedder.encode(question, convert_to_tensor=False)
+     D, I = index.search(question_embedding.reshape(1, -1), top_k)
+     # Look up the hits in the combined entry list (Q&A entries plus PDF documents)
+     results = [indexed_entries[idx] for idx in I[0]]
+     return results
+
+ # Chatbot function combining retrieval and generation
+ def customer_support_chatbot(user_input):
+     # Retrieve relevant knowledge
+     retrieved_knowledge = retrieve_knowledge(user_input)
+
+     # Prepare context for the generative model
+     context = " ".join([f"Q: {entry['question']} A: {entry['answer']}" for entry in retrieved_knowledge])
+
+     # Generate response using Mistral
+     prompt = f"Customer Question: {user_input}\n\n{context}\n\nResponse:"
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     # max_new_tokens bounds only the generated text; max_length also counts the prompt
+     # tokens and can cut generation short when the retrieved context is long
+     outputs = model.generate(**inputs, max_new_tokens=150, do_sample=True, temperature=0.7)
+     # Decode only the newly generated tokens so the reply does not echo the prompt
+     response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
+
+     return response
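+ # Note: Mistral-7B-Instruct expects its [INST] ... [/INST] chat format; wrapping the
+ # prompt with tokenizer.apply_chat_template would likely follow instructions better
+ # than the raw "Customer Question: ... Response:" prompt used above.
+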
+ # Gradio UI
+ def gradio_interface(user_input):
+     response = customer_support_chatbot(user_input)
+     return response
+
+ # Build Gradio interface
+ interface = gr.Interface(fn=gradio_interface,
+                          inputs="text",
+                          outputs="text",
+                          title="DiabeTrek Customer Support Chatbot",
+                          description="Ask any question about DiabeTrek, its services, and policies.")
+
+ # Launch the Gradio app
+ interface.launch()
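
Not part of the commit: the snippet below is a minimal sketch of how the retrieval path in this app.py can be smoke-tested on its own, without downloading the 7B model. It assumes the same sentence-transformers and faiss packages; the FAQ entries and the query string are illustrative placeholders.

import faiss
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Two illustrative FAQ entries standing in for knowledge_base
faq = [
    {"question": "How does DiabeTrek ensure data privacy and security?",
     "answer": "Data is encrypted in transit and at rest."},
    {"question": "What are DiabeTrek's emergency guidelines?",
     "answer": "Seek immediate medical attention in an emergency."},
]

# Build the same kind of L2 index over question embeddings
index = faiss.IndexFlatL2(384)
for entry in faq:
    vec = embedder.encode(entry["question"], convert_to_tensor=False)
    index.add(vec.reshape(1, -1))

# Retrieve the closest entry for a sample query
query_vec = embedder.encode("Is my personal data safe?", convert_to_tensor=False)
_, hits = index.search(query_vec.reshape(1, -1), 1)
print(faq[hits[0][0]]["answer"])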