mgreg555 committed on
Commit
4c99da8
·
verified ·
1 Parent(s): a286ae5

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -158
app.py DELETED
@@ -1,158 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """Doc_chat_vegleges.ipynb
3
-
4
- Automatically generated by Colaboratory.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1G34ZCuupJZxNy-CFxjMNIa4_I3jynKqC
8
-
9
- # Setting up environment
10
- """
11
-
12
- from PyPDF2 import PdfReader
13
- from langchain.embeddings.openai import OpenAIEmbeddings
14
- from langchain.text_splitter import CharacterTextSplitter
15
- from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS
16
-
17
- # Get your API keys from openai, you will need to create an account.
18
- # Here is the link to get the keys: https://platform.openai.com/account/billing/overview
19
- import os
20
-
21
"""# Preprocessing document"""

# Location of the pdf file/files.
reader = PdfReader('constitution.pdf')
#reader = PdfReader('/content/WOW.pdf')
#reader = PdfReader('/content/the_little_prince.pdf')

# Read the text of every page into one string. Pages whose extraction
# yields nothing (e.g. scanned images) are skipped, matching the original
# `if text:` guard. Joining once avoids the quadratic cost of repeated
# string += on large documents; the original also bound an enumerate index
# it never used.
raw_text = ''.join(
    text for text in (page.extract_text() for page in reader.pages) if text
)
34
-
35
# Split the text into smaller overlapping chunks so that during information
# retrieval we don't hit the token size limits.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,        # characters per chunk
    chunk_overlap=200,      # overlap keeps context across chunk boundaries
    length_function=len,    # chunk size measured in characters
)
texts = text_splitter.split_text(raw_text)

# NOTE(review): the original had a bare `len(texts)` expression here — a
# leftover notebook cell that is a no-op in a script, so it was removed.
46
-
47
"""## Setting up doc search"""

# Embed every text chunk with OpenAI embeddings (presumably reads
# OPENAI_API_KEY from the environment — TODO confirm) and build an
# in-memory FAISS index for similarity search over the chunks.
embeddings = OpenAIEmbeddings()
doc_search = FAISS.from_texts(texts, embeddings)
51
-
52
- """# Setting up chatbot"""
53
-
54
- from langchain.chains.question_answering import load_qa_chain
55
- from langchain.memory import ConversationBufferWindowMemory
56
- from langchain.prompts import PromptTemplate
57
- from langchain_openai import OpenAI
58
-
59
# Prompt that restricts the model to answer strictly from the retrieved
# document excerpts. The instruction text (including its typos) is kept
# verbatim because the string is sent to the model at runtime.
template = """You are a chatbot having a conversation with a human.

Given the following extracted parts of a long document and a question, create a final answer based on the document ONLY and NOTHING else.
Any questions outside of the document is irrelevant and you certanly dont know!

{context}

{chat_history}
Human: {human_input}
Chatbot:"""

# Template slots: retrieved context, rolling chat history, current user turn.
prompt = PromptTemplate(
    template=template,
    input_variables=["chat_history", "human_input", "context"],
)

# Window memory: only the last k=3 exchanges are injected into {chat_history}.
memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    input_key="human_input",
    k=3,
)

# "stuff" chain type: all retrieved chunks are stuffed into a single prompt.
chain = load_qa_chain(OpenAI(), chain_type="stuff", memory=memory, prompt=prompt)
76
-
77
"""## The chatbot should know the answer"""

# Retrieve the chunks most relevant to the question, then answer from them.
# NOTE(review): the bare chain(...) expressions below only display their
# result in a notebook; run as a script, the return value is discarded.
query = "Who wrote the constitution?"
docs = doc_search.similarity_search(query)

chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']

# query = "Acronyms?"
# docs = doc_search.similarity_search(query)

# chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']

# query = "Say 3 of them"
# docs = doc_search.similarity_search(query)

# chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']

"""## The chatbot should not know the answer."""

# Out-of-document question: the prompt instructs the model not to answer it.
query = "What is the capital of France?"
docs = doc_search.similarity_search(query)

chain({"input_documents": docs, "human_input": query}, return_only_outputs=True)['output_text']
100
-
101
"""## Previous chatbot (deprecated)"""

# Dead code kept for reference: an earlier, memory-less QA chain. None of
# it executes — only the bare section-marker string above is evaluated,
# which is a no-op.

#print(chain.memory.buffer)

# from langchain.chains.question_answering import load_qa_chain
# from langchain.llms import OpenAI

# embeddings = OpenAIEmbeddings()
# doc_search = FAISS.from_texts(texts, embeddings)

# chain = load_qa_chain(OpenAI(), chain_type="stuff")

# query = "Who wrote the constitution?"


# answer = chain.run(input_documents=docs, question=query)
# print(answer)

# query = "What is the capital of france?"
# answer = chain.run(input_documents=docs, question=query)
# print(answer)
122
-
123
"""# Demo

## Setting up methods
"""

def chat(query, history):
    """Answer `query` using only chunks retrieved from the indexed document.

    Gradio chat callback. `history` is supplied by gr.ChatInterface but is
    unused here — conversation state lives in the chain's window memory.
    """
    relevant = doc_search.similarity_search(query)
    response = chain(
        {"input_documents": relevant, "human_input": query},
        return_only_outputs=True,
    )
    return response['output_text']
131
-
132
"""## Setting up UI with gradio"""

import gradio as gr

# Stylesheet injected into the Gradio app (passed via css= to ChatInterface
# below): white page background with light blue-green chat surfaces.
css = """
body {
background-color: #FFFFFF; /* White background */
}

.gradio-chat-interface, .gradio-chat-input {
background-color: #E0FFFF; /* Light blue-green background */
}
"""
145
-
146
# Build the chat UI around chat() and serve it.
# NOTE(review): retry_btn / undo_btn / clear_btn were ChatInterface kwargs
# in gradio 4.x and were removed in later releases — confirm the pinned
# gradio version supports them.
demo = gr.ChatInterface(
    chat,
    chatbot=gr.Chatbot(height=500),
    title="Doc-chat",
    description="Ask about the constitution!",
    theme="soft",
    examples=["Who wrote the constitution?", "What is the capital of France?"],
    cache_examples=True,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    css=css,
)
demo.launch()