haksoo committed on
Commit
f755f5a
·
1 Parent(s): a877e66

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. 2020_emaster_keynote.pdf +3 -0
  3. app.py +66 -0
  4. requirements.txt +8 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ 2020_emaster_keynote.pdf filter=lfs diff=lfs merge=lfs -text
2020_emaster_keynote.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dc98802d24d9c1f51d53f5c082241e68e29a7212661c626afc32a2fa6d37ec0
3
+ size 4046421
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio chatbot that answers questions about a bundled PDF.

Pipeline: load the PDF -> split it into overlapping chunks -> embed the
chunks with OpenAI embeddings -> store them in an in-memory Chroma vector
store -> answer chat questions with a LangChain RetrievalQA chain over
that store. Requires OPENAI_API_KEY in the environment (read from .env).
"""
import gradio as gr
from dotenv import load_dotenv

# Load OPENAI_API_KEY (and any other settings) from a local .env file
# before any OpenAI-backed langchain objects are constructed.
load_dotenv()

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# --- Document loading: one Document per PDF page ---------------------------
loader = PyPDFLoader("2020_emaster_keynote.pdf")
pages = loader.load_and_split()

# --- Splitting: small chunks with a little overlap for retrieval recall ----
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
texts = text_splitter.split_documents(pages)

# --- Embedding + in-memory Chroma vector store ------------------------------
embeddings_model = OpenAIEmbeddings()
db = Chroma.from_documents(texts, embeddings_model)

# --- Retrieval-augmented QA chain over the vector store ---------------------
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.5)
qa_chain = RetrievalQA.from_chain_type(llm, retriever=db.as_retriever())

# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    # NOTE(review): 'images/emaster.png' is not among the files uploaded in
    # this commit — confirm the image exists, otherwise this renders broken.
    gr.Image('images/emaster.png')

    gr.Text('''
์ด๋Ÿฐ ์งˆ๋ฌธ ์–ด๋– ์„ธ์š”?
๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์˜ ์ •์˜
''')
    chatbot = gr.Chatbot(label="์ •๋ณด์ฒ˜๋ฆฌ์‚ฐ์—…๊ธฐ์‚ฌ์ฑ—๋ด‡")  # chat history panel (top)
    msg = gr.Textbox(label="์งˆ๋ฌธํ•ด์ฃผ์„ธ์š”!")  # user question input box
    clear = gr.Button("๋Œ€ํ™” ์ดˆ๊ธฐํ™”")  # "reset conversation" button

    def respond(message, chat_history):
        """Answer *message* with the QA chain and append the turn to history.

        Returns ("", updated_history) so the textbox is cleared on submit.
        """
        result = qa_chain({"query": message})
        bot_message = result['result']

        # Record the (user, bot) turn so the Chatbot component shows it.
        chat_history.append((message, bot_message))
        return "", chat_history

    # Submitting the textbox feeds (msg, chatbot) into respond and writes
    # the results back into the same two components.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

    # Clearing sets the chatbot value to None, which empties the history.
    clear.click(lambda: None, None, chatbot, queue=False)

# Launch the app; share=True additionally requests a public gradio.live link.
demo.launch(debug=True, share=True)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ python-dotenv
3
+ pypdf
4
+ cryptography
5
+ openai
6
+ chromadb
7
+ tiktoken
8
+ gradio