Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,51 +14,6 @@ from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVL
|
|
| 14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
| 15 |
import os
|
| 16 |
|
| 17 |
-
{'image': {'creation_timestamp': 1675549016, 'uri': 'image_of_the_chat.jpg'},
|
| 18 |
-
'is_still_participant': True,
|
| 19 |
-
'joinable_mode': {'link': '', 'mode': 1},
|
| 20 |
-
'magic_words': [],
|
| 21 |
-
'messages': [{'content': 'Bye!',
|
| 22 |
-
'sender_name': 'User 2',
|
| 23 |
-
'timestamp_ms': 1675597571851},
|
| 24 |
-
{'content': 'Oh no worries! Bye',
|
| 25 |
-
'sender_name': 'User 1',
|
| 26 |
-
'timestamp_ms': 1675597435669},
|
| 27 |
-
{'content': 'No Im sorry it was my mistake, the blue one is not '
|
| 28 |
-
'for sale',
|
| 29 |
-
'sender_name': 'User 2',
|
| 30 |
-
'timestamp_ms': 1675596277579},
|
| 31 |
-
{'content': 'I thought you were selling the blue one!',
|
| 32 |
-
'sender_name': 'User 1',
|
| 33 |
-
'timestamp_ms': 1675595140251},
|
| 34 |
-
{'content': 'Im not interested in this bag. Im interested in the '
|
| 35 |
-
'blue one!',
|
| 36 |
-
'sender_name': 'User 1',
|
| 37 |
-
'timestamp_ms': 1675595109305},
|
| 38 |
-
{'content': 'Here is $129',
|
| 39 |
-
'sender_name': 'User 2',
|
| 40 |
-
'timestamp_ms': 1675595068468},
|
| 41 |
-
{'photos': [{'creation_timestamp': 1675595059,
|
| 42 |
-
'uri': 'url_of_some_picture.jpg'}],
|
| 43 |
-
'sender_name': 'User 2',
|
| 44 |
-
'timestamp_ms': 1675595060730},
|
| 45 |
-
{'content': 'Online is at least $100',
|
| 46 |
-
'sender_name': 'User 2',
|
| 47 |
-
'timestamp_ms': 1675595045152},
|
| 48 |
-
{'content': 'How much do you want?',
|
| 49 |
-
'sender_name': 'User 1',
|
| 50 |
-
'timestamp_ms': 1675594799696},
|
| 51 |
-
{'content': 'Goodmorning! $50 is too low.',
|
| 52 |
-
'sender_name': 'User 2',
|
| 53 |
-
'timestamp_ms': 1675577876645},
|
| 54 |
-
{'content': 'Hi! Im interested in your bag. Im offering $50. Let '
|
| 55 |
-
'me know if you are interested. Thanks!',
|
| 56 |
-
'sender_name': 'User 1',
|
| 57 |
-
'timestamp_ms': 1675549022673}],
|
| 58 |
-
'participants': [{'name': 'User 1'}, {'name': 'User 2'}],
|
| 59 |
-
'thread_path': 'inbox/User 1 and User 2 chat',
|
| 60 |
-
'title': 'User 1 and User 2 chat'}
|
| 61 |
-
|
| 62 |
|
| 63 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
| 64 |
def get_pdf_text(pdf_docs):
|
|
@@ -97,18 +52,11 @@ def get_json_file(json_docs):
|
|
| 97 |
temp_filepath = os.path.join(temp_dir.name, json_docs.name)
|
| 98 |
with open(temp_filepath, "wb") as f:
|
| 99 |
f.write(json_docs.getvalue())
|
| 100 |
-
loader = JSONLoader(
|
| 101 |
-
file_path='./example_data/facebook_chat.json',
|
| 102 |
-
jq_schema='.messages[].content',
|
| 103 |
-
text_content=False)
|
| 104 |
-
|
| 105 |
-
data = loader.load()
|
| 106 |
json_loader = JSONLoader(temp_filepath)
|
| 107 |
json_doc = json_loader.load()
|
| 108 |
return json_doc
|
| 109 |
|
| 110 |
|
| 111 |
-
|
| 112 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
| 113 |
def get_text_chunks(documents):
|
| 114 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
|
| 14 |
import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
|
| 15 |
import os
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# PDF 문서로부터 텍스트를 추출하는 함수입니다.
|
| 19 |
def get_pdf_text(pdf_docs):
|
|
|
|
| 52 |
temp_filepath = os.path.join(temp_dir.name, json_docs.name)
|
| 53 |
with open(temp_filepath, "wb") as f:
|
| 54 |
f.write(json_docs.getvalue())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
json_loader = JSONLoader(temp_filepath)
|
| 56 |
json_doc = json_loader.load()
|
| 57 |
return json_doc
|
| 58 |
|
| 59 |
|
|
|
|
| 60 |
# 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
|
| 61 |
def get_text_chunks(documents):
|
| 62 |
text_splitter = RecursiveCharacterTextSplitter(
|