File size: 900 Bytes
79787b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

import pandas as pd
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter


def make_descriptions(table, tittle):
    """Build the list of table descriptions for a CSV file.

    Args:
        table: Path of the CSV file (or anything ``pandas.read_csv``
            accepts). It is stored unchanged under the ``'path'`` key.
        tittle: Title stored under the ``'tittle'`` key. The spelling is
            part of the existing interface and is kept as-is.

    Returns:
        A two-element list of dicts, each with the keys ``'path'``,
        ``'number'``, ``'columns'`` and ``'tittle'``. The first element is
        a fixed, hard-coded entry; the second one describes ``table``.
    """
    # Only the column names are needed, so parse just the header row
    # instead of loading every data row into memory. Data rows are
    # therefore not parsed (or validated) here.
    columns = list(pd.read_csv(table, nrows=0).columns)

    # NOTE(review): hard-coded entry that does not depend on the arguments
    # -- presumably a dummy/distractor description; confirm with callers.
    table_description0 = {
        'path': 'random',
        'number': 1,
        'columns': ["clothes", "animals", "students"],
        'tittle': "fashionable student clothes",
    }

    # Description of the CSV file that was actually passed in.
    table_description1 = {
        'path': table,
        'number': 2,
        'columns': columns,
        'tittle': tittle,
    }

    return [table_description0, table_description1]


def make_documens(pdf):
    """Load a PDF and return its content split into smaller documents.

    Args:
        pdf: Location of the PDF file, passed straight to ``PyPDFLoader``.

    Returns:
        The result of ``CharacterTextSplitter.split_documents`` applied to
        the loaded documents, using a newline separator, ``chunk_size=500``
        and no overlap between chunks.
    """
    loaded_pages = PyPDFLoader(pdf).load()

    # Split on newlines; chunks do not overlap.
    splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=500,
        chunk_overlap=0,
    )
    return splitter.split_documents(loaded_pages)