Waflon committed on
Commit
3930a90
·
verified ·
1 Parent(s): 720274c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -28,12 +28,18 @@ def add_data(chain):
28
 
29
  chain = get_data()
30
  if chain == 'dummy':
31
- loader = PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu3.pdf")
32
- data = loader.load()
33
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
34
- #Transformado a tipo de dato especifico para esto
35
- docs = text_splitter.split_documents(data) # 'data' holds the text you want to split, split the text into documents using the text splitter.
36
-
 
 
 
 
 
 
37
  #Modelo QA sentence similarity
38
  modelPath = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2' #español
39
  model_kwargs = {'device':'cpu'} # o cuda
 
28
 
29
  chain = get_data()
30
  if chain == 'dummy':
31
+ loaders = [
32
+ PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu3.pdf"),
33
+ PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu2.pdf"),
34
+ PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu1.pdf"),
35
+ ]
36
+ docs = []
37
+ for loader in loaders:
38
+ docs.extend(loader.load())
39
+
40
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000)
41
+ docs = text_splitter.split_documents(docs)
42
+
43
  #Modelo QA sentence similarity
44
  modelPath = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2' #español
45
  model_kwargs = {'device':'cpu'} # o cuda