jglowa committed on
Commit
77fe7fe
verified
1 Parent(s): cb19c73

Update indeksator.cmd

Browse files
Files changed (1) hide show
  1. indeksator.cmd +7 -5
indeksator.cmd CHANGED
@@ -6,13 +6,14 @@
6
  :; dbFile="prosty-rag.db"
7
  :; chunkWords=200
8
  :; overlapWords=10
9
- :; declare -a buf
10
  :; for ((i = 0; i < overlapWords; i++)); do buf[i]=""; done
11
  :; > $chunksFile
12
- :; [ ! -f $inputDir ] && echo Pobieranie przykładowego pliku $inputDir/wikipedia.txt... && curl --create-dirs -Lo $inputDir/wikipedia.txt https://huggingface.co/jglowa/prosty-rag/resolve/main/baza/wikipedia.txt?download=true && echo Gotowe!
13
- :; [ ! -f $embedfile ] && echo Pobieranie $embedfile... && curl -Lo $embedfile https://huggingface.co/jglowa/prosty-rag/resolve/main/bge-m3.llamafile?download=true && chmod +x $embedfile && echo Gotowe!
14
  :; [ ! -f pdftotext ] && echo Pobieranie pdftotext... && curl -LO https://dl.xpdfreader.com/xpdf-tools-linux-4.05.tar.gz && tar --strip-components 2 -xzf xpdf-tools-linux-4.05.tar.gz xpdf-tools-linux-4.05/bin64/pdftotext && del xpdf-tools-linux-4.05.tar.gz && echo Gotowe!
15
  :; echo "Indeksowanie plików PDF/TXT/MD w folderze $inputDir..."
 
16
  :; for pdf in $inputDir/*.pdf; do echo "Konwertowanie $(basename "$pdf")..." && pdftotext -nopgbrk -enc UTF-8 "$pdf"; done
17
  :; for file in $inputDir/*.txt $inputDir/*.md; do
18
  :; echo "Przetwarzanie $(basename "$file")..."
@@ -37,7 +38,8 @@
37
  :; echo "Osadzanie plików..."
38
  :; [ -f $dbFile ] && rm $dbFile
39
  :; ./$embedfile import $chunksFile $dbFile && echo "Gotowe! Po każdej zmianie w folderze $inputDir należy uruchomić ponownie indeksator."
40
- :; rm "$chunksFile"; exit $?
 
41
  :; # Windows:
42
  @echo off
43
  setlocal enabledelayedexpansion
@@ -50,7 +52,7 @@ set overlapWords=10
50
  for /l %%i in (1,1,%overlapWords%) do set buf[%%i]=
51
  break>%chunksFile%
52
  if not exist %inputDir% echo Pobieranie przykładowego pliku %inputDir%\wikipedia.txt... && curl --create-dirs -Lo %inputDir%\wikipedia.txt https://huggingface.co/jglowa/prosty-rag/resolve/main/baza/wikipedia.txt?download=true && echo Gotowe^!
53
- if not exist %embedfile% echo Pobieranie %embedfile%... && curl -Lo %embedfile% https://huggingface.co/jglowa/prosty-rag/resolve/main/bge-m3.embedfile?download=true && echo Gotowe^!
54
  if not exist pdftotext.exe echo Pobieranie pdftotext.exe... && curl -LO https://dl.xpdfreader.com/xpdf-tools-win-4.05.zip && tar --strip-components 2 -xf xpdf-tools-win-4.05.zip xpdf-tools-win-4.05/bin64/pdftotext.exe && del xpdf-tools-win-4.05.zip && echo Gotowe^!
55
  echo Indeksowanie plików PDF/TXT/MD w folderze %inputDir%...
56
  for %%F in ("%inputDir%\*.pdf") do if not exist "%%~dpnF.txt" echo Konwertowanie %%~nxF... && pdftotext -nopgbrk -enc UTF-8 "%%~F"
 
6
  :; dbFile="prosty-rag.db"
7
  :; chunkWords=200
8
  :; overlapWords=10
9
+ :; buf=()
10
  :; for ((i = 0; i < overlapWords; i++)); do buf[i]=""; done
11
  :; > $chunksFile
12
+ :; [ ! -d $inputDir ] && echo Pobieranie przykładowego pliku $inputDir/wikipedia.txt... && curl --create-dirs -Lo $inputDir/wikipedia.txt https://huggingface.co/jglowa/prosty-rag/resolve/main/baza/wikipedia.txt?download=true && echo Gotowe!
13
+ :; [ ! -f $embedfile ] && echo Pobieranie $embedfile... && curl -Lo $embedfile https://huggingface.co/asg017/embedfile/resolve/refs%2Fpr%2F2/bge-m3.embedfile?download=true && chmod +x $embedfile && echo Gotowe!
14
  :; [ ! -f pdftotext ] && echo Pobieranie pdftotext... && curl -LO https://dl.xpdfreader.com/xpdf-tools-linux-4.05.tar.gz && tar --strip-components 2 -xzf xpdf-tools-linux-4.05.tar.gz xpdf-tools-linux-4.05/bin64/pdftotext && del xpdf-tools-linux-4.05.tar.gz && echo Gotowe!
15
  :; echo "Indeksowanie plików PDF/TXT/MD w folderze $inputDir..."
16
+ :; shopt -s nullglob
17
  :; for pdf in $inputDir/*.pdf; do echo "Konwertowanie $(basename "$pdf")..." && pdftotext -nopgbrk -enc UTF-8 "$pdf"; done
18
  :; for file in $inputDir/*.txt $inputDir/*.md; do
19
  :; echo "Przetwarzanie $(basename "$file")..."
 
38
  :; echo "Osadzanie plików..."
39
  :; [ -f $dbFile ] && rm $dbFile
40
  :; ./$embedfile import $chunksFile $dbFile && echo "Gotowe! Po każdej zmianie w folderze $inputDir należy uruchomić ponownie indeksator."
41
+ :; # rm $chunksFile
42
+ :; exit $?
43
  :; # Windows:
44
  @echo off
45
  setlocal enabledelayedexpansion
 
52
  for /l %%i in (1,1,%overlapWords%) do set buf[%%i]=
53
  break>%chunksFile%
54
  if not exist %inputDir% echo Pobieranie przykładowego pliku %inputDir%\wikipedia.txt... && curl --create-dirs -Lo %inputDir%\wikipedia.txt https://huggingface.co/jglowa/prosty-rag/resolve/main/baza/wikipedia.txt?download=true && echo Gotowe^!
55
+ if not exist %embedfile% echo Pobieranie %embedfile%... && curl -Lo %embedfile% https://huggingface.co/asg017/embedfile/resolve/refs%2Fpr%2F2/bge-m3.embedfile?download=true && echo Gotowe^!
56
  if not exist pdftotext.exe echo Pobieranie pdftotext.exe... && curl -LO https://dl.xpdfreader.com/xpdf-tools-win-4.05.zip && tar --strip-components 2 -xf xpdf-tools-win-4.05.zip xpdf-tools-win-4.05/bin64/pdftotext.exe && del xpdf-tools-win-4.05.zip && echo Gotowe^!
57
  echo Indeksowanie plików PDF/TXT/MD w folderze %inputDir%...
58
  for %%F in ("%inputDir%\*.pdf") do if not exist "%%~dpnF.txt" echo Konwertowanie %%~nxF... && pdftotext -nopgbrk -enc UTF-8 "%%~F"