Spaces:
Runtime error
Runtime error
matt HOFFNER
committed on
Commit
·
5e14bd6
1
Parent(s):
81c1854
use chromadb to run in browser
Browse files
- package-lock.json +2 -3
- package.json +1 -1
- src/pages/api/docHandle.ts +5 -7
- src/utils/file-handler.ts +0 -76
package-lock.json
CHANGED
|
@@ -15,6 +15,7 @@
|
|
| 15 |
"@types/react": "18.2.6",
|
| 16 |
"@types/react-dom": "18.2.4",
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
|
|
|
| 18 |
"dexie": "^3.2.4",
|
| 19 |
"eslint": "8.40.0",
|
| 20 |
"eslint-config-next": "13.4.2",
|
|
@@ -2212,9 +2213,7 @@
|
|
| 2212 |
"node_modules/chromadb": {
|
| 2213 |
"version": "1.5.2",
|
| 2214 |
"resolved": "https://registry.npmjs.org/chromadb/-/chromadb-1.5.2.tgz",
|
| 2215 |
-
"integrity": "sha512-x/rOD7Oo1RiYA+vPK+Ma7CliCHlx26OjUt5J7Z9HZ5Ud1qDrPlvctBycK9Il3zqza96yeUoPQ7gCXHVKNoyvRQ=="
|
| 2216 |
-
"optional": true,
|
| 2217 |
-
"peer": true
|
| 2218 |
},
|
| 2219 |
"node_modules/client-only": {
|
| 2220 |
"version": "0.0.1",
|
|
|
|
| 15 |
"@types/react": "18.2.6",
|
| 16 |
"@types/react-dom": "18.2.4",
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
+
"chromadb": "^1.5.2",
|
| 19 |
"dexie": "^3.2.4",
|
| 20 |
"eslint": "8.40.0",
|
| 21 |
"eslint-config-next": "13.4.2",
|
|
|
|
| 2213 |
"node_modules/chromadb": {
|
| 2214 |
"version": "1.5.2",
|
| 2215 |
"resolved": "https://registry.npmjs.org/chromadb/-/chromadb-1.5.2.tgz",
|
| 2216 |
+
"integrity": "sha512-x/rOD7Oo1RiYA+vPK+Ma7CliCHlx26OjUt5J7Z9HZ5Ud1qDrPlvctBycK9Il3zqza96yeUoPQ7gCXHVKNoyvRQ=="
|
|
|
|
|
|
|
| 2217 |
},
|
| 2218 |
"node_modules/client-only": {
|
| 2219 |
"version": "0.0.1",
|
package.json
CHANGED
|
@@ -15,11 +15,11 @@
|
|
| 15 |
"@types/react": "18.2.6",
|
| 16 |
"@types/react-dom": "18.2.4",
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
|
|
|
| 18 |
"dexie": "^3.2.4",
|
| 19 |
"eslint": "8.40.0",
|
| 20 |
"eslint-config-next": "13.4.2",
|
| 21 |
"fs-extra": "^11.1.1",
|
| 22 |
-
"hnswlib-node": "^1.4.2",
|
| 23 |
"langchain": "^0.0.90",
|
| 24 |
"next": "13.4.2",
|
| 25 |
"pdfjs-dist": "^3.7.107",
|
|
|
|
| 15 |
"@types/react": "18.2.6",
|
| 16 |
"@types/react-dom": "18.2.4",
|
| 17 |
"@xenova/transformers": "^2.1.1",
|
| 18 |
+
"chromadb": "^1.5.2",
|
| 19 |
"dexie": "^3.2.4",
|
| 20 |
"eslint": "8.40.0",
|
| 21 |
"eslint-config-next": "13.4.2",
|
| 22 |
"fs-extra": "^11.1.1",
|
|
|
|
| 23 |
"langchain": "^0.0.90",
|
| 24 |
"next": "13.4.2",
|
| 25 |
"pdfjs-dist": "^3.7.107",
|
src/pages/api/docHandle.ts
CHANGED
|
@@ -1,15 +1,14 @@
|
|
| 1 |
import type { NextApiRequest, NextApiResponse } from 'next';
|
| 2 |
-
import {
|
| 3 |
-
vectorStoreToHNSWLibModel,
|
| 4 |
-
} from '@/utils/file-handler';
|
| 5 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 6 |
-
import {
|
| 7 |
import XenovaTransformersEmbeddings from '../../embed/hf'
|
| 8 |
|
| 9 |
async function handleDocs(text: string) {
|
| 10 |
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
|
| 11 |
const docs = await textSplitter.createDocuments([text]);
|
| 12 |
-
const vectorStore = await
|
|
|
|
|
|
|
| 13 |
return vectorStore;
|
| 14 |
}
|
| 15 |
|
|
@@ -25,9 +24,8 @@ export default async function handler(
|
|
| 25 |
}
|
| 26 |
|
| 27 |
const vectorStore = await handleDocs(text);
|
| 28 |
-
const model = await vectorStoreToHNSWLibModel(vectorStore);
|
| 29 |
res.status(200).send({
|
| 30 |
-
|
| 31 |
});
|
| 32 |
}
|
| 33 |
|
|
|
|
| 1 |
import type { NextApiRequest, NextApiResponse } from 'next';
|
|
|
|
|
|
|
|
|
|
| 2 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
| 3 |
+
import { Chroma } from "langchain/vectorstores/chroma";
|
| 4 |
import XenovaTransformersEmbeddings from '../../embed/hf'
|
| 5 |
|
| 6 |
async function handleDocs(text: string) {
|
| 7 |
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 });
|
| 8 |
const docs = await textSplitter.createDocuments([text]);
|
| 9 |
+
const vectorStore = await Chroma.fromDocuments(docs, new XenovaTransformersEmbeddings(), {
|
| 10 |
+
collectionName: 'docs'
|
| 11 |
+
});
|
| 12 |
return vectorStore;
|
| 13 |
}
|
| 14 |
|
|
|
|
| 24 |
}
|
| 25 |
|
| 26 |
const vectorStore = await handleDocs(text);
|
|
|
|
| 27 |
res.status(200).send({
|
| 28 |
+
model: vectorStore,
|
| 29 |
});
|
| 30 |
}
|
| 31 |
|
src/utils/file-handler.ts
DELETED
|
@@ -1,76 +0,0 @@
|
|
| 1 |
-
import type XenovaTransformersEmbeddings from '@/embed/hf';
|
| 2 |
-
// import { HuggingFaceInferenceEmbeddings } from 'langchain/embeddings/hf';
|
| 3 |
-
import fs from 'fs-extra';
|
| 4 |
-
import {
|
| 5 |
-
HNSWLib,
|
| 6 |
-
type HNSWLib as StoreTypeHNSWLib,
|
| 7 |
-
} from 'langchain/vectorstores/hnswlib';
|
| 8 |
-
import path from 'path';
|
| 9 |
-
|
| 10 |
-
const ifDev = process.env.NODE_ENV === 'development';
|
| 11 |
-
// in prod mode, only allowed to write to /tmp/
|
| 12 |
-
// https://vercel.com/guides/how-can-i-use-files-in-serverless-functions
|
| 13 |
-
export const storesDir = ifDev ? 'tmp/hnswlib-stores' : '/tmp/hnswlib-stores';
|
| 14 |
-
|
| 15 |
-
type HNSWLibModel = {
|
| 16 |
-
args: string;
|
| 17 |
-
docstore: string;
|
| 18 |
-
hnswlibIndex: string;
|
| 19 |
-
};
|
| 20 |
-
|
| 21 |
-
const HNSWLibModelFilesName = {
|
| 22 |
-
args: 'args.json',
|
| 23 |
-
docstore: 'docstore.json',
|
| 24 |
-
hnswlibIndex: 'hnswlib.index',
|
| 25 |
-
};
|
| 26 |
-
|
| 27 |
-
// looking forward to a better way to transfrom hnswlibStore <=> indexes
|
| 28 |
-
export async function HNSWLibModelToVectorStore(
|
| 29 |
-
model: HNSWLibModel,
|
| 30 |
-
embeddings: XenovaTransformersEmbeddings,
|
| 31 |
-
) {
|
| 32 |
-
await saveHNSWLibModelToLocal(model);
|
| 33 |
-
// load from dir
|
| 34 |
-
const vectorStore = await HNSWLib.load(storesDir, embeddings);
|
| 35 |
-
return vectorStore;
|
| 36 |
-
}
|
| 37 |
-
|
| 38 |
-
export async function saveHNSWLibModelToLocal(model: HNSWLibModel) {
|
| 39 |
-
// save model to /tmp/
|
| 40 |
-
await Promise.all(
|
| 41 |
-
Object.keys(HNSWLibModelFilesName).map((key) => {
|
| 42 |
-
const fullPath = path.join(
|
| 43 |
-
storesDir,
|
| 44 |
-
(HNSWLibModelFilesName as Record<string, string>)[key],
|
| 45 |
-
);
|
| 46 |
-
console.log(fullPath);
|
| 47 |
-
const data = (model as Record<string, string>)[key];
|
| 48 |
-
console.log(data);
|
| 49 |
-
|
| 50 |
-
return fs.writeFile(fullPath, data);
|
| 51 |
-
}),
|
| 52 |
-
);
|
| 53 |
-
}
|
| 54 |
-
|
| 55 |
-
export async function vectorStoreToHNSWLibModel(
|
| 56 |
-
store: StoreTypeHNSWLib,
|
| 57 |
-
): Promise<HNSWLibModel> {
|
| 58 |
-
await store.save(storesDir);
|
| 59 |
-
return await readHNSWLibModelFromLocal();
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
-
export async function readHNSWLibModelFromLocal(): Promise<HNSWLibModel> {
|
| 63 |
-
const [args, docstore, hnswlibIndex] = await Promise.all([
|
| 64 |
-
fs.readFile(path.join(storesDir, HNSWLibModelFilesName.args), 'utf-8'),
|
| 65 |
-
fs.readFile(path.join(storesDir, HNSWLibModelFilesName.docstore), 'utf-8'),
|
| 66 |
-
fs.readFile(
|
| 67 |
-
path.join(storesDir, HNSWLibModelFilesName.hnswlibIndex),
|
| 68 |
-
'hex',
|
| 69 |
-
),
|
| 70 |
-
]);
|
| 71 |
-
return {
|
| 72 |
-
args,
|
| 73 |
-
docstore,
|
| 74 |
-
hnswlibIndex,
|
| 75 |
-
};
|
| 76 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|