Nihal2000 committed on
Commit
ed42a41
·
1 Parent(s): 8ba2581

deleted storage

Browse files
Files changed (1) hide show
  1. core/storage.py +0 -58
core/storage.py DELETED
@@ -1,58 +0,0 @@
1
- import chromadb
2
- import os
3
- from mistralai import Mistral
4
- import config
5
-
6
# Initialize the ChromaDB client. Data is stored on disk in the directory named
# by the CHROMA_DB_DIR environment variable (default: "db/"). A persistent
# client is required here: a plain chromadb.Client() keeps everything in memory
# and would ignore chroma_db_path entirely.
chroma_db_path = os.getenv("CHROMA_DB_DIR", "db/")
client = chromadb.PersistentClient(path=chroma_db_path)
collection = client.get_or_create_collection("documents")
10
-
11
- # Use Mistral API for embeddings
12
-
13
def get_mistral_embedding(text: str) -> list[float]:
    """
    Get the embedding for the given text using the Mistral API.

    Args:
        text: The text to embed.

    Returns:
        The embedding produced by the "mistral-embed" model for ``text``.
    """
    # Named ``mistral`` (not ``client``) so it is not confused with the
    # module-level ChromaDB client.
    with Mistral(api_key=config.MISTRAL_API_KEY) as mistral:
        response = mistral.embeddings.create(
            model="mistral-embed",
            # The SDK keyword is ``inputs``; a single-element list is sent.
            inputs=[text],
        )
    # The response is a model object (not a dict) holding one embedding per
    # input, so use attribute access and take the only entry.
    return response.data[0].embedding
24
-
25
-
26
def add_document(doc_id: str, text: str, metadata: dict):
    """
    Add a document's text and metadata to the ChromaDB collection.

    Args:
        doc_id: Identifier to store the document under.
        text: Document text; it is embedded via ``get_mistral_embedding``.
        metadata: Metadata stored alongside the document. An empty or
            missing value is stored as "no metadata".

    Returns:
        True once the document has been added.
    """
    embedding = get_mistral_embedding(text)
    collection.add(
        ids=[doc_id],
        embeddings=[embedding],
        documents=[text],
        # Chroma's validation rejects an empty metadata dict, so omit the
        # argument instead of passing [{}].
        metadatas=[metadata] if metadata else None,
    )
    # Older Chroma clients require an explicit persist() call; current clients
    # write to disk automatically and no longer expose the method, so only
    # call it when it exists.
    persist = getattr(client, "persist", None)
    if callable(persist):
        persist()
    return True
35
-
36
-
37
def search_documents(query: str, top_k: int = 5) -> dict:
    """
    Search for documents semantically similar to the query.

    Args:
        query: Free-text query; it is embedded via ``get_mistral_embedding``.
        top_k: Maximum number of results to request.

    Returns:
        The result dictionary from ``collection.query``, containing the
        matching ids together with distances, documents and metadatas.
    """
    query_vec = get_mistral_embedding(query)
    # "ids" is not an accepted ``include`` value and makes the query fail;
    # ids are always part of the result, so only the optional fields are listed.
    return collection.query(
        query_embeddings=[query_vec],
        n_results=top_k,
        include=["distances", "documents", "metadatas"],
    )
46
-
47
-
48
def get_all_documents() -> list:
    """
    Retrieve metadata for all documents in the collection.

    Returns:
        A list of ``{"id": ..., "metadata": ...}`` dictionaries, one per
        stored document.
    """
    # Fetch every id and its metadata in a single call instead of issuing one
    # get() per document.
    res = collection.get(include=["metadatas"])
    ids = res["ids"]
    # Fall back to None placeholders if no metadata list comes back, so the
    # ids are still reported.
    metadatas = res.get("metadatas") or [None] * len(ids)
    return [
        {"id": doc_id, "metadata": meta}
        for doc_id, meta in zip(ids, metadatas)
    ]