shivareddy-03 committed on
Commit
3f265ad
·
1 Parent(s): 6796b92

Fix LangChain import compatibility for space runtime

Browse files
Files changed (2) hide show
  1. csv_result.py +1 -1
  2. functions/data_to_vectors.py +9 -43
csv_result.py CHANGED
@@ -1,6 +1,6 @@
1
  # 1️⃣ Imports
2
  import pandas as pd
3
- from langchain.schema import Document
4
  import os
5
  from dotenv import load_dotenv
6
  from functions.data_to_vectors import create_vectorstore
 
1
  # 1️⃣ Imports
2
  import pandas as pd
3
+ from langchain_core.documents import Document
4
  import os
5
  from dotenv import load_dotenv
6
  from functions.data_to_vectors import create_vectorstore
functions/data_to_vectors.py CHANGED
@@ -1,39 +1,12 @@
1
- # __package__ = "functions"
2
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
3
- # from langchain_huggingface import HuggingFaceEmbeddings
4
- # from langchain_community.vectorstores import Chroma
5
- # from langchain.schema import Document
6
- # def create_vectorstore(text,store):
7
- # print("data loaded......")
8
- # documents = [Document(page_content=text)]
9
- # # Chunk text
10
- # text_splitter = RecursiveCharacterTextSplitter(
11
- # chunk_size=500,
12
- # chunk_overlap=200
13
- # )
14
- # docs_chunks = text_splitter.split_documents(documents)
15
-
16
- # # Embeddings
17
- # embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
18
-
19
- # # Chroma vectorstore
20
- # vectorstore = Chroma.from_documents(
21
- # documents=docs_chunks,
22
- # embedding=embeddings,
23
- # persist_directory=store
24
- # )
25
- # print(f"✅ Stored {len(docs_chunks)} chunks in ChromaDB")
26
- # return vectorstore
27
 
28
- __package__ = "functions"
29
-
30
- from langchain.text_splitter import RecursiveCharacterTextSplitter
31
  from langchain_community.vectorstores import Chroma
32
- from langchain.schema import Document
 
33
  from sentence_transformers import SentenceTransformer
34
- from langchain.embeddings.base import Embeddings # 👈 base class
35
 
36
- # Custom wrapper for SentenceTransformer
37
  class SentenceTransformerEmbeddings(Embeddings):
38
  def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2", device="cpu"):
39
  self.model = SentenceTransformer(model_name, device=device)
@@ -48,23 +21,16 @@ class SentenceTransformerEmbeddings(Embeddings):
48
  def create_vectorstore(text, store):
49
  print("data loaded......")
50
  documents = [Document(page_content=text)]
51
- print(text)
52
- # Chunk text
53
- text_splitter = RecursiveCharacterTextSplitter(
54
- chunk_size=500,
55
- chunk_overlap=200
56
- )
57
  docs_chunks = text_splitter.split_documents(documents)
58
 
59
- # Use custom embedding wrapper
60
  embeddings = SentenceTransformerEmbeddings()
61
-
62
- # Chroma vectorstore
63
  vectorstore = Chroma.from_documents(
64
  documents=docs_chunks,
65
  embedding=embeddings,
66
- persist_directory=store
67
  )
68
 
69
- print(f"Stored {len(docs_chunks)} chunks in ChromaDB")
70
  return vectorstore
 
1
+ __package__ = "functions"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 
4
  from langchain_community.vectorstores import Chroma
5
+ from langchain_core.documents import Document
6
+ from langchain_core.embeddings import Embeddings
7
  from sentence_transformers import SentenceTransformer
 
8
 
9
+
10
  class SentenceTransformerEmbeddings(Embeddings):
11
  def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2", device="cpu"):
12
  self.model = SentenceTransformer(model_name, device=device)
 
21
  def create_vectorstore(text, store):
22
  print("data loaded......")
23
  documents = [Document(page_content=text)]
24
+
25
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
 
 
 
 
26
  docs_chunks = text_splitter.split_documents(documents)
27
 
 
28
  embeddings = SentenceTransformerEmbeddings()
 
 
29
  vectorstore = Chroma.from_documents(
30
  documents=docs_chunks,
31
  embedding=embeddings,
32
+ persist_directory=store,
33
  )
34
 
35
+ print(f"Stored {len(docs_chunks)} chunks in ChromaDB")
36
  return vectorstore