ernani committed on
Commit
fb72cf5
·
1 Parent(s): 5e9938c

Removed chromadb from tools

Browse files
Files changed (1) hide show
  1. tools.py +0 -75
tools.py CHANGED
@@ -8,8 +8,6 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun
9
  from langchain_community.document_loaders import PythonLoader
10
  from langchain_community.utilities import WikipediaAPIWrapper
11
- import chromadb
12
- from chromadb.config import Settings
13
  import pytube
14
  from PIL import Image
15
  import pandas as pd
@@ -709,76 +707,3 @@ class WebSearchTool(BaseTool):
709
 
710
  except Exception as e:
711
  return f"Error searching the web: {str(e)}"
712
-
713
- class ChromaDBManager:
714
- """Manager for ChromaDB operations"""
715
- def __init__(self, persist_directory: str = "./chroma_db"):
716
- self.persist_directory = persist_directory
717
- self.client = chromadb.Client(Settings(
718
- persist_directory=persist_directory,
719
- is_persistent=True
720
- ))
721
-
722
- def create_collection(self, name: str):
723
- """Create a new collection or get existing one"""
724
- try:
725
- return self.client.create_collection(name=name)
726
- except ValueError:
727
- return self.client.get_collection(name=name)
728
-
729
- def _generate_document_id(self, content: str, metadata: dict) -> str:
730
- """Generate a unique ID for a document based on its content and metadata"""
731
- # Use content and key metadata fields for ID generation
732
- id_parts = [content[:100]] # First 100 chars of content
733
- if metadata:
734
- source = metadata.get('source', '')
735
- doc_type = metadata.get('type', '')
736
- if source:
737
- id_parts.append(str(source))
738
- if doc_type:
739
- id_parts.append(str(doc_type))
740
-
741
- # Generate hash from combined parts
742
- combined = "_".join(id_parts)
743
- return f"doc_{hash(combined)}"
744
-
745
- def add_documents_with_metadata(self, collection_name: str, documents: List[str], metadatas: List[dict]):
746
- """Add documents with their metadata to a collection"""
747
- if not documents or not metadatas or len(documents) != len(metadatas):
748
- raise ValueError("Invalid documents or metadata")
749
-
750
- collection = self.create_collection(collection_name)
751
-
752
- # Generate unique IDs for documents
753
- ids = [self._generate_document_id(doc, meta)
754
- for doc, meta in zip(documents, metadatas)]
755
-
756
- try:
757
- # First try to add documents
758
- collection.add(
759
- documents=documents,
760
- metadatas=metadatas,
761
- ids=ids
762
- )
763
- except Exception as e:
764
- # If documents exist, update them
765
- logging.info(f"Updating existing documents in collection {collection_name}")
766
- collection.upsert(
767
- documents=documents,
768
- metadatas=metadatas,
769
- ids=ids
770
- )
771
-
772
- def query_collection(self, collection_name: str, query: str, n_results: int = 5) -> Dict:
773
- """Query a collection with improved retrieval"""
774
- try:
775
- collection = self.client.get_collection(collection_name)
776
- results = collection.query(
777
- query_texts=[query],
778
- n_results=n_results
779
- )
780
-
781
- return results
782
- except Exception as e:
783
- logging.error(f"Error querying collection {collection_name}: {str(e)}")
784
- return {"documents": [], "metadatas": [], "distances": []}
 
8
  from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun
9
  from langchain_community.document_loaders import PythonLoader
10
  from langchain_community.utilities import WikipediaAPIWrapper
 
 
11
  import pytube
12
  from PIL import Image
13
  import pandas as pd
 
707
 
708
  except Exception as e:
709
  return f"Error searching the web: {str(e)}"