| | from createVectorDB import createVectorDB
|
| | from splitBioModels import splitBioModels
|
| | from createDocuments import createDocuments
|
| | from generateResponse import generateResponse
|
| | from selectBioModels import search_biomodels
|
| | from selectBioModels import copy_matching_files
|
| |
|
# Machine-specific absolute Windows paths used by the pipeline below.
DATA_PATH = r"C:\Users\navan\Downloads\BioModelsRAG\BioModelsRAG\2data"
CHROMA_DATA_PATH = r"C:\Users\navan\Downloads\BioModelsRAG\CHROMA_EMBEDDINGS_PATH"

# NOTE(review): main() declares a parameter that is also called `directory`
# (defaulting to DATA_PATH), so inside main() this module-level value is
# shadowed -- confirm which of the two folders is actually intended.
directory = r"C:\Users\navan\Downloads\BioModelsRAG\BioModelsRAG\data"

# File path handed to search_biomodels() and copy_matching_files() by main().
output_file = r"C:\Users\navan\Downloads\BioModelsRAG\biomodels_output.csv"

# Folder path handed to copy_matching_files() by main().
final_models_folder = r"C:\Users\navan\Downloads\BioModelsRAG\final_models"

# The prompt runs when the module is loaded (including on import); the answer
# is split on whitespace into a list of keyword strings.
_keyword_line = input("Keyword you would like to search for: ")
user_keywords = _keyword_line.split()
|
| |
|
| |
|
def main(
    report: bool = True,
    directory: str = DATA_PATH,
    chroma_data_path: str = CHROMA_DATA_PATH,
    query: str = "What protein interacts with DesensitizedAch2?",
):
    """Run the BioModels pipeline end to end and return the generated response.

    The steps run in this order: keyword search, copy of the matching files,
    splitting into items, vector-collection creation, document insertion,
    and finally a query against the collection.

    Args:
        report: When true, print a progress message after the collection is
            created and again after the documents have been added.
        directory: Folder handed to ``search_biomodels``,
            ``copy_matching_files`` and ``splitBioModels``.
        chroma_data_path: Forwarded to ``createVectorDB`` as
            ``chroma_data_path``.
        query: Text forwarded to ``generateResponse`` as ``query_text``.

    Returns:
        Whatever ``generateResponse`` returns for ``query``.
    """
    # user_keywords, output_file and final_models_folder are module-level
    # globals defined above this function.
    search_biomodels(directory, user_keywords, output_file)
    copy_matching_files(output_file, directory, final_models_folder)

    # Presumably splitBioModels fills this list in place -- its return value
    # is not used here, and the same list is handed to createDocuments below.
    data = []

    # Use the caller-supplied directory: this call previously hard-coded
    # DATA_PATH, silently ignoring the parameter for this one step.
    # NOTE(review): the files copied into final_models_folder are never read
    # here -- confirm whether the split should run on that folder instead.
    splitBioModels(directory=directory, final_items=data)

    collection = createVectorDB(
        collection_name="123456789101112131415",
        chroma_data_path=chroma_data_path,
        embed_model="all-MiniLM-L6-v2",
        metadata={"hnsw:space": "cosine"},
    )
    if report:
        print("Collection created:", collection)

    createDocuments(final_items=data, collection=collection)
    if report:
        print("Documents added to collection.")

    return generateResponse(query_text=query, collection=collection)
|
| |
|
# Script entry point: run the pipeline with its default arguments and print
# whatever main() returns.
if __name__ == "__main__":
    print(main())
|
| |
|
| |
|