Spaces:

TheBobBob
/

BioModelsRAG-Website_streamlit

Sleeping

App Files Community

TheBobBob committed on Dec 23, 2024

Commit

03047df

verified ·

1 Parent(s): 6fa5036

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -35

app.py CHANGED Viewed

@@ -10,17 +10,12 @@ import libsbml
 import networkx as nx
 from pyvis.network import Network
 client = chromadb.Client()
 collection_name = "BioModelsRAG"
 global db
 db = client.get_or_create_collection(name=collection_name)
-#Todolists
-#1. if MODEL (cannot download) don't even include (TICK)
-#2. switch the choosing and groq api key so if they just want to visualize thats fine (TICK)
 class BioModelFetcher:
     def __init__(self, github_owner="TheBobBob", github_repo_cache="BiomodelsCache", biomodels_json_db_path="src/cached_biomodels.json"):
@@ -121,7 +116,7 @@ class BioModelSplitter:
     def __init__(self, groq_api_key):
         self.groq_client = Groq(api_key=groq_api_key)
-    def split_biomodels(self, antimony_file_path, models):
         text_splitter = CharacterTextSplitter(
             separator="  // ",
             chunk_size=1000,
@@ -130,33 +125,19 @@ class BioModelSplitter:
             is_separator_regex=False,
         )
-        directory_path = os.path.dirname(os.path.abspath(antimony_file_path))
-        files = os.listdir(directory_path)
-        for file in files:
-            file_path = os.path.join(directory_path, file)
-            try:
-                with open(file_path, 'r') as f:
-                    file_content = f.read()
-                    items = text_splitter.create_documents([file_content])
-                    self.create_vector_db(items, models)
-                    break
-            except Exception as e:
-                print(f"Error reading file {file_path}: {e}")
         return db
-    def create_vector_db(self, final_items, models):
         counter = 0
-        for model_id in models:
-            try:
-                results = db.get(where={"document": {"$eq": model_id}})
-                #might be a problem here?
-                if results['documents']:
-                    continue
-                #could also be a problem in how the IDs are created
                 for item in final_items:
                     counter += 1  # Increment counter for each item
                     item_id = f"{counter}_{model_id}"
@@ -188,8 +169,8 @@ class BioModelSplitter:
                         )
                     else:
                         print(f"Error: No content returned from Groq for model {model_id}.")
-            except Exception as e:
-                print(f"Error processing model {model_id}: {e}")
 class SBMLNetworkVisualizer:
@@ -287,6 +268,7 @@ class StreamlitApp:
             if models:
                 model_ids = list(models.keys())
                 model_ids = [model_id for model_id in model_ids if not str(model_id).startswith("MODEL")]
                 if models:
                     selected_models = st.multiselect(
                         "Select biomodels to analyze",
@@ -304,7 +286,7 @@ class StreamlitApp:
                         net = self.visualizer.sbml_to_network(model_file_path)
-                        st.subheader(f"Model: {model_data['title']}")
                         net.show(f"sbml_network_{model_id}.html")
                         HtmlFile = open(f"sbml_network_{model_id}.html", "r", encoding="utf-8")
@@ -325,7 +307,7 @@ class StreamlitApp:
                         antimony_file_path = model_file_path.replace(".xml", ".txt")
                         AntimonyConverter.convert_sbml_to_antimony(model_file_path, antimony_file_path)
-                        self.splitter.split_biomodels(antimony_file_path, selected_models)
                         st.info(f"Model {model_id} {model_data['name']} has successfully been added to the database! :) ")
@@ -357,7 +339,6 @@ class StreamlitApp:
             flat_recommendation = [item for sublist in best_recommendation for item in (sublist if isinstance(sublist, list) else [sublist])]
             query_results_final += "\n\n".join(flat_recommendation) + "\n\n"
         prompt_template = f"""
         Using the context and previous conversation provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly:

 import networkx as nx
 from pyvis.network import Network
 client = chromadb.Client()
 collection_name = "BioModelsRAG"
 global db
 db = client.get_or_create_collection(name=collection_name)
 class BioModelFetcher:
     def __init__(self, github_owner="TheBobBob", github_repo_cache="BiomodelsCache", biomodels_json_db_path="src/cached_biomodels.json"):
     def __init__(self, groq_api_key):
         self.groq_client = Groq(api_key=groq_api_key)
+    def split_biomodels(self, antimony_file_path, models, model_id):
         text_splitter = CharacterTextSplitter(
             separator="  // ",
             chunk_size=1000,
             is_separator_regex=False,
         )
+        with open(antimony_file_path) as f:
+            file_content = f.read()
+        items = text_splitter.create_documents([file_content])
+        self.create_vector_db(items, model_id)
         return db
+    def create_vector_db(self, final_items, model_id):
         counter = 0
+        try:
+            results = db.get(where={"document": model_id})
+            if len(results['documents']) == 0:
                 for item in final_items:
                     counter += 1  # Increment counter for each item
                     item_id = f"{counter}_{model_id}"
                         )
                     else:
                         print(f"Error: No content returned from Groq for model {model_id}.")
+        except Exception as e:
+            print(f"Error processing model {model_id}: {e}")
 class SBMLNetworkVisualizer:
             if models:
                 model_ids = list(models.keys())
                 model_ids = [model_id for model_id in model_ids if not str(model_id).startswith("MODEL")]
                 if models:
                     selected_models = st.multiselect(
                         "Select biomodels to analyze",
                         net = self.visualizer.sbml_to_network(model_file_path)
+                        st.subheader(f"Model {model_data['title']}")
                         net.show(f"sbml_network_{model_id}.html")
                         HtmlFile = open(f"sbml_network_{model_id}.html", "r", encoding="utf-8")
                         antimony_file_path = model_file_path.replace(".xml", ".txt")
                         AntimonyConverter.convert_sbml_to_antimony(model_file_path, antimony_file_path)
+                        self.splitter.split_biomodels(antimony_file_path, selected_models, model_id)
                         st.info(f"Model {model_id} {model_data['name']} has successfully been added to the database! :) ")
             flat_recommendation = [item for sublist in best_recommendation for item in (sublist if isinstance(sublist, list) else [sublist])]
             query_results_final += "\n\n".join(flat_recommendation) + "\n\n"
         prompt_template = f"""
         Using the context and previous conversation provided below, answer the following question. If the information is insufficient to answer the question, please state that clearly: