Spaces:
Sleeping
Sleeping
Asaad Almutareb committed on
Commit ·
2e6490e
1
Parent(s): 5c0a79d
added sqlite schema and handling
Browse files
innovation_pathfinder_ai/database/db_handler.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlmodel import SQLModel, create_engine, Session, select
from innovation_pathfinder_ai.database.schema import Sources
from innovation_pathfinder_ai.utils.logger import get_console_logger

# SQLite database file lives in the current working directory.
sqlite_file_name = "database.db"
sqlite_url = f"sqlite:///{sqlite_file_name}"
engine = create_engine(sqlite_url, echo=False)

logger = get_console_logger("db_handler")

# Create all tables declared on SQLModel metadata (currently just Sources)
# as an import-time side effect; a no-op if the tables already exist.
SQLModel.metadata.create_all(engine)
| 14 |
+
def read_one(hash_id: str):
    """Return the first Sources row whose hash_id matches, or None.

    Args:
        hash_id: unique content hash of the source (Sources.hash_id is a
            str in the schema; the original ``dict`` annotation was wrong).

    Returns:
        The matching Sources instance, or None when no row matches.
    """
    with Session(engine) as session:
        statement = select(Sources).where(Sources.hash_id == hash_id)
        sources = session.exec(statement).first()
        return sources
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def add_one(data: dict):
    """Insert a new Sources row built from ``data``.

    Duplicate detection is keyed on ``data["hash_id"]``: when a row with
    that hash already exists, nothing is inserted and None is returned.

    Returns:
        The freshly persisted Sources instance, or None on duplicate.
    """
    with Session(engine) as session:
        duplicate = session.exec(
            select(Sources).where(Sources.hash_id == data.get("hash_id"))
        ).first()
        if duplicate:
            logger.warning(f"Item with hash_id {data.get('hash_id')} already exists")
            return None  # or raise an exception, or handle as needed
        record = Sources(**data)
        session.add(record)
        session.commit()
        # Re-load server-generated fields (e.g. the autoincrement id).
        session.refresh(record)
        logger.info(f"Item with hash_id {data.get('hash_id')} added to the database")
        return record
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def update_one(hash_id: str, data: dict):
    """Update the Sources row identified by ``hash_id`` with ``data``.

    Args:
        hash_id: unique content hash of the row to update (str, not dict —
            the original annotation was wrong).
        data: attribute-name -> new-value mapping applied via setattr.

    Returns:
        The updated Sources instance, or None when no row matches.
    """
    with Session(engine) as session:
        # Check if the item with the given hash_id exists
        sources = session.exec(
            select(Sources).where(Sources.hash_id == hash_id)
        ).first()
        if not sources:
            logger.warning(f"No item with hash_id {hash_id} found for update")
            return None  # or raise an exception, or handle as needed
        for key, value in data.items():
            setattr(sources, key, value)
        session.commit()
        # Refresh so the returned object is not in the expired state
        # (attribute access after commit would otherwise re-query lazily).
        session.refresh(sources)
        logger.info(f"Item with hash_id {hash_id} updated in the database")
        return sources
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def delete_one(id: str):
    """Delete the Sources row whose hash_id equals ``id``.

    Args:
        id: the hash_id of the row to delete. Despite the parameter name,
            this is matched against Sources.hash_id, which is a str —
            the original ``int`` annotation was wrong.

    Returns:
        The deleted Sources instance on success, or None when no row
        matched. (The original returned None on every path, which made
        delete_many's success/failure check meaningless.)
    """
    with Session(engine) as session:
        # Check if the item with the given hash_id exists
        sources = session.exec(
            select(Sources).where(Sources.hash_id == id)
        ).first()
        if not sources:
            logger.warning(f"No item with hash_id {id} found for deletion")
            return None  # or raise an exception, or handle as needed
        session.delete(sources)
        session.commit()
        logger.info(f"Item with hash_id {id} deleted from the database")
        return sources
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def add_many(data: list):
    """Insert multiple source records, skipping duplicates.

    Each item is delegated to add_one(), which opens its own session,
    performs the duplicate check, commits, and logs success itself.
    The original version wrapped this loop in an outer Session that was
    never used — its trailing commit() committed nothing — so the outer
    session and the duplicate success logging are removed.

    Args:
        data: list of dicts, each a valid kwargs mapping for Sources.
    """
    for info in data:
        # Reuse add_one function for each item
        result = add_one(info)
        if result is None:
            logger.warning(
                f"Item with hash_id {info.get('hash_id')} could not be added"
            )
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def delete_many(ids: list):
    """Delete multiple source records by hash_id.

    Each id is delegated to delete_one(), which opens its own session,
    commits, and logs the deletion itself. The original version wrapped
    this loop in an outer Session that was never used — its trailing
    commit() committed nothing — so the outer session is removed.

    Args:
        ids: list of hash_id values to delete.
    """
    for hash_id in ids:
        # Reuse delete_one function for each item
        result = delete_one(hash_id)
        if result is None:
            logger.warning(f"No item with hash_id {hash_id} found for deletion")
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def read_all(query: dict = None):
    """Return all Sources rows, optionally filtered by equality conditions.

    Args:
        query: optional mapping of Sources attribute name -> required value;
            all conditions are ANDed. Keys must be existing Sources
            attributes or getattr raises AttributeError.

    Returns:
        List of matching Sources instances (possibly empty).
    """
    with Session(engine) as session:
        statement = select(Sources)
        if query:
            statement = statement.where(
                *[getattr(Sources, key) == value for key, value in query.items()]
            )
        sources = session.exec(statement).all()
        return sources
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def delete_all():
    """Delete every row from the Sources table.

    Bug fix: the original called ``session.exec(Sources).delete()``, but
    Session.exec expects an executable statement, not a model class, so it
    raised at runtime. We select all rows and delete them individually,
    which also keeps ORM-level delete behavior consistent with delete_one.
    """
    with Session(engine) as session:
        for row in session.exec(select(Sources)).all():
            session.delete(row)
        session.commit()
        logger.info("All items deleted from the database")
|
innovation_pathfinder_ai/database/schema.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlmodel import SQLModel, Field
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
import datetime
|
| 5 |
+
|
| 6 |
+
class Sources(SQLModel, table=True):
    """A tracked information source (arxiv paper, web page, wiki article)."""

    # Autoincrement surrogate primary key.
    id: Optional[int] = Field(default=None, primary_key=True)
    url: str = Field()
    title: Optional[str] = Field(default="NA", unique=False)
    # Unique content hash used as the lookup key by db_handler.
    hash_id: str = Field(unique=True)
    # Bug fix: the original used default=datetime.datetime.now().timestamp(),
    # which is evaluated ONCE at import time, stamping every row with the
    # process start time. default_factory defers evaluation to row creation.
    created_at: float = Field(
        default_factory=lambda: datetime.datetime.now().timestamp()
    )
    summary: str = Field(default="")
    embedded: bool = Field(default=False)

    # Allow re-definition when the module is imported twice (e.g. reloads).
    __table_args__ = {"extend_existing": True}
|
innovation_pathfinder_ai/structured_tools/structured_tools.py
CHANGED
|
@@ -6,31 +6,32 @@ from langchain_community.utilities import WikipediaAPIWrapper
|
|
| 6 |
#from langchain.tools import Tool
|
| 7 |
from langchain_community.utilities import GoogleSearchAPIWrapper
|
| 8 |
import arxiv
|
| 9 |
-
|
| 10 |
# hacky and should be replaced with a database
|
| 11 |
from innovation_pathfinder_ai.source_container.container import (
|
| 12 |
all_sources
|
| 13 |
)
|
| 14 |
-
from innovation_pathfinder_ai.utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
@tool
|
| 17 |
def arxiv_search(query: str) -> str:
|
| 18 |
"""Search arxiv database for scientific research papers and studies. This is your primary information source.
|
| 19 |
always check it first when you search for information, before using any other tool."""
|
| 20 |
-
# return "LangChain"
|
| 21 |
global all_sources
|
| 22 |
-
arxiv_retriever = ArxivRetriever(load_max_docs=
|
| 23 |
data = arxiv_retriever.invoke(query)
|
| 24 |
meta_data = [i.metadata for i in data]
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
# formatted_info = format_info_list(all_sources)
|
| 32 |
-
|
| 33 |
-
return meta_data.__str__()
|
| 34 |
|
| 35 |
@tool
|
| 36 |
def get_arxiv_paper(paper_id:str) -> None:
|
|
@@ -52,17 +53,13 @@ def get_arxiv_paper(paper_id:str) -> None:
|
|
| 52 |
@tool
|
| 53 |
def google_search(query: str) -> str:
|
| 54 |
"""Search Google for additional results when you can't answer questions using arxiv search or wikipedia search."""
|
| 55 |
-
# return "LangChain"
|
| 56 |
global all_sources
|
| 57 |
|
| 58 |
websearch = GoogleSearchAPIWrapper()
|
| 59 |
-
search_results:dict = websearch.results(query,
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
# formatted_string = "Title: {title}, link: {link}, snippet: {snippet}".format(**organic_source)
|
| 64 |
-
cleaner_sources = ["Title: {title}, link: {link}, snippet: {snippet}".format(**i) for i in search_results]
|
| 65 |
-
|
| 66 |
all_sources += cleaner_sources
|
| 67 |
|
| 68 |
return cleaner_sources.__str__()
|
|
@@ -75,5 +72,9 @@ def wikipedia_search(query: str) -> str:
|
|
| 75 |
api_wrapper = WikipediaAPIWrapper()
|
| 76 |
wikipedia_search = WikipediaQueryRun(api_wrapper=api_wrapper)
|
| 77 |
wikipedia_results = wikipedia_search.run(query)
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
#from langchain.tools import Tool
|
| 7 |
from langchain_community.utilities import GoogleSearchAPIWrapper
|
| 8 |
import arxiv
|
| 9 |
+
import ast
|
| 10 |
# hacky and should be replaced with a database
|
| 11 |
from innovation_pathfinder_ai.source_container.container import (
|
| 12 |
all_sources
|
| 13 |
)
|
| 14 |
+
from innovation_pathfinder_ai.utils.utils import (
|
| 15 |
+
parse_list_to_dicts, format_wiki_summaries, format_arxiv_documents, format_search_results
|
| 16 |
+
)
|
| 17 |
+
from innovation_pathfinder_ai.database.db_handler import (
|
| 18 |
+
add_many
|
| 19 |
+
)
|
| 20 |
|
| 21 |
@tool
def arxiv_search(query: str) -> str:
    """Search arxiv database for scientific research papers and studies. This is your primary information source.
    always check it first when you search for information, before using any other tool."""
    global all_sources
    arxiv_retriever = ArxivRetriever(load_max_docs=3)
    data = arxiv_retriever.invoke(query)
    # Removed dead code: the original also built a `meta_data` list of
    # document metadata that was never used after the refactor to
    # format_arxiv_documents().
    formatted_sources = format_arxiv_documents(data)
    all_sources += formatted_sources
    # Persist the formatted sources to the sqlite sources table.
    parsed_sources = parse_list_to_dicts(formatted_sources)
    add_many(parsed_sources)

    return str(data)
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
@tool
|
| 37 |
def get_arxiv_paper(paper_id:str) -> None:
|
|
|
|
| 53 |
@tool
def google_search(query: str) -> str:
    """Search Google for additional results when you can't answer questions using arxiv search or wikipedia search."""
    global all_sources

    websearch = GoogleSearchAPIWrapper()
    # GoogleSearchAPIWrapper.results returns a list of result dicts, not a
    # dict — the original `search_results: dict` annotation was wrong.
    search_results: list = websearch.results(query, 3)
    cleaner_sources = format_search_results(search_results)
    # Persist the formatted sources to the sqlite sources table.
    parsed_csources = parse_list_to_dicts(cleaner_sources)
    add_many(parsed_csources)
    all_sources += cleaner_sources

    return str(cleaner_sources)
|
|
|
|
| 72 |
api_wrapper = WikipediaAPIWrapper()
|
| 73 |
wikipedia_search = WikipediaQueryRun(api_wrapper=api_wrapper)
|
| 74 |
wikipedia_results = wikipedia_search.run(query)
|
| 75 |
+
formatted_summaries = format_wiki_summaries(wikipedia_results)
|
| 76 |
+
all_sources += formatted_summaries
|
| 77 |
+
parsed_summaries = parse_list_to_dicts(formatted_summaries)
|
| 78 |
+
add_many(parsed_summaries)
|
| 79 |
+
|
| 80 |
+
return wikipedia_results.__str__()
|