Spaces:
Sleeping
Sleeping
Asaad Almutareb committed on
Commit ·
fa99d8f
1
Parent(s): 057d3c8
cleaned branch, added final streaming callback handler
Browse files- {innovation_pathfinder_ai/backend → app/api}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app/api → app/api/v1}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app/api/v1 → app/api/v1/agents}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/api/v1/agents/hf_mixtral_agent.py +37 -15
- {innovation_pathfinder_ai/backend/app → app}/api/v1/agents/ollama_mixtral_agent.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/api/v1/agents/requirements.txt +0 -0
- {innovation_pathfinder_ai/backend/app → app}/api/v1/api.py +0 -0
- {innovation_pathfinder_ai/backend/app/api/v1/agents → app/api/v1/endpoints}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/api/v1/endpoints/add_to_kb.py +0 -0
- {innovation_pathfinder_ai/backend/app/api/v1/endpoints → app/core}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/core/config.py +4 -5
- {innovation_pathfinder_ai/backend/app → app}/crud/db_handler.py +5 -3
- {innovation_pathfinder_ai/backend/app → app}/database/db_schema.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/knowledge_base/placeholder.txt +0 -0
- {innovation_pathfinder_ai/backend/app → app}/main.py +9 -4
- {innovation_pathfinder_ai/backend/app → app}/schemas/adaptive_cards_schema.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/schemas/message_schema.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/schemas/response_schema.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/structured_tools/structured_tools.py +15 -9
- {innovation_pathfinder_ai/backend/app → app}/templates/chat.py +3 -3
- {innovation_pathfinder_ai/backend/app → app}/templates/react_json_with_memory.py +0 -0
- {innovation_pathfinder_ai/backend/app/core → app/utils}/__init__.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/utils/adaptive_cards/cards.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/utils/callback.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/utils/chains.py +2 -1
- {innovation_pathfinder_ai/backend/app → app}/utils/logger.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/utils/utils.py +0 -0
- {innovation_pathfinder_ai/backend/app → app}/vector_store/chroma_vector_store.py +12 -7
- {innovation_pathfinder_ai/backend/app → app}/vector_store/initialize_chroma_db.py +9 -6
- {innovation_pathfinder_ai/frontend/assets → assets}/avatar.png +0 -0
- {innovation_pathfinder_ai/frontend/assets → assets}/favicon.ico +0 -0
- innovation_pathfinder_ai/backend/app/utils/__init__.py +0 -0
- innovation_pathfinder_ai/frontend/app.py +0 -143
- innovation_pathfinder_ai/source_container/container.py +0 -1
{innovation_pathfinder_ai/backend → app/api}/__init__.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app/api → app/api/v1}/__init__.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app/api/v1 → app/api/v1/agents}/__init__.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/agents/hf_mixtral_agent.py
RENAMED
|
@@ -5,8 +5,13 @@ from langchain.agents.format_scratchpad import format_log_to_str
|
|
| 5 |
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
|
| 6 |
# Import things that are needed generically
|
| 7 |
from langchain.tools.render import render_text_description
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import os
|
| 9 |
from dotenv import load_dotenv
|
|
|
|
| 10 |
from app.structured_tools.structured_tools import (
|
| 11 |
arxiv_search, get_arxiv_paper, google_search, wikipedia_search, knowledgeBase_search, memory_search
|
| 12 |
)
|
|
@@ -17,43 +22,59 @@ from app.utils import logger
|
|
| 17 |
from app.utils import utils
|
| 18 |
from langchain.globals import set_llm_cache
|
| 19 |
from langchain.cache import SQLiteCache
|
| 20 |
-
from app.utils.callback import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
|
|
|
|
| 23 |
logger = logger.get_console_logger("hf_mixtral_agent")
|
| 24 |
|
| 25 |
config = load_dotenv(".env")
|
| 26 |
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
| 27 |
GOOGLE_CSE_ID = os.getenv('GOOGLE_CSE_ID')
|
| 28 |
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
|
| 29 |
-
LANGCHAIN_TRACING_V2 = "true"
|
| 30 |
-
LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
|
| 31 |
-
LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
|
| 32 |
-
LANGCHAIN_PROJECT = os.getenv('LANGCHAIN_PROJECT')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
router = APIRouter()
|
| 35 |
|
|
|
|
|
|
|
| 36 |
@router.websocket("/agent")
|
| 37 |
async def websocket_endpoint(websocket: WebSocket):
|
| 38 |
await websocket.accept()
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
while True:
|
| 41 |
try:
|
| 42 |
data = await websocket.receive_json()
|
| 43 |
user_message = data["message"]
|
|
|
|
| 44 |
chat_history = []#data["history"]
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
|
| 54 |
-
|
| 55 |
message_id: str = utils.generate_uuid()
|
| 56 |
-
custom_handler =
|
| 57 |
websocket, message_id=message_id
|
| 58 |
)
|
| 59 |
|
|
@@ -106,6 +127,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
| 106 |
#max_execution_time=60, # timout at 60 sec
|
| 107 |
return_intermediate_steps=True,
|
| 108 |
handle_parsing_errors=True,
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
await agent_executor.arun(input=user_message, chat_history=chat_history, callbacks=[custom_handler])
|
|
|
|
| 5 |
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
|
| 6 |
# Import things that are needed generically
|
| 7 |
from langchain.tools.render import render_text_description
|
| 8 |
+
from app.schemas.message_schema import (
|
| 9 |
+
IChatResponse,
|
| 10 |
+
)
|
| 11 |
+
from app.utils.utils import generate_uuid
|
| 12 |
import os
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
+
from app.utils.adaptive_cards.cards import create_adaptive_card
|
| 15 |
from app.structured_tools.structured_tools import (
|
| 16 |
arxiv_search, get_arxiv_paper, google_search, wikipedia_search, knowledgeBase_search, memory_search
|
| 17 |
)
|
|
|
|
| 22 |
from app.utils import utils
|
| 23 |
from langchain.globals import set_llm_cache
|
| 24 |
from langchain.cache import SQLiteCache
|
| 25 |
+
from app.utils.callback import (
|
| 26 |
+
CustomAsyncCallbackHandler,
|
| 27 |
+
CustomFinalStreamingStdOutCallbackHandler,
|
| 28 |
+
)
|
| 29 |
+
from langchain.memory import ConversationBufferMemory
|
| 30 |
+
from app.core.config import settings
|
| 31 |
|
| 32 |
+
local_cache=settings.LOCAL_CACHE
|
| 33 |
+
set_llm_cache(SQLiteCache(database_path=local_cache))
|
| 34 |
logger = logger.get_console_logger("hf_mixtral_agent")
|
| 35 |
|
| 36 |
config = load_dotenv(".env")
|
| 37 |
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
| 38 |
GOOGLE_CSE_ID = os.getenv('GOOGLE_CSE_ID')
|
| 39 |
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
|
| 40 |
+
# LANGCHAIN_TRACING_V2 = "true"
|
| 41 |
+
# LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
|
| 42 |
+
# LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
|
| 43 |
+
# LANGCHAIN_PROJECT = os.getenv('LANGCHAIN_PROJECT')
|
| 44 |
+
# GOOGLE_CSE_ID=settings.GOOGLE_CSE_ID
|
| 45 |
+
# GOOGLE_API_KEY=settings.GOOGLE_API_KEY
|
| 46 |
+
# HUGGINGFACEHUB_API_TOKEN=settings.HUGGINGFACEHUB_API_TOKEN
|
| 47 |
+
# print(HUGGINGFACEHUB_API_TOKEN)
|
| 48 |
|
| 49 |
router = APIRouter()
|
| 50 |
|
| 51 |
+
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
| 52 |
+
|
| 53 |
@router.websocket("/agent")
|
| 54 |
async def websocket_endpoint(websocket: WebSocket):
|
| 55 |
await websocket.accept()
|
| 56 |
+
if not settings.HUGGINGFACEHUB_API_TOKEN.startswith("hf_"):
|
| 57 |
+
await websocket.send_json({"error": "HUGGINGFACEHUB_API_TOKEN is not set"})
|
| 58 |
+
return
|
| 59 |
|
| 60 |
while True:
|
| 61 |
try:
|
| 62 |
data = await websocket.receive_json()
|
| 63 |
user_message = data["message"]
|
| 64 |
+
user_message_card = create_adaptive_card(user_message)
|
| 65 |
chat_history = []#data["history"]
|
| 66 |
|
| 67 |
+
resp = IChatResponse(
|
| 68 |
+
sender="you",
|
| 69 |
+
message=user_message_card.to_dict(),
|
| 70 |
+
type="start",
|
| 71 |
+
message_id=generate_uuid(),
|
| 72 |
+
id=generate_uuid(),
|
| 73 |
+
)
|
| 74 |
|
| 75 |
+
await websocket.send_json(resp.model_dump())
|
| 76 |
message_id: str = utils.generate_uuid()
|
| 77 |
+
custom_handler = CustomFinalStreamingStdOutCallbackHandler(
|
| 78 |
websocket, message_id=message_id
|
| 79 |
)
|
| 80 |
|
|
|
|
| 127 |
#max_execution_time=60, # timout at 60 sec
|
| 128 |
return_intermediate_steps=True,
|
| 129 |
handle_parsing_errors=True,
|
| 130 |
+
#memory=memory
|
| 131 |
)
|
| 132 |
|
| 133 |
await agent_executor.arun(input=user_message, chat_history=chat_history, callbacks=[custom_handler])
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/agents/ollama_mixtral_agent.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/agents/requirements.txt
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/api.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app/api/v1/agents → app/api/v1/endpoints}/__init__.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/api/v1/endpoints/add_to_kb.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app/api/v1/endpoints → app/core}/__init__.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/core/config.py
RENAMED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
-
from pydantic import AnyHttpUrl,
|
|
|
|
| 3 |
from enum import Enum
|
| 4 |
|
| 5 |
|
|
@@ -15,9 +16,6 @@ class Settings(BaseSettings):
|
|
| 15 |
MODE: ModeEnum = ModeEnum.development
|
| 16 |
API_VERSION: str = "v1"
|
| 17 |
API_V1_STR: str = f"/api/{API_VERSION}"
|
| 18 |
-
OPENAI_API_KEY: str
|
| 19 |
-
UNSPLASH_API_KEY: str
|
| 20 |
-
SERP_API_KEY: str
|
| 21 |
HUGGINGFACEHUB_API_TOKEN: str
|
| 22 |
GOOGLE_CSE_ID: str
|
| 23 |
GOOGLE_API_KEY: str
|
|
@@ -25,10 +23,11 @@ class Settings(BaseSettings):
|
|
| 25 |
CONVERSATION_COLLECTION_NAME: str
|
| 26 |
EMBEDDING_MODEL: str
|
| 27 |
SOURCES_CACHE: str
|
|
|
|
| 28 |
|
| 29 |
class Config:
|
| 30 |
case_sensitive = True
|
| 31 |
-
env_file = os.path.expanduser("
|
| 32 |
|
| 33 |
|
| 34 |
settings = Settings()
|
|
|
|
| 1 |
import os
|
| 2 |
+
from pydantic import AnyHttpUrl, ConfigDict
|
| 3 |
+
from pydantic_settings import BaseSettings
|
| 4 |
from enum import Enum
|
| 5 |
|
| 6 |
|
|
|
|
| 16 |
MODE: ModeEnum = ModeEnum.development
|
| 17 |
API_VERSION: str = "v1"
|
| 18 |
API_V1_STR: str = f"/api/{API_VERSION}"
|
|
|
|
|
|
|
|
|
|
| 19 |
HUGGINGFACEHUB_API_TOKEN: str
|
| 20 |
GOOGLE_CSE_ID: str
|
| 21 |
GOOGLE_API_KEY: str
|
|
|
|
| 23 |
CONVERSATION_COLLECTION_NAME: str
|
| 24 |
EMBEDDING_MODEL: str
|
| 25 |
SOURCES_CACHE: str
|
| 26 |
+
LOCAL_CACHE: str
|
| 27 |
|
| 28 |
class Config:
|
| 29 |
case_sensitive = True
|
| 30 |
+
env_file = os.path.expanduser(".env")
|
| 31 |
|
| 32 |
|
| 33 |
settings = Settings()
|
{innovation_pathfinder_ai/backend/app → app}/crud/db_handler.py
RENAMED
|
@@ -2,11 +2,13 @@ from sqlmodel import SQLModel, create_engine, Session, select
|
|
| 2 |
from app.database.db_schema import Sources
|
| 3 |
from app.utils.logger import get_console_logger
|
| 4 |
import os
|
| 5 |
-
from
|
|
|
|
| 6 |
|
| 7 |
-
load_dotenv()
|
| 8 |
|
| 9 |
-
sqlite_file_name = os.getenv('SOURCES_CACHE')
|
|
|
|
| 10 |
|
| 11 |
sqlite_url = f"sqlite:///{sqlite_file_name}"
|
| 12 |
engine = create_engine(sqlite_url, echo=False)
|
|
|
|
| 2 |
from app.database.db_schema import Sources
|
| 3 |
from app.utils.logger import get_console_logger
|
| 4 |
import os
|
| 5 |
+
from app.core.config import settings
|
| 6 |
+
#from dotenv import load_dotenv
|
| 7 |
|
| 8 |
+
#load_dotenv()
|
| 9 |
|
| 10 |
+
#sqlite_file_name = os.getenv('SOURCES_CACHE')
|
| 11 |
+
sqlite_file_name = settings.SOURCES_CACHE
|
| 12 |
|
| 13 |
sqlite_url = f"sqlite:///{sqlite_file_name}"
|
| 14 |
engine = create_engine(sqlite_url, echo=False)
|
{innovation_pathfinder_ai/backend/app → app}/database/db_schema.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/knowledge_base/placeholder.txt
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/main.py
RENAMED
|
@@ -1,18 +1,23 @@
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
from app.api.v1.api import api_router as api_router_v1
|
| 3 |
from fastapi.responses import HTMLResponse
|
|
|
|
| 4 |
from app.templates.chat import chat_html
|
| 5 |
#from app.core.config import settings
|
| 6 |
from fastapi.middleware.cors import CORSMiddleware
|
| 7 |
|
| 8 |
-
app = FastAPI(
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# CORS Middleware setup for allowing frontend requests
|
| 11 |
# ToDO: replace with settings.BACKEND_CORS_ORIGINS once core/config.py is implemented
|
| 12 |
-
if BACKEND_CORS_ORIGINS:
|
| 13 |
app.add_middleware(
|
| 14 |
CORSMiddleware,
|
| 15 |
-
allow_origins=[str(origin) for origin in BACKEND_CORS_ORIGINS],
|
| 16 |
allow_credentials=True,
|
| 17 |
allow_methods=["*"],
|
| 18 |
allow_headers=["*"],
|
|
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
from app.api.v1.api import api_router as api_router_v1
|
| 3 |
from fastapi.responses import HTMLResponse
|
| 4 |
+
from app.core.config import settings
|
| 5 |
from app.templates.chat import chat_html
|
| 6 |
#from app.core.config import settings
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
|
| 9 |
+
app = FastAPI(
|
| 10 |
+
title=settings.PROJECT_NAME,
|
| 11 |
+
version=settings.API_VERSION,
|
| 12 |
+
openapi_url=f"{settings.API_V1_STR}/openapi.json",
|
| 13 |
+
)
|
| 14 |
+
#BACKEND_CORS_ORIGINS = ["*"]
|
| 15 |
# CORS Middleware setup for allowing frontend requests
|
| 16 |
# ToDO: replace with settings.BACKEND_CORS_ORIGINS once core/config.py is implemented
|
| 17 |
+
if settings.BACKEND_CORS_ORIGINS:
|
| 18 |
app.add_middleware(
|
| 19 |
CORSMiddleware,
|
| 20 |
+
allow_origins=[str(origin) for origin in settings.BACKEND_CORS_ORIGINS],
|
| 21 |
allow_credentials=True,
|
| 22 |
allow_methods=["*"],
|
| 23 |
allow_headers=["*"],
|
{innovation_pathfinder_ai/backend/app → app}/schemas/adaptive_cards_schema.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/schemas/message_schema.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/schemas/response_schema.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/structured_tools/structured_tools.py
RENAMED
|
@@ -8,9 +8,10 @@ from langchain_community.utilities import GoogleSearchAPIWrapper
|
|
| 8 |
from langchain_community.embeddings.sentence_transformer import (
|
| 9 |
SentenceTransformerEmbeddings,
|
| 10 |
)
|
|
|
|
| 11 |
from langchain_community.vectorstores import Chroma
|
| 12 |
import arxiv
|
| 13 |
-
import ast
|
| 14 |
|
| 15 |
import chromadb
|
| 16 |
|
|
@@ -34,7 +35,8 @@ from app.utils.utils import (
|
|
| 34 |
import os
|
| 35 |
# from app.utils import create_wikipedia_urls_from_text
|
| 36 |
|
| 37 |
-
persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
|
|
|
| 38 |
|
| 39 |
@tool
|
| 40 |
def memory_search(query:str) -> str:
|
|
@@ -45,11 +47,13 @@ def memory_search(query:str) -> str:
|
|
| 45 |
path=persist_directory,
|
| 46 |
)
|
| 47 |
|
| 48 |
-
collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
|
|
|
|
| 49 |
#store using envar
|
| 50 |
|
| 51 |
embedding_function = SentenceTransformerEmbeddings(
|
| 52 |
-
model_name=
|
|
|
|
| 53 |
)
|
| 54 |
|
| 55 |
vector_db = Chroma(
|
|
@@ -75,7 +79,8 @@ def knowledgeBase_search(query:str) -> str:
|
|
| 75 |
#store using envar
|
| 76 |
|
| 77 |
embedding_function = SentenceTransformerEmbeddings(
|
| 78 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
|
| 79 |
)
|
| 80 |
|
| 81 |
vector_db = Chroma(
|
|
@@ -100,7 +105,7 @@ def arxiv_search(query: str) -> str:
|
|
| 100 |
formatted_sources = format_arxiv_documents(data)
|
| 101 |
#all_sources += formatted_sources
|
| 102 |
parsed_sources = parse_list_to_dicts(formatted_sources)
|
| 103 |
-
|
| 104 |
|
| 105 |
return data.__str__()
|
| 106 |
|
|
@@ -149,7 +154,8 @@ def embed_arvix_paper(paper_id:str) -> None:
|
|
| 149 |
#store using envar
|
| 150 |
|
| 151 |
embedding_function = SentenceTransformerEmbeddings(
|
| 152 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
|
| 153 |
)
|
| 154 |
|
| 155 |
full_path = os.path.join(pdf_directory, pdf_file_name)
|
|
@@ -170,7 +176,7 @@ def wikipedia_search(query: str) -> str:
|
|
| 170 |
formatted_summaries = format_wiki_summaries(wikipedia_results)
|
| 171 |
#all_sources += formatted_summaries
|
| 172 |
parsed_summaries = parse_list_to_dicts(formatted_summaries)
|
| 173 |
-
|
| 174 |
#all_sources += create_wikipedia_urls_from_text(wikipedia_results)
|
| 175 |
return wikipedia_results
|
| 176 |
|
|
@@ -183,7 +189,7 @@ def google_search(query: str) -> str:
|
|
| 183 |
search_results:dict = websearch.results(query, 3)
|
| 184 |
cleaner_sources =format_search_results(search_results)
|
| 185 |
parsed_csources = parse_list_to_dicts(cleaner_sources)
|
| 186 |
-
|
| 187 |
#all_sources += cleaner_sources
|
| 188 |
|
| 189 |
return cleaner_sources.__str__()
|
|
|
|
| 8 |
from langchain_community.embeddings.sentence_transformer import (
|
| 9 |
SentenceTransformerEmbeddings,
|
| 10 |
)
|
| 11 |
+
from app.core.config import settings
|
| 12 |
from langchain_community.vectorstores import Chroma
|
| 13 |
import arxiv
|
| 14 |
+
#import ast
|
| 15 |
|
| 16 |
import chromadb
|
| 17 |
|
|
|
|
| 35 |
import os
|
| 36 |
# from app.utils import create_wikipedia_urls_from_text
|
| 37 |
|
| 38 |
+
#persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
| 39 |
+
persist_directory = settings.VECTOR_DATABASE_LOCATION
|
| 40 |
|
| 41 |
@tool
|
| 42 |
def memory_search(query:str) -> str:
|
|
|
|
| 47 |
path=persist_directory,
|
| 48 |
)
|
| 49 |
|
| 50 |
+
#collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
|
| 51 |
+
collection_name = settings.CONVERSATION_COLLECTION_NAME
|
| 52 |
#store using envar
|
| 53 |
|
| 54 |
embedding_function = SentenceTransformerEmbeddings(
|
| 55 |
+
model_name=settings.EMBEDDING_MODEL
|
| 56 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
| 57 |
)
|
| 58 |
|
| 59 |
vector_db = Chroma(
|
|
|
|
| 79 |
#store using envar
|
| 80 |
|
| 81 |
embedding_function = SentenceTransformerEmbeddings(
|
| 82 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
| 83 |
+
model_name=settings.EMBEDDING_MODEL
|
| 84 |
)
|
| 85 |
|
| 86 |
vector_db = Chroma(
|
|
|
|
| 105 |
formatted_sources = format_arxiv_documents(data)
|
| 106 |
#all_sources += formatted_sources
|
| 107 |
parsed_sources = parse_list_to_dicts(formatted_sources)
|
| 108 |
+
add_many(parsed_sources)
|
| 109 |
|
| 110 |
return data.__str__()
|
| 111 |
|
|
|
|
| 154 |
#store using envar
|
| 155 |
|
| 156 |
embedding_function = SentenceTransformerEmbeddings(
|
| 157 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
| 158 |
+
model_name=settings.EMBEDDING_MODEL
|
| 159 |
)
|
| 160 |
|
| 161 |
full_path = os.path.join(pdf_directory, pdf_file_name)
|
|
|
|
| 176 |
formatted_summaries = format_wiki_summaries(wikipedia_results)
|
| 177 |
#all_sources += formatted_summaries
|
| 178 |
parsed_summaries = parse_list_to_dicts(formatted_summaries)
|
| 179 |
+
add_many(parsed_summaries)
|
| 180 |
#all_sources += create_wikipedia_urls_from_text(wikipedia_results)
|
| 181 |
return wikipedia_results
|
| 182 |
|
|
|
|
| 189 |
search_results:dict = websearch.results(query, 3)
|
| 190 |
cleaner_sources =format_search_results(search_results)
|
| 191 |
parsed_csources = parse_list_to_dicts(cleaner_sources)
|
| 192 |
+
add_many(parsed_csources)
|
| 193 |
#all_sources += cleaner_sources
|
| 194 |
|
| 195 |
return cleaner_sources.__str__()
|
{innovation_pathfinder_ai/backend/app → app}/templates/chat.py
RENAMED
|
@@ -6,10 +6,10 @@ chat_html = """
|
|
| 6 |
</head>
|
| 7 |
<body>
|
| 8 |
<webchat-widget
|
| 9 |
-
widget-websocket="ws://localhost:
|
| 10 |
widget-color="#47A7F6"
|
| 11 |
-
widget-chat-avatar="https://icon-library.com/images/
|
| 12 |
-
widget-user-avatar="https://
|
| 13 |
widget-header="Bot"
|
| 14 |
widget-subheader="Online"
|
| 15 |
widget-placeholder="Send a message"
|
|
|
|
| 6 |
</head>
|
| 7 |
<body>
|
| 8 |
<webchat-widget
|
| 9 |
+
widget-websocket="ws://localhost:8000/chat/agent"
|
| 10 |
widget-color="#47A7F6"
|
| 11 |
+
widget-chat-avatar="https://icon-library.com/images/bot-icon/bot-icon-1.jpg"
|
| 12 |
+
widget-user-avatar="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQWR4hHJTiikyzCv6nc5OAkHPIHMD-ESsP-LFEaY2vVIjV6wqCt&s"
|
| 13 |
widget-header="Bot"
|
| 14 |
widget-subheader="Online"
|
| 15 |
widget-placeholder="Send a message"
|
{innovation_pathfinder_ai/backend/app → app}/templates/react_json_with_memory.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app/core → app/utils}/__init__.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/utils/adaptive_cards/cards.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/utils/callback.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/utils/chains.py
RENAMED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
from langchain import LLMChain
|
|
|
|
| 2 |
from langchain_community.llms import HuggingFaceEndpoint
|
| 3 |
|
| 4 |
import re
|
|
|
|
| 1 |
+
from langchain.chains import LLMChain
|
| 2 |
+
from langchain.prompts import PromptTemplate
|
| 3 |
from langchain_community.llms import HuggingFaceEndpoint
|
| 4 |
|
| 5 |
import re
|
{innovation_pathfinder_ai/backend/app → app}/utils/logger.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/utils/utils.py
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/backend/app → app}/vector_store/chroma_vector_store.py
RENAMED
|
@@ -23,11 +23,13 @@ from langchain_community.embeddings.sentence_transformer import (
|
|
| 23 |
from app.utils.utils import (
|
| 24 |
generate_uuid
|
| 25 |
)
|
| 26 |
-
import
|
| 27 |
-
import
|
|
|
|
| 28 |
|
| 29 |
-
dotenv.load_dotenv()
|
| 30 |
-
persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
def read_markdown_file(file_path: str) -> str:
|
|
@@ -96,7 +98,8 @@ def add_markdown_to_collection(
|
|
| 96 |
)
|
| 97 |
|
| 98 |
embedding_function = SentenceTransformerEmbeddings(
|
| 99 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
|
| 100 |
)
|
| 101 |
|
| 102 |
documents_page_content:list = [i.page_content for i in splits]
|
|
@@ -178,7 +181,8 @@ def add_pdf_to_vector_store(
|
|
| 178 |
)
|
| 179 |
|
| 180 |
embedding_function = SentenceTransformerEmbeddings(
|
| 181 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
|
| 182 |
)
|
| 183 |
|
| 184 |
documents_page_content:list = [i.page_content for i in split_docs]
|
|
@@ -236,7 +240,8 @@ if __name__ == "__main__":
|
|
| 236 |
|
| 237 |
# create the open-source embedding function
|
| 238 |
embedding_function = SentenceTransformerEmbeddings(
|
| 239 |
-
model_name=os.getenv("EMBEDDING_MODEL"),
|
|
|
|
| 240 |
)
|
| 241 |
|
| 242 |
#method of integrating Chroma and Langchain
|
|
|
|
| 23 |
from app.utils.utils import (
|
| 24 |
generate_uuid
|
| 25 |
)
|
| 26 |
+
from app.core.config import settings
|
| 27 |
+
# import dotenv
|
| 28 |
+
# import os
|
| 29 |
|
| 30 |
+
# dotenv.load_dotenv()
|
| 31 |
+
# persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
| 32 |
+
persist_directory = settings.VECTOR_DATABASE_LOCATION
|
| 33 |
|
| 34 |
|
| 35 |
def read_markdown_file(file_path: str) -> str:
|
|
|
|
| 98 |
)
|
| 99 |
|
| 100 |
embedding_function = SentenceTransformerEmbeddings(
|
| 101 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
| 102 |
+
model_name=settings.EMBEDDING_MODEL
|
| 103 |
)
|
| 104 |
|
| 105 |
documents_page_content:list = [i.page_content for i in splits]
|
|
|
|
| 181 |
)
|
| 182 |
|
| 183 |
embedding_function = SentenceTransformerEmbeddings(
|
| 184 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
| 185 |
+
model_name=settings.EMBEDDING_MODEL
|
| 186 |
)
|
| 187 |
|
| 188 |
documents_page_content:list = [i.page_content for i in split_docs]
|
|
|
|
| 240 |
|
| 241 |
# create the open-source embedding function
|
| 242 |
embedding_function = SentenceTransformerEmbeddings(
|
| 243 |
+
#model_name=os.getenv("EMBEDDING_MODEL"),
|
| 244 |
+
model_name=settings.EMBEDDING_MODEL
|
| 245 |
)
|
| 246 |
|
| 247 |
#method of integrating Chroma and Langchain
|
{innovation_pathfinder_ai/backend/app → app}/vector_store/initialize_chroma_db.py
RENAMED
|
@@ -1,14 +1,17 @@
|
|
| 1 |
from langchain_community.vectorstores import Chroma
|
|
|
|
| 2 |
import chromadb
|
| 3 |
-
import dotenv
|
| 4 |
-
import os
|
| 5 |
|
| 6 |
-
dotenv.load_dotenv()
|
| 7 |
-
persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
|
|
|
| 8 |
|
| 9 |
def initialize_chroma_db() -> Chroma:
|
| 10 |
-
collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
|
| 11 |
-
|
|
|
|
| 12 |
client = chromadb.PersistentClient(
|
| 13 |
path=persist_directory
|
| 14 |
)
|
|
|
|
| 1 |
from langchain_community.vectorstores import Chroma
|
| 2 |
+
from app.core.config import settings
|
| 3 |
import chromadb
|
| 4 |
+
#import dotenv
|
| 5 |
+
#import os
|
| 6 |
|
| 7 |
+
#dotenv.load_dotenv()
|
| 8 |
+
#persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
| 9 |
+
persist_directory = settings.VECTOR_DATABASE_LOCATION
|
| 10 |
|
| 11 |
def initialize_chroma_db() -> Chroma:
|
| 12 |
+
#collection_name = os.getenv('CONVERSATION_COLLECTION_NAME')
|
| 13 |
+
collection_name = settings.CONVERSATION_COLLECTION_NAME
|
| 14 |
+
|
| 15 |
client = chromadb.PersistentClient(
|
| 16 |
path=persist_directory
|
| 17 |
)
|
{innovation_pathfinder_ai/frontend/assets → assets}/avatar.png
RENAMED
|
File without changes
|
{innovation_pathfinder_ai/frontend/assets → assets}/favicon.ico
RENAMED
|
File without changes
|
innovation_pathfinder_ai/backend/app/utils/__init__.py
DELETED
|
File without changes
|
innovation_pathfinder_ai/frontend/app.py
DELETED
|
@@ -1,143 +0,0 @@
|
|
| 1 |
-
from fastapi import FastAPI
|
| 2 |
-
import gradio as gr
|
| 3 |
-
from gradio.themes.base import Base
|
| 4 |
-
#from innovation_pathfinder_ai.backend.app.api.v1.agents.hf_mixtral_agent import agent_executor
|
| 5 |
-
#from innovation_pathfinder_ai.source_container.container import (
|
| 6 |
-
# all_sources
|
| 7 |
-
#)
|
| 8 |
-
#from innovation_pathfinder_ai.backend.app.utils.utils import extract_urls
|
| 9 |
-
#from innovation_pathfinder_ai.backend.app.utils import logger
|
| 10 |
-
#from innovation_pathfinder_ai.backend.app.vector_store.chroma_vector_store import initialize_chroma_db
|
| 11 |
-
#from innovation_pathfinder_ai.backend.app.utils.utils import (
|
| 12 |
-
# generate_uuid
|
| 13 |
-
#)
|
| 14 |
-
from langchain_community.vectorstores import Chroma
|
| 15 |
-
|
| 16 |
-
import asyncio
|
| 17 |
-
import websockets
|
| 18 |
-
import json
|
| 19 |
-
import dotenv
|
| 20 |
-
import os
|
| 21 |
-
|
| 22 |
-
dotenv.load_dotenv()
|
| 23 |
-
persist_directory = os.getenv('VECTOR_DATABASE_LOCATION')
|
| 24 |
-
|
| 25 |
-
#logger = logger.get_console_logger("app")
|
| 26 |
-
|
| 27 |
-
app = FastAPI()
|
| 28 |
-
|
| 29 |
-
if __name__ == "__main__":
|
| 30 |
-
|
| 31 |
-
#db = initialize_chroma_db()
|
| 32 |
-
|
| 33 |
-
def add_text(history, text):
|
| 34 |
-
history = history + [(text, None)]
|
| 35 |
-
return history, ""
|
| 36 |
-
|
| 37 |
-
def bot(history):
|
| 38 |
-
response = infer(history[-1][0], history)
|
| 39 |
-
# Existing logic remains the same up to the point where you need to call backend operations
|
| 40 |
-
# Example for calling generate_uuid from the backend
|
| 41 |
-
# response = requests.post("http://localhost:8000/add-document")
|
| 42 |
-
#current_id = response.text
|
| 43 |
-
# sources = extract_urls(all_sources)
|
| 44 |
-
# src_list = '\n'.join(sources)
|
| 45 |
-
# current_id = generate_uuid()
|
| 46 |
-
# db.add(
|
| 47 |
-
# ids=[current_id],
|
| 48 |
-
# documents=[response['output']],
|
| 49 |
-
# metadatas=[
|
| 50 |
-
# {
|
| 51 |
-
# "human_message":history[-1][0],
|
| 52 |
-
# "sources": 'Internal Knowledge Base From: \n\n' + src_list
|
| 53 |
-
# }
|
| 54 |
-
# ]
|
| 55 |
-
# )
|
| 56 |
-
# if not sources:
|
| 57 |
-
# response_w_sources = response['output']+"\n\n\n Sources: \n\n\n Internal knowledge base"
|
| 58 |
-
# else:
|
| 59 |
-
# response_w_sources = response['output']+"\n\n\n Sources: \n\n\n"+src_list
|
| 60 |
-
print(response)
|
| 61 |
-
history[-1][1] = response['output']
|
| 62 |
-
# all_sources.clear()
|
| 63 |
-
return history
|
| 64 |
-
|
| 65 |
-
async def ask_question_async(question, history):
|
| 66 |
-
uri = "ws://localhost:8000/chat/agent" # Update this URI to your actual WebSocket endpoint
|
| 67 |
-
async with websockets.connect(uri) as websocket:
|
| 68 |
-
# Prepare the message to send (adjust the structure as needed for your backend)
|
| 69 |
-
message_data = {
|
| 70 |
-
"message": question,
|
| 71 |
-
"history": history
|
| 72 |
-
}
|
| 73 |
-
json_data = json.dumps(message_data)
|
| 74 |
-
await websocket.send(json_data)
|
| 75 |
-
|
| 76 |
-
# Wait for the response
|
| 77 |
-
response_data = await websocket.recv()
|
| 78 |
-
return json.loads(response_data)
|
| 79 |
-
|
| 80 |
-
def infer(question, history):
|
| 81 |
-
# result = agent_executor.invoke(
|
| 82 |
-
# {
|
| 83 |
-
# "input": question,
|
| 84 |
-
# "chat_history": history
|
| 85 |
-
# }
|
| 86 |
-
# )
|
| 87 |
-
# return result
|
| 88 |
-
try:
|
| 89 |
-
# Ensure there's an event loop to run async code
|
| 90 |
-
loop = asyncio.get_event_loop()
|
| 91 |
-
except RuntimeError as ex:
|
| 92 |
-
if "There is no current event loop" in str(ex):
|
| 93 |
-
loop = asyncio.new_event_loop()
|
| 94 |
-
asyncio.set_event_loop(loop)
|
| 95 |
-
|
| 96 |
-
result = loop.run_until_complete(ask_question_async(question, history))
|
| 97 |
-
return result
|
| 98 |
-
|
| 99 |
-
# Run the asynchronous function in the synchronous context
|
| 100 |
-
result = asyncio.get_event_loop().run_until_complete(ask_question_async(question, history))
|
| 101 |
-
return result
|
| 102 |
-
|
| 103 |
-
def vote(data: gr.LikeData):
|
| 104 |
-
if data.liked:
|
| 105 |
-
print("You upvoted this response: " + data.value)
|
| 106 |
-
else:
|
| 107 |
-
print("You downvoted this response: " + data.value)
|
| 108 |
-
|
| 109 |
-
css="""
|
| 110 |
-
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
|
| 111 |
-
"""
|
| 112 |
-
|
| 113 |
-
title = """
|
| 114 |
-
<div style="text-align:left;">
|
| 115 |
-
<p>Hello Human, I am your AI knowledge research assistant. I can explore topics across ArXiv, Wikipedia and use Google search.<br />
|
| 116 |
-
</div>
|
| 117 |
-
"""
|
| 118 |
-
|
| 119 |
-
with gr.Blocks(theme=gr.themes.Soft(), title="AlfredAI - AI Knowledge Research Assistant") as demo:
|
| 120 |
-
# with gr.Tab("Google|Wikipedia|Arxiv"):
|
| 121 |
-
with gr.Column(elem_id="col-container"):
|
| 122 |
-
gr.HTML(title)
|
| 123 |
-
with gr.Row():
|
| 124 |
-
question = gr.Textbox(label="Question",
|
| 125 |
-
placeholder="Type your question and hit Enter",)
|
| 126 |
-
chatbot = gr.Chatbot([],
|
| 127 |
-
elem_id="AI Assistant",
|
| 128 |
-
bubble_full_width=False,
|
| 129 |
-
avatar_images=(None, "./assets/avatar.png"),
|
| 130 |
-
height=480,)
|
| 131 |
-
chatbot.like(vote, None, None)
|
| 132 |
-
clear = gr.Button("Clear")
|
| 133 |
-
question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then(
|
| 134 |
-
bot, chatbot, chatbot
|
| 135 |
-
)
|
| 136 |
-
clear.click(lambda: None, None, chatbot, queue=False)
|
| 137 |
-
with gr.Accordion("Open for More!", open=False):
|
| 138 |
-
gr.Markdown("Nothing yet...")
|
| 139 |
-
|
| 140 |
-
demo.queue().launch(debug=True, favicon_path="assets/favicon.ico", share=True)
|
| 141 |
-
|
| 142 |
-
x = 0 # for debugging purposes
|
| 143 |
-
app = gr.mount_gradio_app(app, demo, path="/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
innovation_pathfinder_ai/source_container/container.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
all_sources = []
|
|
|
|
|
|