Spaces:
Sleeping
Sleeping
Asaad Almutareb
commited on
Commit
·
bec8a7b
1
Parent(s):
a0df48e
added callback
Browse files
innovation_pathfinder_ai/backend/app/api/v1/agents/hf_mixtral_agent.py
CHANGED
|
@@ -17,6 +17,7 @@ from app.utils import logger
|
|
| 17 |
from app.utils import utils
|
| 18 |
from langchain.globals import set_llm_cache
|
| 19 |
from langchain.cache import SQLiteCache
|
|
|
|
| 20 |
|
| 21 |
set_llm_cache(SQLiteCache(database_path=".cache.db"))
|
| 22 |
logger = logger.get_console_logger("hf_mixtral_agent")
|
|
@@ -40,6 +41,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
| 40 |
try:
|
| 41 |
data = await websocket.receive_json()
|
| 42 |
user_message = data["message"]
|
|
|
|
| 43 |
|
| 44 |
# resp = IChatResponse(
|
| 45 |
# sender="you",
|
|
@@ -51,9 +53,9 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
| 51 |
|
| 52 |
# await websocket.send_json(resp.dict())
|
| 53 |
message_id: str = utils.generate_uuid()
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
|
| 58 |
# Load the model from the Hugging Face Hub
|
| 59 |
llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
|
|
@@ -106,7 +108,7 @@ async def websocket_endpoint(websocket: WebSocket):
|
|
| 106 |
handle_parsing_errors=True,
|
| 107 |
)
|
| 108 |
|
| 109 |
-
await agent_executor.arun(input=user_message
|
| 110 |
except WebSocketDisconnect:
|
| 111 |
logger.info("websocket disconnect")
|
| 112 |
break
|
|
|
|
| 17 |
from app.utils import utils
|
| 18 |
from langchain.globals import set_llm_cache
|
| 19 |
from langchain.cache import SQLiteCache
|
| 20 |
+
from app.utils.callback import CustomAsyncCallbackHandler
|
| 21 |
|
| 22 |
set_llm_cache(SQLiteCache(database_path=".cache.db"))
|
| 23 |
logger = logger.get_console_logger("hf_mixtral_agent")
|
|
|
|
| 41 |
try:
|
| 42 |
data = await websocket.receive_json()
|
| 43 |
user_message = data["message"]
|
| 44 |
+
chat_history = data["history"]
|
| 45 |
|
| 46 |
# resp = IChatResponse(
|
| 47 |
# sender="you",
|
|
|
|
| 53 |
|
| 54 |
# await websocket.send_json(resp.dict())
|
| 55 |
message_id: str = utils.generate_uuid()
|
| 56 |
+
custom_handler = CustomAsyncCallbackHandler(
|
| 57 |
+
websocket, message_id=message_id
|
| 58 |
+
)
|
| 59 |
|
| 60 |
# Load the model from the Hugging Face Hub
|
| 61 |
llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
|
|
|
|
| 108 |
handle_parsing_errors=True,
|
| 109 |
)
|
| 110 |
|
| 111 |
+
await agent_executor.arun(input=user_message, chat_history=chat_history, callbacks=[custom_handler])
|
| 112 |
except WebSocketDisconnect:
|
| 113 |
logger.info("websocket disconnect")
|
| 114 |
break
|
innovation_pathfinder_ai/backend/app/schemas/message_schema.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
-
from pydantic import BaseModel
|
| 2 |
from typing import List, Tuple, Optional
|
|
|
|
|
|
|
| 3 |
|
| 4 |
class InferRequest(BaseModel):
|
| 5 |
question: str
|
|
@@ -8,3 +10,30 @@ class InferRequest(BaseModel):
|
|
| 8 |
class BotRequest(BaseModel):
|
| 9 |
history: List[Tuple[str, str]]
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, validator
|
| 2 |
from typing import List, Tuple, Optional
|
| 3 |
+
from app.utils.utils import generate_uuid
|
| 4 |
+
from typing import Any
|
| 5 |
|
| 6 |
class InferRequest(BaseModel):
|
| 7 |
question: str
|
|
|
|
| 10 |
class BotRequest(BaseModel):
|
| 11 |
history: List[Tuple[str, str]]
|
| 12 |
|
| 13 |
+
class IChatResponse(BaseModel):
|
| 14 |
+
"""Chat response schema."""
|
| 15 |
+
|
| 16 |
+
id: str
|
| 17 |
+
message_id: str
|
| 18 |
+
sender: str
|
| 19 |
+
message: Any
|
| 20 |
+
type: str
|
| 21 |
+
suggested_responses: list[str] = []
|
| 22 |
+
|
| 23 |
+
@validator("id", "message_id", pre=True, allow_reuse=True)
|
| 24 |
+
def check_ids(cls, v):
|
| 25 |
+
if v == "" or v is None:
|
| 26 |
+
return generate_uuid()
|
| 27 |
+
return v
|
| 28 |
+
|
| 29 |
+
# @validator("sender")
|
| 30 |
+
# def sender_must_be_bot_or_you(cls, v):
|
| 31 |
+
# if v not in ["bot", "you"]:
|
| 32 |
+
# raise ValueError("sender must be bot or you")
|
| 33 |
+
# return v
|
| 34 |
+
|
| 35 |
+
# @validator("type")
|
| 36 |
+
# def validate_message_type(cls, v):
|
| 37 |
+
# if v not in ["start", "stream", "end", "error", "info"]:
|
| 38 |
+
# raise ValueError("type must be start, stream or end")
|
| 39 |
+
# return v
|
innovation_pathfinder_ai/backend/app/utils/callback.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.schemas.message_schema import IChatResponse
|
| 2 |
+
from langchain.callbacks.base import AsyncCallbackHandler
|
| 3 |
+
from app.utils.utils import generate_uuid
|
| 4 |
+
from fastapi import WebSocket
|
| 5 |
+
from uuid import UUID
|
| 6 |
+
from typing import Any
|
| 7 |
+
from langchain.schema.agent import AgentFinish
|
| 8 |
+
from langchain.schema.output import LLMResult
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
DEFAULT_ANSWER_PREFIX_TOKENS = ["Final", " Answer", ":"]
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class CustomAsyncCallbackHandler(AsyncCallbackHandler):
|
| 15 |
+
def append_to_last_tokens(self, token: str) -> None:
|
| 16 |
+
self.last_tokens.append(token)
|
| 17 |
+
self.last_tokens_stripped.append(token.strip())
|
| 18 |
+
if len(self.last_tokens) > len(self.answer_prefix_tokens):
|
| 19 |
+
self.last_tokens.pop(0)
|
| 20 |
+
self.last_tokens_stripped.pop(0)
|
| 21 |
+
|
| 22 |
+
def check_if_answer_reached(self) -> bool:
|
| 23 |
+
if self.strip_tokens:
|
| 24 |
+
return self.last_tokens_stripped == self.answer_prefix_tokens_stripped
|
| 25 |
+
else:
|
| 26 |
+
return self.last_tokens == self.answer_prefix_tokens
|
| 27 |
+
|
| 28 |
+
def update_message_id(self, message_id: str = generate_uuid()):
|
| 29 |
+
self.message_id = message_id
|
| 30 |
+
|
| 31 |
+
def __init__(
|
| 32 |
+
self,
|
| 33 |
+
websocket: WebSocket,
|
| 34 |
+
*,
|
| 35 |
+
message_id: str = generate_uuid(),
|
| 36 |
+
answer_prefix_tokens: list[str] | None = None,
|
| 37 |
+
strip_tokens: bool = True,
|
| 38 |
+
stream_prefix: bool = False,
|
| 39 |
+
) -> None:
|
| 40 |
+
"""Instantiate FinalStreamingStdOutCallbackHandler.
|
| 41 |
+
|
| 42 |
+
Args:
|
| 43 |
+
answer_prefix_tokens: Token sequence that prefixes the answer.
|
| 44 |
+
Default is ["Final", "Answer", ":"]
|
| 45 |
+
strip_tokens: Ignore white spaces and new lines when comparing
|
| 46 |
+
answer_prefix_tokens to last tokens? (to determine if answer has been
|
| 47 |
+
reached)
|
| 48 |
+
stream_prefix: Should answer prefix itself also be streamed?
|
| 49 |
+
"""
|
| 50 |
+
self.websocket: WebSocket = websocket
|
| 51 |
+
self.message_id: str = message_id
|
| 52 |
+
self.text: str = ""
|
| 53 |
+
self.started: bool = False
|
| 54 |
+
|
| 55 |
+
if answer_prefix_tokens is None:
|
| 56 |
+
self.answer_prefix_tokens = DEFAULT_ANSWER_PREFIX_TOKENS
|
| 57 |
+
else:
|
| 58 |
+
self.answer_prefix_tokens = answer_prefix_tokens
|
| 59 |
+
if strip_tokens:
|
| 60 |
+
self.answer_prefix_tokens_stripped = [
|
| 61 |
+
token.strip() for token in self.answer_prefix_tokens
|
| 62 |
+
]
|
| 63 |
+
else:
|
| 64 |
+
self.answer_prefix_tokens_stripped = self.answer_prefix_tokens
|
| 65 |
+
self.last_tokens = [""] * len(self.answer_prefix_tokens)
|
| 66 |
+
self.last_tokens_stripped = [""] * len(self.answer_prefix_tokens)
|
| 67 |
+
self.strip_tokens = strip_tokens
|
| 68 |
+
self.stream_prefix = stream_prefix
|
| 69 |
+
self.answer_reached = False
|
| 70 |
+
|
| 71 |
+
async def on_llm_start(
|
| 72 |
+
self, serialized: dict[str, Any], prompts: list[str], **kwargs: Any
|
| 73 |
+
) -> None:
|
| 74 |
+
"""Run when LLM starts running."""
|
| 75 |
+
|
| 76 |
+
resp = IChatResponse(
|
| 77 |
+
id="",
|
| 78 |
+
message_id=self.message_id,
|
| 79 |
+
sender="bot",
|
| 80 |
+
message=self.loading_card.to_dict(),
|
| 81 |
+
type="start",
|
| 82 |
+
)
|
| 83 |
+
await self.websocket.send_json(resp.dict())
|
| 84 |
+
|
| 85 |
+
async def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
|
| 86 |
+
"""Run on new LLM token. Only available when streaming is enabled."""
|
| 87 |
+
# Remember the last n tokens, where n = len(answer_prefix_tokens)
|
| 88 |
+
self.append_to_last_tokens(token)
|
| 89 |
+
|
| 90 |
+
self.text += f"{token}"
|
| 91 |
+
resp = IChatResponse(
|
| 92 |
+
# id=generate_uuid(),
|
| 93 |
+
id="",
|
| 94 |
+
message_id=self.message_id,
|
| 95 |
+
sender="bot",
|
| 96 |
+
message=self.adaptive_card.to_dict(),
|
| 97 |
+
type="stream",
|
| 98 |
+
)
|
| 99 |
+
await self.websocket.send_json(resp.dict())
|
| 100 |
+
|
| 101 |
+
async def on_llm_end(
|
| 102 |
+
self,
|
| 103 |
+
response: LLMResult,
|
| 104 |
+
*,
|
| 105 |
+
run_id: UUID,
|
| 106 |
+
parent_run_id: UUID | None = None,
|
| 107 |
+
tags: list[str] | None = None,
|
| 108 |
+
**kwargs: Any,
|
| 109 |
+
) -> None:
|
| 110 |
+
"""Run when LLM ends running."""
|
| 111 |
+
resp = IChatResponse(
|
| 112 |
+
id="",
|
| 113 |
+
message_id=self.message_id,
|
| 114 |
+
sender="bot",
|
| 115 |
+
message=self.adaptive_card.to_dict(),
|
| 116 |
+
type="end",
|
| 117 |
+
)
|
| 118 |
+
await self.websocket.send_json(resp.dict())
|
innovation_pathfinder_ai/frontend/app.py
CHANGED
|
@@ -60,6 +60,20 @@ if __name__ == "__main__":
|
|
| 60 |
history[-1][1] = response['output']
|
| 61 |
# all_sources.clear()
|
| 62 |
return history
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
def infer(question, history):
|
| 65 |
# result = agent_executor.invoke(
|
|
@@ -69,19 +83,16 @@ if __name__ == "__main__":
|
|
| 69 |
# }
|
| 70 |
# )
|
| 71 |
# return result
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
# Wait for the response
|
| 83 |
-
response_data = await websocket.recv()
|
| 84 |
-
return json.loads(response_data)
|
| 85 |
|
| 86 |
# Run the asynchronous function in the synchronous context
|
| 87 |
result = asyncio.get_event_loop().run_until_complete(ask_question_async(question, history))
|
|
@@ -113,7 +124,7 @@ if __name__ == "__main__":
|
|
| 113 |
chatbot = gr.Chatbot([],
|
| 114 |
elem_id="AI Assistant",
|
| 115 |
bubble_full_width=False,
|
| 116 |
-
avatar_images=(None, "./
|
| 117 |
height=480,)
|
| 118 |
chatbot.like(vote, None, None)
|
| 119 |
clear = gr.Button("Clear")
|
|
|
|
| 60 |
history[-1][1] = response['output']
|
| 61 |
# all_sources.clear()
|
| 62 |
return history
|
| 63 |
+
|
| 64 |
+
async def ask_question_async(question, history):
|
| 65 |
+
uri = "ws://localhost:8000/chat/agent" # Update this URI to your actual WebSocket endpoint
|
| 66 |
+
async with websockets.connect(uri) as websocket:
|
| 67 |
+
# Prepare the message to send (adjust the structure as needed for your backend)
|
| 68 |
+
message_data = {
|
| 69 |
+
"message": question,
|
| 70 |
+
"history": history
|
| 71 |
+
}
|
| 72 |
+
await websocket.send(json.dumps(message_data))
|
| 73 |
+
|
| 74 |
+
# Wait for the response
|
| 75 |
+
response_data = await websocket.recv()
|
| 76 |
+
return json.loads(response_data)
|
| 77 |
|
| 78 |
def infer(question, history):
|
| 79 |
# result = agent_executor.invoke(
|
|
|
|
| 83 |
# }
|
| 84 |
# )
|
| 85 |
# return result
|
| 86 |
+
try:
|
| 87 |
+
# Ensure there's an event loop to run async code
|
| 88 |
+
loop = asyncio.get_event_loop()
|
| 89 |
+
except RuntimeError as ex:
|
| 90 |
+
if "There is no current event loop" in str(ex):
|
| 91 |
+
loop = asyncio.new_event_loop()
|
| 92 |
+
asyncio.set_event_loop(loop)
|
| 93 |
+
|
| 94 |
+
result = loop.run_until_complete(ask_question_async(question, history))
|
| 95 |
+
return result
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
# Run the asynchronous function in the synchronous context
|
| 98 |
result = asyncio.get_event_loop().run_until_complete(ask_question_async(question, history))
|
|
|
|
| 124 |
chatbot = gr.Chatbot([],
|
| 125 |
elem_id="AI Assistant",
|
| 126 |
bubble_full_width=False,
|
| 127 |
+
avatar_images=(None, "./assets/avatar.png"),
|
| 128 |
height=480,)
|
| 129 |
chatbot.like(vote, None, None)
|
| 130 |
clear = gr.Button("Clear")
|