Spaces:

sabazo
/

innoSageAgentOne

Sleeping

App Files Files Community

Asaad Almutareb committed on Apr 2, 2024

Commit

bec8a7b

1 Parent(s): a0df48e

added callback

Browse files

Files changed (4) hide show

innovation_pathfinder_ai/backend/app/api/v1/agents/hf_mixtral_agent.py +6 -4
innovation_pathfinder_ai/backend/app/schemas/message_schema.py +30 -1
innovation_pathfinder_ai/backend/app/utils/callback.py +118 -0
innovation_pathfinder_ai/frontend/app.py +25 -14

innovation_pathfinder_ai/backend/app/api/v1/agents/hf_mixtral_agent.py CHANGED Viewed

@@ -17,6 +17,7 @@ from app.utils import logger
 from app.utils import utils
 from langchain.globals import set_llm_cache
 from langchain.cache import SQLiteCache
 set_llm_cache(SQLiteCache(database_path=".cache.db"))
 logger = logger.get_console_logger("hf_mixtral_agent")
@@ -40,6 +41,7 @@ async def websocket_endpoint(websocket: WebSocket):
         try:
             data = await websocket.receive_json()
             user_message = data["message"]
             # resp = IChatResponse(
             #     sender="you",
@@ -51,9 +53,9 @@ async def websocket_endpoint(websocket: WebSocket):
             # await websocket.send_json(resp.dict())
             message_id: str = utils.generate_uuid()
-            # custom_handler = CustomFinalStreamingStdOutCallbackHandler(
-            #     websocket, message_id=message_id
-            # )
             # Load the model from the Hugging Face Hub
             llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -106,7 +108,7 @@ async def websocket_endpoint(websocket: WebSocket):
                 handle_parsing_errors=True,
                 )
-            await agent_executor.arun(input=user_message) #, callbacks=[custom_handler]
         except WebSocketDisconnect:
             logger.info("websocket disconnect")
             break

 from app.utils import utils
 from langchain.globals import set_llm_cache
 from langchain.cache import SQLiteCache
+from app.utils.callback import CustomAsyncCallbackHandler
 set_llm_cache(SQLiteCache(database_path=".cache.db"))
 logger = logger.get_console_logger("hf_mixtral_agent")
         try:
             data = await websocket.receive_json()
             user_message = data["message"]
+            chat_history = data["history"]
             # resp = IChatResponse(
             #     sender="you",
             # await websocket.send_json(resp.dict())
             message_id: str = utils.generate_uuid()
+            custom_handler = CustomAsyncCallbackHandler(
+                 websocket, message_id=message_id
+             )
             # Load the model from the Hugging Face Hub
             llm = HuggingFaceEndpoint(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
                 handle_parsing_errors=True,
                 )
+            await agent_executor.arun(input=user_message, chat_history=chat_history, callbacks=[custom_handler])
         except WebSocketDisconnect:
             logger.info("websocket disconnect")
             break

innovation_pathfinder_ai/backend/app/schemas/message_schema.py CHANGED Viewed

@@ -1,5 +1,7 @@
-from pydantic import BaseModel
 from typing import List, Tuple, Optional
 class InferRequest(BaseModel):
     question: str
@@ -8,3 +10,30 @@ class InferRequest(BaseModel):
 class BotRequest(BaseModel):
     history: List[Tuple[str, str]]

+from pydantic import BaseModel, validator
 from typing import List, Tuple, Optional
+from app.utils.utils import generate_uuid
+from typing import Any
 class InferRequest(BaseModel):
     question: str
 class BotRequest(BaseModel):
     history: List[Tuple[str, str]]
+class IChatResponse(BaseModel):
+    """Chat response schema."""
+    id: str
+    message_id: str
+    sender: str
+    message: Any
+    type: str
+    suggested_responses: list[str] = []
+    @validator("id", "message_id", pre=True, allow_reuse=True)
+    def check_ids(cls, v):
+        if v == "" or v is None:
+            return generate_uuid()
+        return v
+    # @validator("sender")
+    # def sender_must_be_bot_or_you(cls, v):
+    #     if v not in ["bot", "you"]:
+    #         raise ValueError("sender must be bot or you")
+    #     return v
+    # @validator("type")
+    # def validate_message_type(cls, v):
+    #     if v not in ["start", "stream", "end", "error", "info"]:
+    #         raise ValueError("type must be start, stream or end")
+    #     return v

innovation_pathfinder_ai/backend/app/utils/callback.py ADDED Viewed

	@@ -0,0 +1,118 @@

+from app.schemas.message_schema import IChatResponse
+from langchain.callbacks.base import AsyncCallbackHandler
+from app.utils.utils import generate_uuid
+from fastapi import WebSocket
+from uuid import UUID
+from typing import Any
+from langchain.schema.agent import AgentFinish
+from langchain.schema.output import LLMResult
+DEFAULT_ANSWER_PREFIX_TOKENS = ["Final", " Answer", ":"]
+class CustomAsyncCallbackHandler(AsyncCallbackHandler):
+    def append_to_last_tokens(self, token: str) -> None:
+        self.last_tokens.append(token)
+        self.last_tokens_stripped.append(token.strip())
+        if len(self.last_tokens) > len(self.answer_prefix_tokens):
+            self.last_tokens.pop(0)
+            self.last_tokens_stripped.pop(0)
+    def check_if_answer_reached(self) -> bool:
+        if self.strip_tokens:
+            return self.last_tokens_stripped == self.answer_prefix_tokens_stripped
+        else:
+            return self.last_tokens == self.answer_prefix_tokens
+    def update_message_id(self, message_id: str = generate_uuid()):
+        self.message_id = message_id
+    def __init__(
+        self,
+        websocket: WebSocket,
+        *,
+        message_id: str = generate_uuid(),
+        answer_prefix_tokens: list[str] | None = None,
+        strip_tokens: bool = True,
+        stream_prefix: bool = False,
+    ) -> None:
+        """Instantiate FinalStreamingStdOutCallbackHandler.
+        Args:
+            answer_prefix_tokens: Token sequence that prefixes the answer.
+                Default is ["Final", "Answer", ":"]
+            strip_tokens: Ignore white spaces and new lines when comparing
+                answer_prefix_tokens to last tokens? (to determine if answer has been
+                reached)
+            stream_prefix: Should answer prefix itself also be streamed?
+        """
+        self.websocket: WebSocket = websocket
+        self.message_id: str = message_id
+        self.text: str = ""
+        self.started: bool = False
+        if answer_prefix_tokens is None:
+            self.answer_prefix_tokens = DEFAULT_ANSWER_PREFIX_TOKENS
+        else:
+            self.answer_prefix_tokens = answer_prefix_tokens
+        if strip_tokens:
+            self.answer_prefix_tokens_stripped = [
+                token.strip() for token in self.answer_prefix_tokens
+            ]
+        else:
+            self.answer_prefix_tokens_stripped = self.answer_prefix_tokens
+        self.last_tokens = [""] * len(self.answer_prefix_tokens)
+        self.last_tokens_stripped = [""] * len(self.answer_prefix_tokens)
+        self.strip_tokens = strip_tokens
+        self.stream_prefix = stream_prefix
+        self.answer_reached = False
+    async def on_llm_start(
+        self, serialized: dict[str, Any], prompts: list[str], **kwargs: Any
+    ) -> None:
+        """Run when LLM starts running."""
+        resp = IChatResponse(
+            id="",
+            message_id=self.message_id,
+            sender="bot",
+            message=self.loading_card.to_dict(),
+            type="start",
+        )
+        await self.websocket.send_json(resp.dict())
+    async def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        """Run on new LLM token. Only available when streaming is enabled."""
+        # Remember the last n tokens, where n = len(answer_prefix_tokens)
+        self.append_to_last_tokens(token)
+        self.text += f"{token}"
+        resp = IChatResponse(
+            # id=generate_uuid(),
+            id="",
+            message_id=self.message_id,
+            sender="bot",
+            message=self.adaptive_card.to_dict(),
+            type="stream",
+        )
+        await self.websocket.send_json(resp.dict())
+    async def on_llm_end(
+        self,
+        response: LLMResult,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Run when LLM ends running."""
+        resp = IChatResponse(
+            id="",
+            message_id=self.message_id,
+            sender="bot",
+            message=self.adaptive_card.to_dict(),
+            type="end",
+        )
+        await self.websocket.send_json(resp.dict())

innovation_pathfinder_ai/frontend/app.py CHANGED Viewed

@@ -60,6 +60,20 @@ if __name__ == "__main__":
         history[-1][1] = response['output']
        # all_sources.clear()
         return history
     def infer(question, history):
         # result = agent_executor.invoke(
@@ -69,19 +83,16 @@ if __name__ == "__main__":
         #     }
         # )
         # return result
-        async def ask_question_async(question, history):
-            uri = "ws://localhost:8000/chat/agent"  # Update this URI to your actual WebSocket endpoint
-            async with websockets.connect(uri) as websocket:
-                # Prepare the message to send (adjust the structure as needed for your backend)
-                message_data = {
-                    "message": question,
-                    "history": history
-                }
-                await websocket.send(json.dumps(message_data))
-                # Wait for the response
-                response_data = await websocket.recv()
-                return json.loads(response_data)
         # Run the asynchronous function in the synchronous context
         result = asyncio.get_event_loop().run_until_complete(ask_question_async(question, history))
@@ -113,7 +124,7 @@ if __name__ == "__main__":
                 chatbot = gr.Chatbot([],
                                      elem_id="AI Assistant",
                                      bubble_full_width=False,
-                                     avatar_images=(None, "./innovation_pathfinder_ai/assets/avatar.png"),
                                      height=480,)
                 chatbot.like(vote, None, None)
                 clear = gr.Button("Clear")

         history[-1][1] = response['output']
        # all_sources.clear()
         return history
+    async def ask_question_async(question, history):
+        uri = "ws://localhost:8000/chat/agent"  # Update this URI to your actual WebSocket endpoint
+        async with websockets.connect(uri) as websocket:
+            # Prepare the message to send (adjust the structure as needed for your backend)
+            message_data = {
+                "message": question,
+                "history": history
+            }
+            await websocket.send(json.dumps(message_data))
+            # Wait for the response
+            response_data = await websocket.recv()
+            return json.loads(response_data)
     def infer(question, history):
         # result = agent_executor.invoke(
         #     }
         # )
         # return result
+        try:
+            # Ensure there's an event loop to run async code
+            loop = asyncio.get_event_loop()
+        except RuntimeError as ex:
+            if "There is no current event loop" in str(ex):
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+        result = loop.run_until_complete(ask_question_async(question, history))
+        return result
         # Run the asynchronous function in the synchronous context
         result = asyncio.get_event_loop().run_until_complete(ask_question_async(question, history))
                 chatbot = gr.Chatbot([],
                                      elem_id="AI Assistant",
                                      bubble_full_width=False,
+                                     avatar_images=(None, "./assets/avatar.png"),
                                      height=480,)
                 chatbot.like(vote, None, None)
                 clear = gr.Button("Clear")