File size: 22,190 Bytes
c6a40ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ac7431
c6a40ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6bbb2b6
 
c6a40ea
 
 
d963816
c6a40ea
 
 
d963816
c6a40ea
d963816
c6a40ea
d963816
c6a40ea
 
6bbb2b6
c6a40ea
 
 
 
d963816
 
 
c6a40ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85772cd
ec9c116
 
 
 
 
b7c93f0
 
 
ec9c116
b7c93f0
ec9c116
 
b7c93f0
ec9c116
b7c93f0
 
ec9c116
 
85772cd
 
 
 
cc7f7be
c8f5917
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc7f7be
09ee3a9
c6a40ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
# app.py
import json
import os
import sys
import time
import logging
from pathlib import Path
from typing import Any, Dict, List

from neo4j import GraphDatabase
from openai import OpenAI
import subprocess

import time

import requests
import zipfile
import gradio as gr

# Import dotenv to load environment variables from a .env file
from dotenv import load_dotenv

# Load environment variables from a .env file
load_dotenv()

# -----------------------------
# Configuration
# -----------------------------
# All settings come from environment variables (loaded earlier via load_dotenv).
OPENAI_API_KEY   = os.getenv("OPENAI_API_KEY")
NEO4J_URI        = os.getenv("NEO4J_URI")
NEO4J_USER       = os.getenv("NEO4J_USER")
NEO4J_PASSWORD   = os.getenv("NEO4J_PASSWORD")
# No default is applied here: the neo4j driver treats None as "use the
# server's default database".
NEO4J_DATABASE   = os.getenv("NEO4J_DATABASE")

# Fixed: the previous fallback was " gpt-o1" (leading space, invalid model id)
# while the comment promised gpt-4o-mini; use the documented fallback.
CHAT_MODEL = os.getenv("CHAT_MODEL", "gpt-4o-mini")
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-small") # Default if not set

# -----------------------------
# Logging setup
# -----------------------------
# Application logger: single stdout handler, INFO level.
logger = logging.getLogger("proc-assistant")
# Start from a clean slate so repeated imports (e.g. notebook re-runs)
# don't stack duplicate handlers. Clearing an empty list is a no-op.
if logger.hasHandlers():
    logger.handlers.clear()
fmt = logging.Formatter(
    "%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(fmt)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# -----------------------------
# SYSTEM_PROMPT
# -----------------------------
# Instructions sent as the first ("system") message of every conversation.
# NOTE: this is runtime prompt text the model receives verbatim — any edit
# here changes assistant behavior.
SYSTEM_PROMPT = """

You are a Signavio Repository assistant and trying to answer questions based on a repository
which can be accessed via cypher queries.

You have access to the following tools:
- get_nodes()
- get_index()
- submit_cypher()


1. Always first call the functions - get_nodes() get_index()
so that you have a complete list of nodes and full text index.

2. Answer questions by creating cypher queries you submit over the tool submit_cypher()

3. Always query the name and Description of a node. 

4. Where an index for the node exists, create a query using a fulltext search

5. Always request related objects using a generic query 'match (n)-[]-(m)' to retrieve all nodes 'm' related to the node 'n'

6. If no responses are found (empty results), retry with a broader or parent term (e.g., "procurement" if "indirect
     procurement" fails).
 
7. Use multiple function calls if several processes are needed.

8. Use only the returned results to answer and always cite the nodes names and Type.

9. If results are poor, ask the user to clarify, especially to clarify which node
supports answering the question best.

10. Inform the user the exact cypher query you submit.


"""

# -----------------------------
# Tool implementation / get_nodes
# -----------------------------
def get_nodes() -> str:
    """
    Return every distinct node label in the database as a JSON array string.

    On failure, returns a JSON object {"error": "<message>"} instead of raising,
    so the calling model can see and react to the problem.
    """
    logger.info("Getting all node labels.")
    query = "MATCH (n) RETURN DISTINCT labels(n) AS labels"
    try:
        with driver.session(database=NEO4J_DATABASE) as session:
            # Each record carries a list of labels; flatten them all.
            labels = []
            for record in session.run(query):
                labels.extend(record["labels"])
            logger.info("Retrieved %d distinct node labels.", len(labels))
            return json.dumps(labels, ensure_ascii=False)
    except Exception as exc:
        logger.error("Error getting node labels: %s", exc)
        return json.dumps({"error": str(exc)}, ensure_ascii=False)

# -----------------------------
# Tool implementation / get_relations
# -----------------------------
def get_relations() -> str:
    """
    Return every distinct relationship type in the database as a JSON string.

    Errors are reported as a JSON object {"error": "<message>"} rather than
    raised, so the tool-calling loop keeps running.
    """
    logger.info("Getting all relationship types.")
    query = "MATCH ()-[r]-() RETURN DISTINCT type(r) AS type"
    try:
        with driver.session(database=NEO4J_DATABASE) as session:
            rel_types = [row["type"] for row in session.run(query)]
            logger.info("Retrieved %d distinct relationship types.", len(rel_types))
            return json.dumps(rel_types, ensure_ascii=False)
    except Exception as exc:
        logger.error("Error getting relationship types: %s", exc)
        return json.dumps({"error": str(exc)}, ensure_ascii=False)

# -----------------------------
# Tool implementation / get_index
# -----------------------------
def get_index() -> str:
    """
    Return the names of all fulltext indexes in the database as a JSON string.

    Errors come back as a JSON object {"error": "<message>"} so the model can
    handle them gracefully.
    """
    logger.info("Getting all fulltext index names.")
    query = "SHOW FULLTEXT INDEXES YIELD name"
    try:
        with driver.session(database=NEO4J_DATABASE) as session:
            names = [row["name"] for row in session.run(query)]
            logger.info("Retrieved %d fulltext index names.", len(names))
            return json.dumps(names, ensure_ascii=False)
    except Exception as exc:
        logger.error("Error getting fulltext index names: %s", exc)
        return json.dumps({"error": str(exc)}, ensure_ascii=False)

# -----------------------------
# Tool implementation / submit_cypher
# -----------------------------
def submit_cypher(cypher_query: str) -> str:
    """
    Executes a Cypher query against the Neo4j database and returns the results
    as a JSON string.

    NOTE: the query text originates from the LLM tool call and is executed
    verbatim; any failure is returned as {"error": "<message>"} instead of
    raised, so the conversation loop continues.
    """
    logger.info("Executing Cypher query: %s", cypher_query)
    try:
        with driver.session(database=NEO4J_DATABASE) as session:
            rows = session.run(cypher_query).data()
            logger.info("Cypher query returned %d rows.", len(rows))
            return json.dumps(rows, ensure_ascii=False)
    except Exception as exc:
        logger.error("Error executing Cypher query: %s", exc)
        return json.dumps({"error": str(exc)}, ensure_ascii=False)


# -----------------------------
# Tools definition
# -----------------------------
def _tool_spec(name, description, properties=None, required=None):
    """Build one OpenAI function-tool schema entry (strict object parameters)."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties if properties is not None else {},
                "required": required if required is not None else [],
                "additionalProperties": False
            }
        }
    }

# Schemas advertised to the model; names must match the Python tool functions.
TOOLS = [
    _tool_spec(
        "get_nodes",
        "Returns a list of all node labels in the database."),
    _tool_spec(
        "get_relations",
        "Returns a list of all relationship types in the database."),
    _tool_spec(
        "get_index",
        "Returns a list of all fulltext index names in the database."),
    _tool_spec(
        "submit_cypher",
        "Executes a Cypher query against the Neo4j database and returns the results as a JSON string.",
        properties={
            "cypher_query": {
                "type": "string",
                "description": "The Cypher query to execute."
            }
        },
        required=["cypher_query"]),
]

# -----------------------------
# Chatbot assistant function
# -----------------------------
def assistant_reply(client: OpenAI, user_query: str, history: "list[dict] | None" = None) -> str:
    """
    Provides a conversational response to the user query using the OpenAI Chat API,
    optionally calling tools to get relevant information from Neo4j.

    Args:
        client: The initialized OpenAI client object.
        user_query: The user's question or request.
        history: List of previous message dictionaries in the conversation,
                 formatted as [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}, ...].
                 Defaults to an empty conversation.

    Returns:
        The assistant's reply as a string (an error description string on failure).
    """
    # Bug fix: the original signature used a mutable default argument
    # (history=[]), a Python anti-pattern; normalize a None sentinel instead.
    if history is None:
        history = []

    logger.info("assistant_reply received user_query: %r", user_query)
    # Log history carefully to avoid excessive output with large histories.
    if len(history) < 20:  # Log full history if relatively small
        logger.info("assistant_reply received history: %r", history)
    else:  # Log a summary if history is longer
        logger.info("assistant_reply received history length: %d", len(history))
        logger.info("assistant_reply first 10 history items: %r", history[:10])
        logger.info("assistant_reply last 10 history items: %r", history[-10:])

    # Conversation order: system prompt, prior turns, then the new user message.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(history)
    messages.append({"role": "user", "content": user_query})

    logger.info("Messages prepared for OpenAI API (first call): %r", messages)

    try:
        # First API call: the model either answers directly or requests tools.
        logger.info("Calling OpenAI chat completion (first call) with model: %s", CHAT_MODEL)
        response = client.chat.completions.create(
            model=CHAT_MODEL,
            messages=messages,
            tools=TOOLS,
            tool_choice="auto",
        )
        response_message = response.choices[0].message
        logger.info("OpenAI response (first call) message object: %s", response_message)

        # Guard clause: no tool use requested — return the direct answer.
        if not response_message.tool_calls:
            logger.info("No tool calls detected. Returning initial response content.")
            initial_response_content = response_message.content if response_message.content is not None else ""
            logger.info("Initial assistant response content: %s", initial_response_content)
            return initial_response_content

        logger.info("Tool calls detected: %s", response_message.tool_calls)
        available_functions = {
            "get_nodes": get_nodes,
            "get_relations": get_relations,
            "get_index": get_index,
            "submit_cypher": submit_cypher,
        }

        # The assistant message carrying tool_calls must precede the tool
        # output messages in the conversation we send back to the API.
        messages.append(response_message)
        logger.info("Messages after appending assistant tool_calls message: %r", messages)

        # Execute each requested tool and append its output as a 'tool' message.
        for tool_call in response_message.tool_calls:
            function_name = tool_call.function.name
            function_to_call = available_functions.get(function_name)

            if function_to_call is None:
                # The model referenced a tool we don't have (hallucinated call);
                # report it back as a tool message so the exchange stays well-formed.
                logger.warning("Function '%s' called by model not found in available_functions", function_name)
                messages.append(
                    {
                        "tool_call_id": tool_call.id,
                        "role": "tool",
                        "content": f"Error: Tool '{function_name}' not found.",
                        "name": function_name,
                    }
                )
                logger.info("Appended 'tool not found' message: %r", messages[-1])
                continue

            try:
                # Arguments arrive as a JSON string from the model.
                function_args = json.loads(tool_call.function.arguments)
                logger.info("Parsed function arguments: %r", function_args)
                logger.info("Calling tool function: %s with args: %r", function_name, function_args)
                # submit_cypher is the only tool that takes an argument.
                if function_name == "submit_cypher":
                    function_response_content = function_to_call(function_args.get("cypher_query"))
                else:
                    function_response_content = function_to_call()

                logger.info("Tool function response content (first 500 chars): %s", function_response_content[:500])

                # Tool responses require role='tool', the originating
                # tool_call_id, the tool name, and string content.
                messages.append(
                    {
                        "tool_call_id": tool_call.id,
                        "role": "tool",
                        "content": function_response_content,
                        "name": function_name,
                    }
                )
                logger.info("Appended tool output message: %r", messages[-1])

            except json.JSONDecodeError:
                logger.error("Error decoding function arguments JSON: %s", tool_call.function.arguments)
                # Invalid arguments: surface the problem to the model as tool output.
                messages.append(
                    {
                        "tool_call_id": tool_call.id,
                        "role": "tool",
                        "content": f"Error: Invalid JSON arguments for tool '{function_name}'.",
                        "name": function_name,
                    }
                )
                logger.info("Appended tool invalid args error message: %r", messages[-1])

            except Exception as e:
                logger.error("Error executing tool '%s': %s", function_name, e)
                logger.exception("Tool execution traceback:")
                # Execution failed: report the error text as the tool output.
                messages.append(
                    {
                        "tool_call_id": tool_call.id,
                        "role": "tool",
                        "content": f"Error executing tool '{function_name}': {e}",
                        "name": function_name,
                    }
                )
                logger.info("Appended tool execution error message: %r", messages[-1])

        # Second API call: let the model compose the final answer from the
        # tool outputs now present in the conversation.
        logger.info("Calling OpenAI chat completion (second call after tools) with model: %s", CHAT_MODEL)
        logger.info("Messages prepared for OpenAI API (second call): %r", messages)

        second_response = client.chat.completions.create(
            model=CHAT_MODEL,
            messages=messages,
        )
        final_response_message = second_response.choices[0].message
        logger.info("OpenAI response (second call) message object: %s", final_response_message)

        # Coerce None content to an empty string for a stable return type.
        final_response = final_response_message.content if final_response_message.content is not None else ""
        logger.info("Final assistant response content (after tool): %s", final_response)
        return final_response

    except Exception as e:
        logger.exception("An error occurred during OpenAI chat completion:")
        # Return a user-friendly error message rather than raising into the UI layer.
        return f"An error occurred while processing your request: {e}"


# -----------------------------
# Clients
# -----------------------------
# Ensure these variables are defined in previous cells or loaded from environment variables
# OPENAI_API_KEY, NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD, NEO4J_DATABASE

# Initialize clients using environment variables
try:
    client = OpenAI(api_key=OPENAI_API_KEY)
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    logger.info("Clients (OpenAI and Neo4j) initialized successfully.")
except Exception as e:
    logger.error("Error initializing clients: %s", e)
    # Depending on the severity, you might want to exit or handle this differently
    # For a Gradio app, you might want to show an error message on the UI
    # NOTE(review): on failure `client` and `driver` remain undefined, so any
    # later use raises NameError — confirm whether a hard exit is preferable.


# -----------------------------
# Gradio Interface
# -----------------------------
# Get the logger for the chatbot interface function as well
# NOTE(review): logging.getLogger returns the same already-configured named
# logger, so this rebinding is a harmless no-op (likely a notebook-cell leftover).
logger = logging.getLogger("proc-assistant")

def chatbot_interface(user_query: str, history: list[dict]) -> tuple[list[dict], list[dict]]:
    """
    Gradio wrapper around assistant_reply.

    Accepts and returns the conversation history in OpenAI messages format
    (list of {"role": ..., "content": ...} dicts), matching
    gr.Chatbot(type='messages'). Returns the same updated history twice:
    once for the Chatbot component and once for the gr.State.
    """
    logger.info(f"chatbot_interface received history (dict format): {history}")
    logger.info(f"chatbot_interface received user_query: {user_query}")

    # Ask the assistant; fall back to a generic message on unexpected failure.
    try:
        assistant_response_content = assistant_reply(client, user_query, history)
        logger.info(f"chatbot_interface received assistant_response_content: {assistant_response_content}")
    except Exception:
        logger.exception("Error calling assistant_reply from chatbot_interface:")
        assistant_response_content = "An error occurred while getting the assistant's response. Please check the logs for details." # Provide a fallback message

    assistant_message = {"role": "assistant", "content": assistant_response_content}

    # Extend the history with this turn (user message + assistant reply).
    updated_history = history + [{"role": "user", "content": user_query}, assistant_message]
    logger.info(f"chatbot_interface updated history (dict format): {updated_history}")

    return updated_history, updated_history

# Create the Gradio interface with history components
# Custom theme: plain white surfaces with a yellow primary accent.
my_theme = gr.themes.Soft(
    primary_hue="yellow",
    secondary_hue="gray",
    neutral_hue="zinc",
    radius_size="sm",
    spacing_size="md"
).set(
    # Page and block surfaces: white with a light grey border.
    body_background_fill="#FFFFFF",
    block_background_fill="#FFFFFF",
    block_border_color="#E5E5E5",

    # Inputs: near-white fill with a slightly darker border.
    input_background_fill="#FAFAFA",
    input_border_color="#DDDDDD",

    # Primary buttons: yellow with black text.
    button_primary_background_fill="#F2C200",
    button_primary_text_color="#000000",

    # Secondary buttons: white with near-black text.
    button_secondary_background_fill="#FFFFFF",
    button_secondary_text_color="#111111"
)


# Page layout: branded header, chat window, input box, clear button.
with gr.Blocks(theme=my_theme) as iface:
    # Static header: logo linking to bpexperts.de plus the app title.
    gr.HTML("""
    <div style="
        display:flex;
        align-items:center;
        gap:20px;
        padding:15px 0;
    ">
        <a href="https://www.bpexperts.de" target="_blank" style="text-decoration:none;">
            <img src='https://images.squarespace-cdn.com/content/v1/62835c73f824d0627cfba7a7/093df9f4-89e4-48f9-8359-2096efe5b65a/Logo_bp_experts_2019.png' 
                alt="bpExperts Logo"
                style="height:70px; object-fit:contain;">
        </a>
        
        <h1 style="
            color:#ffffff;
            margin:0;
            font-weight:600;
            font-size:32px;
        ">
            Business Flows Assistant
        </h1>
    </div>
    """)
    # Use gr.Chatbot with type='messages' to handle history in OpenAI message format
    chatbot = gr.Chatbot(label="Process Assistant Chatbot", type='messages')
    msg = gr.Textbox(label="Ask a question about business processes.")
    clear = gr.ClearButton([msg, chatbot])
    # gr.State is used to maintain the history between interactions
    # Initialize state as an empty list for history in dict format
    state = gr.State([])

    # The submit method connects the input (msg, state) to the function (chatbot_interface)
    # and updates the chatbot and state with the returned history (now in dict format).
    msg.submit(chatbot_interface, inputs=[msg, state], outputs=[chatbot, state])


# -----------------------------
# Launch the Gradio interface
# -----------------------------
# This part will run when the app.py file is executed
if __name__ == "__main__":
    logger.info("Launching Gradio interface.")
    # Use host="0.0.0.0" to make the app accessible externally in a container environment
    # Set share=False for production deployment
    iface.launch(server_name="0.0.0.0", share=False)
    # NOTE(review): launch() blocks while the server runs in script mode, so
    # this line logs only after the server shuts down.
    logger.info("Gradio interface launched.")