File size: 4,527 Bytes
1c330f9
 
 
 
 
 
 
a7a264b
 
1c330f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7a264b
 
 
 
 
 
 
 
 
 
 
1c330f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7a264b
 
 
 
1c330f9
 
 
 
 
 
 
 
 
a7a264b
1c330f9
 
 
 
 
 
 
 
 
a7a264b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
from langchain_openai import OpenAIEmbeddings
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
import chainlit as cl
from dotenv import load_dotenv
from openai import AsyncOpenAI
import socketio
from fastapi import FastAPI

# Load environment variables from a local .env file (API keys, region, etc.).
load_dotenv()

# Initialize Pinecone.
# NOTE(review): the serverless Pinecone client (v3+) removed the `environment`
# kwarg — it is likely ignored here; confirm against the installed version.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"), environment=os.getenv("PINECONE_ENVIRONMENT"))

# Create the handbook index on first run. dimension=1536 matches the output
# size of the default OpenAI embedding model used by OpenAIEmbeddings below.
index_name = "tg-handbook-index"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1'
        )
    )
index = pc.Index(index_name)

# Embedding model used at query time for similarity search against the index.
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

# Instrument the OpenAI client so Chainlit traces/records model calls.
cl.instrument_openai()
client = AsyncOpenAI()

# Chat-completion settings shared by every request; temperature 0 keeps
# answers as deterministic as the model allows.
settings = {
    "model": "gpt-4o",
    "temperature": 0,
}

# Initialize Socket.IO server with custom ping timeout and interval
sio = socketio.AsyncServer(
    async_mode='asgi',
    ping_timeout=5,    # Time in seconds before a ping is considered timed out
    ping_interval=25   # Time in seconds between pings
)

# Initialize FastAPI app and mount the Socket.IO app.
# NOTE(review): mounting at '/' routes *everything* on this app to Socket.IO,
# and this FastAPI instance is separate from the server Chainlit itself runs —
# verify this app/sio pair is actually served anywhere before relying on it.
app = FastAPI()
app.mount('/', socketio.ASGIApp(sio))

# Welcome message sent at the start of every chat session (see on_chat_start).
welcome_message = """

Hello! I am the Teagasc Staff Handbook helper, a large language model trained to assist you with information from the Teagasc Staff Handbook.

You can ask me about policies, procedures, or any other information contained in the handbook.

"""

@cl.on_chat_start
async def main():
    """Greet the user with the handbook helper's welcome message on session start."""
    await cl.Message(content=welcome_message).send()

# Prompt template for the chatbot.
#
# Exactly three format placeholders: {context}, {query}, {sources}.
# BUG FIX: the original text contained a second literal "{context}" inside the
# sentence "If the answer is not referenced in the {context}", so str.format()
# expanded the full retrieved context a *second* time into that sentence,
# bloating every prompt. That occurrence is now plain prose.
prompt_template = """

Your name is Teagasc Staff Handbook helper, a large language model trained by Teagasc based on the GPT-4o architecture.

Your role is to respond to the user's queries from the staff handbook.

Use the following documents to answer the user's query. Always include the sources of the documents in your response.

If the user query is not about the staff handbook, refuse to answer. The answers should be concise and accurate.

If the answer is not referenced in the context, refuse to answer.

Current date: 2024-05-30.



Context: {context}



User Query: {query}



Sources: {sources}

"""

@cl.on_message
async def on_message(message: cl.Message):
    """Answer a user query via similarity search over the handbook index.

    Retrieves matching documents from Pinecone, assembles a system prompt from
    their text and source URLs, and sends the model's reply back to the chat.
    Failures at each stage are reported to the user and logged to stdout.
    """
    # Vector store over the module-level Pinecone index (default namespace).
    store = PineconeVectorStore(
        index=index,
        embedding=embeddings,
        namespace=None
    )

    try:
        docs = store.similarity_search(message.content)
        print(f"Found {len(docs)} documents.")
        for doc in docs:
            print(f"Document metadata: {doc.metadata}")
    except KeyError as ke:
        # Surface session/lookup failures separately from generic search errors.
        await cl.Message(content=f"Session error: {str(ke)}").send()
        print(f"Session error: {str(ke)}")
        return
    except Exception as e:
        await cl.Message(content=f"Error during similarity search: {str(e)}").send()
        print(f"Error during similarity search: {str(e)}")
        return

    if not docs:
        await cl.Message(content="No relevant documents found.").send()
        return

    # NOTE(review): text is read from metadata["page_content"] rather than the
    # usual doc.page_content attribute — assumes the ingest pipeline stored it
    # there; confirm against the indexing code.
    page_texts = [doc.metadata.get("page_content", "No content") for doc in docs]
    source_urls = [doc.metadata.get("source_url", "Unknown source") for doc in docs]
    context = "\n\n".join(page_texts)
    sources = "\n".join(source_urls)

    if not context.strip():
        await cl.Message(content="Found documents but they contain no content.").send()
        return

    system_prompt = prompt_template.format(query=message.content, context=context, sources=sources)

    try:
        chat_messages = [
            {"content": system_prompt, "role": "system"},
            {"content": message.content, "role": "user"},
        ]
        response = await client.chat.completions.create(messages=chat_messages, **settings)
        await cl.Message(content=response.choices[0].message.content).send()
    except Exception as e:
        await cl.Message(content=f"Error generating response: {str(e)}").send()
        print(f"Error generating response: {str(e)}")