niddijoris commited on
Commit
c5fe829
·
0 Parent(s):

upload files

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ OPENAI_API_KEY=<REDACTED - a real key was committed here; revoke and rotate it, and keep .env out of version control>
2
+ GITHUB_TOKEN=<REDACTED - a real token was committed here; revoke and rotate it, and keep .env out of version control>
3
+ REPO_NAME=niddijoris/Generative-AI-2025-masters
4
+ PROJECT_FOLDER=Capstone project 1 - RAG
.env.example ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ OPENAI_API_KEY=sk-...
2
+ GITHUB_TOKEN=ghp_...
3
+ REPO_NAME=username/repo
4
+ PROJECT_FOLDER=Capstone2-1.2Antigravity
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ data/*.pdf filter=lfs diff=lfs merge=lfs -text
agent.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain.agents import AgentExecutor, create_openai_tools_agent
6
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
7
+ from langchain.tools import tool
8
+ from github import Github
9
+ from github import Auth
10
+
11
+ load_dotenv()
12
+
13
# Static configuration: company identity interpolated into the system prompt,
# and the directory the FAISS index is loaded from.
COMPANY_NAME: str = "TechFlow Solutions"
COMPANY_CONTACT: str = "support@techflow.com | +1-555-0199"
DB_PATH: str = "vector_db"
17
+
18
def get_vector_store():
    """Load the FAISS index stored under ``DB_PATH``.

    A new ``OpenAIEmbeddings`` instance is created on every call and passed to
    the loader.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # pickled data from disk. Only point DB_PATH at an index this project
    # built itself -- never at a file received from an untrusted source.
    return FAISS.load_local(
        DB_PATH,
        OpenAIEmbeddings(),
        allow_dangerous_deserialization=True,
    )
26
+
27
@tool
def search_knowledge_base(query: str) -> str:
    """
    Search the company knowledge base for answers to user questions.
    Returns relevance of text and citations (source files and page numbers).
    """
    # NOTE: the docstring above is kept verbatim on purpose -- LangChain's
    # @tool decorator uses it as the tool description shown to the model.

    # Minimum relevance score (0 to 1, where 1 is the best match) a chunk must
    # reach to be returned. 0.7 is a reasonable baseline for OpenAI embeddings.
    threshold = 0.7

    try:
        vector_store = get_vector_store()
        results = vector_store.similarity_search_with_relevance_scores(query, k=5)

        print(f"\n--- Search Query: '{query}' ---")
        sections = []
        for position, (doc, score) in enumerate(results, start=1):
            print(f"Result {position}: Score {score:.4f} | Content: {doc.page_content[:50]}...")

            if score < threshold:
                print(f" -> FILTERED (Below {threshold})")
                continue
            print(" -> ACCEPTED")

            source = doc.metadata.get("source", "Unknown")
            page = doc.metadata.get("page", "Unknown")
            # Cite just the file name rather than the full path.
            filename = os.path.basename(source)

            # Accepted hits are numbered consecutively (1, 2, 3, ...). The
            # counter used to be bumped twice per hit, which skipped numbers.
            sections.append(
                f"--- Result {len(sections) + 1} (Score: {score:.2f}) ---\n"
                f"Content: {doc.page_content}\n"
                f"Source: {filename}, Page: {page}\n\n"
            )

        if not sections:
            return "No relevant information found in the knowledge base (all results below threshold)."
        return "".join(sections)
    except Exception as e:
        # Tool boundary: report the failure as text so the agent can react to
        # it instead of the whole run aborting.
        return f"Error searching knowledge base: {str(e)}"
70
+
71
class TicketSystem:
    """Create support tickets as issues in a single GitHub repository."""

    def __init__(self, token, repo_name):
        """Authenticate and look up the target repository.

        Args:
            token: GitHub access token used for authentication.
            repo_name: Repository identifier passed to ``get_repo``
                (e.g. ``'username/repo'``).
        """
        self.g = Github(auth=Auth.Token(token))
        self.repo = self.g.get_repo(repo_name)

    def create_ticket(self, title, body, project_folder):
        """Open an issue labelled with the project it belongs to.

        Args:
            title: Short issue summary; it is prefixed with ``[project_folder]``.
            body: Issue description; it is placed under a "Project" header.
            project_folder: Project folder name (e.g. 'Capstone2-1.2Antigravity').

        Returns:
            The issue object returned by ``repo.create_issue``.
        """
        # Local import: only this method needs the exception type.
        from github import UnknownObjectException

        # 1. Derive the label from the folder name and make sure it exists.
        label_name = project_folder.lower().replace("/", "-").replace(" ", "-")
        try:
            self.repo.get_label(label_name)
        except UnknownObjectException:
            # Only a missing label triggers creation (blue). Other failures
            # (auth, rate limit, network) now propagate instead of being
            # hidden by a bare ``except``.
            self.repo.create_label(name=label_name, color="0075ca")

        # 2. Decorate the title and 3. add project details to the body.
        full_title = f"[{project_folder}] {title}"
        full_body = f"**Project:** {project_folder}\n\n**Description:**\n{body}"

        # 4. Create the issue.
        return self.repo.create_issue(
            title=full_title,
            body=full_body,
            labels=[label_name, "customer-support"],
        )
102
+
103
def create_github_issue(summary: str, description: str, user_email: str, user_name: str) -> str:
    """File a support ticket as a GitHub issue and describe the outcome.

    Configuration is read from the ``GITHUB_TOKEN``, ``REPO_NAME`` and
    ``PROJECT_FOLDER`` environment variables.

    Args:
        summary: Issue title.
        description: Free-text problem description.
        user_email: Requester's e-mail, embedded in the issue body.
        user_name: Requester's name, embedded in the issue body.

    Returns:
        A human-readable message: a success line with the issue number and
        link, or a line starting with ``"Error"`` when configuration is
        missing or the request fails. This function does not raise.
    """
    gh_token = os.getenv("GITHUB_TOKEN")
    gh_repo = os.getenv("REPO_NAME")
    folder = os.getenv("PROJECT_FOLDER", "Capstone Project")

    if not (gh_token and gh_repo):
        return "Error: GitHub credentials not configured. Cannot create ticket."

    try:
        tracker = TicketSystem(gh_token, gh_repo)

        # Put the requester's identity at the top of the issue body.
        body_with_user = f"**User Name:** {user_name}\n**User Email:** {user_email}\n\n{description}"

        created = tracker.create_ticket(
            title=summary,
            body=body_with_user,
            project_folder=folder,
        )
        return f"Ticket created successfully! Ticket ID: #{created.number}. Link: {created.html_url}"
    except Exception as e:
        return f"Error creating ticket: {str(e)}"
126
+
127
@tool
def create_support_ticket(summary: str, description: str, user_email: str, user_name: str) -> str:
    """
    Create a support ticket (GitHub Issue) for the user.
    Use this when the knowledge base doesn't have the answer or the user explicitly asks to raise a ticket.
    Include all details: user name, email, issue summary and full description.
    """
    # The docstring above is the tool description the model sees; keep it as is.
    # The work itself is done by the plain function that the UI also calls.
    outcome = create_github_issue(summary, description, user_email, user_name)
    return outcome
135
+
136
def create_agent():
    """Build the customer-support agent and wrap it in an executor.

    The agent uses ``gpt-4o`` at temperature 0 with two tools:
    ``search_knowledge_base`` and ``create_support_ticket``.

    Returns:
        An ``AgentExecutor``. The prompt it is built from has an ``input``
        variable and a ``chat_history`` placeholder, so callers supply both
        when invoking it.
    """
    llm = ChatOpenAI(model="gpt-4o", temperature=0)

    tools = [search_knowledge_base, create_support_ticket]

    # The f-string is resolved here, before the text reaches ChatPromptTemplate;
    # it must not contain any other curly braces or they would be read as
    # template variables. Guidelines are numbered 1-8 (the list used to contain
    # two items numbered "2.").
    system_prompt = f"""You are a helpful and professional customer support agent for {COMPANY_NAME}.

Company Contact Info: {COMPANY_CONTACT}

Your goal is to assist users with their questions using the available tools.

GUIDELINES:
1. **ALWAYS SEARCH**: You MUST use the `search_knowledge_base` tool for **EVERY** user message, even if it looks like a typo, gibberish, or nonsense.
   - **Reason**: The search tool has internal logic to handle/reject irrelevant queries. You must let it run.
   - **Do not** simply reply "It seems like a typo" without calling the tool first.
2. **Intent**: If you can infer a valid term (e.g. "solutiun" -> "Solution"), search for the corrected term. If it is total gibberish, search for the gibberish exactly.
3. **Comprehensive Synthesis**: Use the provided search results to answer the user's question.
   - **Summarize ALL chunks**: You must synthesize information from ALL relevant chunks provided by the search tool.
   - **Proactive Answering**: If exact matches aren't found, define related concepts (e.g., Software Architecture for Solution Architecture) found in the text.
   - **NEVER** refuse to answer if there is ANY retrieved text that is even remotely technical or relevant.
4. **MANDATORY CITATIONS**: You MUST list **ALL** source citations found in the search results at the end of your response.
   - Even if you summarize multiple chunks, list every unique source/page used.
   - Format: `**Source 1:** [filename] (Page [number])`
5. **IF ANSWER NOT FOUND**: Only if the search results are completely empty or nonsensical string matches, state: "I could not find the answer in the knowledge base."
6. **Ticket Creation**: If you truly cannot help, or if the user explicitly asks, create a support ticket using `create_support_ticket`.
7. Required details for a ticket: Title (Summary), Description, User Name, User Email.
8. Be polite and concise.
"""

    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ])

    agent = create_openai_tools_agent(llm, tools, prompt)

    return AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=True,
        handle_parsing_errors=True,
    )
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain_core.messages import AIMessage, HumanMessage
4
+ from agent import create_agent, create_github_issue
5
+ from dotenv import load_dotenv
6
+ from ingest import main as run_ingestion
7
+
8
+ load_dotenv()
9
+
10
+ st.set_page_config(page_title="Customer Support AI", page_icon="🤖")
11
+
12
@st.cache_resource
def automated_ingestion():
    """Run the ingest pipeline (``ingest.main``).

    Wrapped in ``st.cache_resource`` so the pipeline is not re-run on every
    script rerun.
    """
    run_ingestion()


# Build the knowledge base on startup; the spinner is shown while it runs.
with st.spinner("Updating knowledge base..."):
    automated_ingestion()
19
+
20
+ st.title("🤖 TechFlow Support Agent")
21
+
22
# Seed per-session state on first use; existing values are left untouched.
# The agent is built lazily so create_agent() runs only when no agent is stored.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

if "agent" not in st.session_state:
    st.session_state["agent"] = create_agent()

if "show_ticket_form" not in st.session_state:
    st.session_state["show_ticket_form"] = False
31
+
32
# Replay the stored conversation so it stays visible after each rerun.
# Entries that are neither human nor AI messages are not rendered.
for entry in st.session_state.chat_history:
    if isinstance(entry, HumanMessage):
        role = "user"
    elif isinstance(entry, AIMessage):
        role = "assistant"
    else:
        continue
    with st.chat_message(role):
        st.markdown(entry.content)
40
+
41
+ # Logic to handle ticket creation form
42
def submit_ticket():
    """Form callback: validate the ticket fields and file the ticket.

    Reads ``ticket_summary``, ``ticket_desc``, ``ticket_email`` and
    ``ticket_name`` from ``st.session_state``. On success the form is hidden
    and a note is appended to the chat history; on failure the error is shown
    and the form stays open so the user can retry.
    """
    state = st.session_state
    summary = state.ticket_summary
    desc = state.ticket_desc
    email = state.ticket_email
    name = state.ticket_name

    if not (summary and desc and email and name):
        st.error("Please fill all fields.")
        return

    with st.spinner("Creating ticket..."):
        result = create_github_issue(summary, desc, email, name)

    # create_github_issue reports failures as text beginning with "Error"
    # instead of raising, so the outcome has to be read from the message.
    # Previously every result was shown as a success and logged as created.
    if result.startswith("Error"):
        st.error(result)
        return

    st.success(result)
    state.show_ticket_form = False
    # Record the ticket in the conversation.
    state.chat_history.append(AIMessage(content=f"Ticket created: {summary}"))
57
+
58
# React to user input
if prompt := st.chat_input("How can I help you today?"):
    # Reset ticket form state on new query
    st.session_state.show_ticket_form = False

    # Display user message
    st.chat_message("user").markdown(prompt)

    # Snapshot the earlier turns BEFORE storing the new message. The agent's
    # prompt template adds the new message itself through "{input}", so passing
    # a history that already contains it would show it to the model twice.
    prior_turns = list(st.session_state.chat_history)
    st.session_state.chat_history.append(HumanMessage(content=prompt))

    # Display assistant response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                response = st.session_state.agent.invoke({
                    "input": prompt,
                    "chat_history": prior_turns,
                })

                output_text = response["output"]
                st.markdown(output_text)
                st.session_state.chat_history.append(AIMessage(content=output_text))

                # Offer the ticket form when the answer signals a miss.
                lowered = output_text.lower()
                if "could not find the answer" in lowered or "not found" in lowered:
                    st.session_state.show_ticket_form = True
                    st.rerun()

            except Exception as e:
                error_msg = f"An error occurred: {str(e)}"
                st.error(error_msg)
                st.session_state.chat_history.append(AIMessage(content=error_msg))
89
+
90
# Ticket form, rendered only while the show_ticket_form flag is set.
if st.session_state.show_ticket_form:
    st.divider()
    st.warning("I couldn't find an answer. Would you like to raise a support ticket?")
    with st.form("ticket_form"):
        # Widget keys are the session-state names submit_ticket reads back.
        for field_label, field_key in (
            ("Name", "ticket_name"),
            ("Email", "ticket_email"),
            ("Issue Summary", "ticket_summary"),
        ):
            st.text_input(field_label, key=field_key)
        st.text_area("Description", key="ticket_desc")
        st.form_submit_button("Create Ticket", on_click=submit_ticket)
data/Fundamentals of Software Architecture.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5008352574b214d08e4e831288a4e628355557fb73a927f91eda411c2ba1a546
3
+ size 24625023
data/Release It!.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b8ba2e63176d9a742f7b322c2415ee6f7f593995f770fa65dd5f814e2498dd
3
+ size 5656990
data/Software Architecture The Hard Parts.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e1f286ed91a33c9af6cab35811aec9e6600c197dc6187d8dbefef7bb76c1359
3
+ size 16509658
ingest.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ from dotenv import load_dotenv
4
+ from langchain_community.document_loaders import PyPDFLoader
5
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
6
+ from langchain_openai import OpenAIEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+
12
# Input directory scanned for PDFs, and output directory for the FAISS index.
DATA_PATH: str = "data"
DB_PATH: str = "vector_db"
14
+
15
def load_documents():
    """Load every ``*.pdf`` found directly under ``DATA_PATH``.

    Returns:
        A list with the documents produced by ``PyPDFLoader`` for all files
        that loaded; an empty list when no PDF is found. A file that fails to
        load is reported on stdout and skipped.
    """
    pdf_paths = glob.glob(os.path.join(DATA_PATH, "*.pdf"))
    if not pdf_paths:
        print(f"No PDF files found in {DATA_PATH}")
        return []

    print(f"Found {len(pdf_paths)} PDF files.")
    loaded = []
    for path in pdf_paths:
        print(f"Loading {path}...")
        try:
            loaded.extend(PyPDFLoader(path).load())
        except Exception as e:
            # Best effort: one unreadable file must not stop the whole run.
            print(f"Error loading {path}: {e}")

    return loaded
34
+
35
def split_documents(documents):
    """Split the loaded documents into overlapping chunks.

    Uses ``RecursiveCharacterTextSplitter`` with a chunk size of 1000 and an
    overlap of 200, both measured with ``len``. ``add_start_index=True`` is
    passed to the splitter.

    Args:
        documents: The documents to split.

    Returns:
        The chunks produced by the splitter.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
        add_start_index=True,
    )
    pieces = splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(pieces)} chunks.")
    return pieces
45
+
46
def save_to_faiss(chunks):
    """Embed ``chunks`` with OpenAI embeddings and save the index to ``DB_PATH``.

    Args:
        chunks: Document chunks to index.
    """
    embedder = OpenAIEmbeddings()

    print("Creating vector database...")
    FAISS.from_documents(chunks, embedder).save_local(DB_PATH)
    print(f"Saved {len(chunks)} chunks to {DB_PATH}.")
53
+
54
def main():
    """Run the ingest pipeline: load PDFs, split them, save the index.

    Does nothing further when no documents were loaded.
    """
    documents = load_documents()
    if documents:
        save_to_faiss(split_documents(documents))
61
+
62
+ if __name__ == "__main__":
63
+ main()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ openai
3
+ langchain
4
+ langchain-community
5
+ langchain-openai
6
+ pypdf
7
+ faiss-cpu
8
+ PyGithub
9
+ python-dotenv
10
+ tiktoken
test_ticket.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from agent import create_github_issue
4
+
5
+ load_dotenv()
6
+
7
def test_ticket_creation():
    """Manual smoke test: file one sample ticket and print the result.

    Prints an error and does nothing else when ``GITHUB_TOKEN`` is not set.
    NOTE(review): with a token configured this calls ``create_github_issue``
    with real credentials, so it is expected to open an actual issue.
    """
    print("Testing TicketSystem...")

    # Check env vars
    if os.getenv("GITHUB_TOKEN"):
        summary = "Test Ticket from Script"
        description = "This is a test ticket to verify the TicketSystem class."
        email = "test@example.com"
        name = "Test User"

        print(f"Creating ticket for project: {os.getenv('PROJECT_FOLDER', 'Default')}")
        print(create_github_issue(summary, description, email, name))
    else:
        print("Error: GITHUB_TOKEN not set in .env")
23
+
24
+ if __name__ == "__main__":
25
+ test_ticket_creation()