Spaces:

niddijoris
/

RAGCustomerSupportAgent

Sleeping

App Files Files Community

niddijoris committed on Feb 3

Commit

c5fe829

0 Parent(s):

upload files

Browse files

Files changed (12) hide show

.DS_Store +0 -0
.env +4 -0
.env.example +4 -0
.gitattributes +1 -0
agent.py +181 -0
app.py +99 -0
data/Fundamentals of Software Architecture.pdf +3 -0
data/Release It!.pdf +3 -0
data/Software Architecture The Hard Parts.pdf +3 -0
ingest.py +63 -0
requirements.txt +10 -0
test_ticket.py +25 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.env ADDED Viewed

	@@ -0,0 +1,4 @@

+OPENAI_API_KEY=<REDACTED: credential was committed in plain text; revoke and rotate it>
+GITHUB_TOKEN=<REDACTED: credential was committed in plain text; revoke and rotate it>
+REPO_NAME=niddijoris/Generative-AI-2025-masters
+PROJECT_FOLDER=Capstone project 1 - RAG

.env.example ADDED Viewed

	@@ -0,0 +1,4 @@

+OPENAI_API_KEY=sk-...
+GITHUB_TOKEN=ghp_...
+REPO_NAME=username/repo
+PROJECT_FOLDER=Capstone2-1.2Antigravity

.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ data/*.pdf filter=lfs diff=lfs merge=lfs -text

agent.py ADDED Viewed

	@@ -0,0 +1,181 @@

+import os
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+from langchain.agents import AgentExecutor, create_openai_tools_agent
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain.tools import tool
+from github import Github
+from github import Auth
+# Load variables from a local .env file so the os.getenv() lookups below can see them.
+load_dotenv()
+# Constants / Config
+# COMPANY_NAME and COMPANY_CONTACT are interpolated into the agent's system prompt.
+COMPANY_NAME = "TechFlow Solutions"
+COMPANY_CONTACT = "support@techflow.com | +1-555-0199"
+# Directory that get_vector_store() loads the FAISS index from.
+DB_PATH = "vector_db"
+def get_vector_store():
+    embeddings = OpenAIEmbeddings()
+    vector_store = FAISS.load_local(
+        DB_PATH,
+        embeddings,
+        allow_dangerous_deserialization=True
+    )
+    return vector_store
+@tool
+def search_knowledge_base(query: str) -> str:
+    """
+    Search the company knowledge base for answers to user questions.
+    Returns relevance of text and citations (source files and page numbers).
+    """
+    try:
+        vector_store = get_vector_store()
+        # Use relevance scores (0 to 1, where 1 is best match)
+        results = vector_store.similarity_search_with_relevance_scores(query, k=5)
+        response = ""
+        relevant_count = 0
+        # Threshold for relevance (0.7 is a reasonable baseline for OpenAI embeddings)
+        THRESHOLD = 0.7
+        print(f"\n--- Search Query: '{query}' ---")
+        for i, (doc, score) in enumerate(results):
+            print(f"Result {i+1}: Score {score:.4f} | Content: {doc.page_content[:50]}...")
+            if score < THRESHOLD:
+                print(f"  -> FILTERED (Below {THRESHOLD})")
+                continue
+            relevant_count += 1
+            print(f"  -> ACCEPTED")
+            if score < THRESHOLD:
+                continue
+            relevant_count += 1
+            source = doc.metadata.get("source", "Unknown")
+            page = doc.metadata.get("page", "Unknown")
+            # Extract just the filename from the path
+            filename = os.path.basename(source)
+            response += f"--- Result {relevant_count} (Score: {score:.2f}) ---\n"
+            response += f"Content: {doc.page_content}\n"
+            response += f"Source: {filename}, Page: {page}\n\n"
+        return response if response else "No relevant information found in the knowledge base (all results below threshold)."
+    except Exception as e:
+        return f"Error searching knowledge base: {str(e)}"
+class TicketSystem:
+    def __init__(self, token, repo_name):
+        auth = Auth.Token(token)
+        self.g = Github(auth=auth)
+        self.repo = self.g.get_repo(repo_name)
+    def create_ticket(self, title, body, project_folder):
+        """
+        project_folder: Project folder name (e.g. 'Capstone2-1.2Antigravity')
+        """
+        # 1. Check or create label
+        label_name = project_folder.lower().replace("/", "-").replace(" ", "-")
+        try:
+            self.repo.get_label(label_name)
+        except:
+            # Create new label (blue)
+            self.repo.create_label(name=label_name, color="0075ca")
+        # 2. Decorate title
+        full_title = f"[{project_folder}] {title}"
+        # 3. Add details to body
+        full_body = f"**Project:** {project_folder}\n\n**Description:**\n{body}"
+        # 4. Create Issue
+        new_issue = self.repo.create_issue(
+            title=full_title,
+            body=full_body,
+            labels=[label_name, "customer-support"]
+        )
+        return new_issue
+def create_github_issue(summary: str, description: str, user_email: str, user_name: str) -> str:
+    token = os.getenv("GITHUB_TOKEN")
+    repo_name = os.getenv("REPO_NAME")
+    project_folder = os.getenv("PROJECT_FOLDER", "Capstone Project")
+    if not token or not repo_name:
+        return "Error: GitHub credentials not configured. Cannot create ticket."
+    try:
+        ticket_system = TicketSystem(token, repo_name)
+        # Combine user details into the body description
+        full_description = f"**User Name:** {user_name}\n**User Email:** {user_email}\n\n{description}"
+        issue = ticket_system.create_ticket(
+            title=summary,
+            body=full_description,
+            project_folder=project_folder
+        )
+        return f"Ticket created successfully! Ticket ID: #{issue.number}. Link: {issue.html_url}"
+    except Exception as e:
+        return f"Error creating ticket: {str(e)}"
+# Agent-facing wrapper around create_github_issue(). The docstring below is
+# used by the @tool decorator as the description shown to the model, so it is
+# runtime text rather than ordinary documentation. Edit it with care.
+@tool
+def create_support_ticket(summary: str, description: str, user_email: str, user_name: str) -> str:
+    """
+    Create a support ticket (GitHub Issue) for the user.
+    Use this when the knowledge base doesn't have the answer or the user explicitly asks to raise a ticket.
+    Include all details: user name, email, issue summary and full description.
+    """
+    # Returns the status string from create_github_issue unchanged.
+    return create_github_issue(summary, description, user_email, user_name)
+def create_agent():
+    llm = ChatOpenAI(model="gpt-4o", temperature=0)
+    tools = [search_knowledge_base, create_support_ticket]
+    system_prompt = f"""You are a helpful and professional customer support agent for {COMPANY_NAME}.
+    Company Contact Info: {COMPANY_CONTACT}
+    Your goal is to assist users with their questions using the available tools.
+    GUIDELINES:
+    1.  **ALWAYS SEARCH**: You MUST use the `search_knowledge_base` tool for **EVERY** user message, even if it looks like a typo, gibberish, or nonsense.
+        - **Reason**: The search tool has internal logic to handle/reject irrelevant queries. You must let it run.
+        - **Do not** simply reply "It seems like a typo" without calling the tool first.
+    2.  **Intent**: If you can infer a valid term (e.g. "solutiun" -> "Solution"), search for the corrected term. If it is total gibberish, search for the gibberish exactly.
+    2.  **Comprehensive Synthesis**: Use the provided search results to answer the user's question.
+        - **Summarize ALL chunks**: You must synthesize information from ALL relevant chunks provided by the search tool.
+        - **Proactive Answering**: If exact matches aren't found, define related concepts (e.g., Software Architecture for Solution Architecture) found in the text.
+        - **NEVER** refuse to answer if there is ANY retrieved text that is even remotely technical or relevant.
+    3.  **MANDATORY CITATIONS**: You MUST list **ALL** source citations found in the search results at the end of your response.
+        - Even if you summarize multiple chunks, list every unique source/page used.
+        - Format: `**Source 1:** [filename] (Page [number])`
+    4.  **IF ANSWER NOT FOUND**: Only if the search results are completely empty or nonsensical string matches, state: "I could not find the answer in the knowledge base."
+    5.  **Ticket Creation**: If you truly cannot help, or if the user explicitly asks, create a support ticket using `create_support_ticket`.
+    6.  Required details for a ticket: Title (Summary), Description, User Name, User Email.
+    7.  Be polite and concise.
+    """
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", system_prompt),
+        MessagesPlaceholder(variable_name="chat_history"),
+        ("human", "{input}"),
+        MessagesPlaceholder(variable_name="agent_scratchpad"),
+    ])
+    agent = create_openai_tools_agent(llm, tools, prompt)
+    agent_executor = AgentExecutor(
+        agent=agent,
+        tools=tools,
+        verbose=True,
+        handle_parsing_errors=True
+    )
+    return agent_executor

app.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import streamlit as st
+import os
+from langchain_core.messages import AIMessage, HumanMessage
+from agent import create_agent, create_github_issue
+from dotenv import load_dotenv
+from ingest import main as run_ingestion
+load_dotenv()
+st.set_page_config(page_title="Customer Support AI", page_icon="🤖")
+# Wraps the ingest script's main() so it can be called at app startup.
+# NOTE(review): st.cache_resource presumably makes this run once per server
+# process rather than on every script rerun. Confirm against Streamlit docs.
+@st.cache_resource
+def automated_ingestion():
+    """Run the ingestion pipeline (ingest.main); returns None."""
+    run_ingestion()
+# Run ingestion automatically on startup (cached)
+with st.spinner("Updating knowledge base..."):
+    automated_ingestion()
+st.title("🤖 TechFlow Support Agent")
+# Initialize session state for chat history and other flags
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+if "agent" not in st.session_state:
+    st.session_state.agent = create_agent()
+if "show_ticket_form" not in st.session_state:
+    st.session_state.show_ticket_form = False
+# Display chat messages from history on app rerun
+for message in st.session_state.chat_history:
+    if isinstance(message, HumanMessage):
+        with st.chat_message("user"):
+            st.markdown(message.content)
+    elif isinstance(message, AIMessage):
+        with st.chat_message("assistant"):
+            st.markdown(message.content)
+# Logic to handle ticket creation form
+def submit_ticket():
+    summary = st.session_state.ticket_summary
+    desc = st.session_state.ticket_desc
+    email = st.session_state.ticket_email
+    name = st.session_state.ticket_name
+    if summary and desc and email and name:
+        with st.spinner("Creating ticket..."):
+            result = create_github_issue(summary, desc, email, name)
+            st.success(result)
+            st.session_state.show_ticket_form = False
+            # Add system message about ticket creation
+            st.session_state.chat_history.append(AIMessage(content=f"Ticket created: {summary}"))
+    else:
+        st.error("Please fill all fields.")
+# React to user input
+if prompt := st.chat_input("How can I help you today?"):
+    # Reset ticket form state on new query
+    st.session_state.show_ticket_form = False
+    # Display user message
+    st.chat_message("user").markdown(prompt)
+    st.session_state.chat_history.append(HumanMessage(content=prompt))
+    # Display assistant response
+    with st.chat_message("assistant"):
+        with st.spinner("Thinking..."):
+            try:
+                response = st.session_state.agent.invoke({
+                    "input": prompt,
+                    "chat_history": st.session_state.chat_history
+                })
+                output_text = response["output"]
+                st.markdown(output_text)
+                st.session_state.chat_history.append(AIMessage(content=output_text))
+                # Check if we should show ticket button
+                if "could not find the answer" in output_text.lower() or "not found" in output_text.lower():
+                    st.session_state.show_ticket_form = True
+                    st.rerun()
+            except Exception as e:
+                error_msg = f"An error occurred: {str(e)}"
+                st.error(error_msg)
+                st.session_state.chat_history.append(AIMessage(content=error_msg))
+# Dedicated section for ticket creation if flag is set
+if st.session_state.show_ticket_form:
+    st.divider()
+    st.warning("I couldn't find an answer. Would you like to raise a support ticket?")
+    with st.form("ticket_form"):
+        # Each widget key is the session_state name that submit_ticket() reads.
+        st.text_input("Name", key="ticket_name")
+        st.text_input("Email", key="ticket_email")
+        st.text_input("Issue Summary", key="ticket_summary")
+        st.text_area("Description", key="ticket_desc")
+        # submit_ticket is registered as the on_click callback for the submit button.
+        st.form_submit_button("Create Ticket", on_click=submit_ticket)

data/Fundamentals of Software Architecture.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5008352574b214d08e4e831288a4e628355557fb73a927f91eda411c2ba1a546
+size 24625023

data/Release It!.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73b8ba2e63176d9a742f7b322c2415ee6f7f593995f770fa65dd5f814e2498dd
+size 5656990

data/Software Architecture The Hard Parts.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e1f286ed91a33c9af6cab35811aec9e6600c197dc6187d8dbefef7bb76c1359
+size 16509658

ingest.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import os
+import glob
+from dotenv import load_dotenv
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_openai import OpenAIEmbeddings
+from langchain_community.vectorstores import FAISS
+# Load environment variables
+load_dotenv()
+# Directory scanned for *.pdf files by load_documents().
+DATA_PATH = "data"
+# Directory the FAISS index is written to by save_to_faiss().
+DB_PATH = "vector_db"
+def load_documents():
+    documents = []
+    pdf_files = glob.glob(os.path.join(DATA_PATH, "*.pdf"))
+    if not pdf_files:
+        print(f"No PDF files found in {DATA_PATH}")
+        return []
+    print(f"Found {len(pdf_files)} PDF files.")
+    for pdf_file in pdf_files:
+        print(f"Loading {pdf_file}...")
+        try:
+            loader = PyPDFLoader(pdf_file)
+            docs = loader.load()
+            documents.extend(docs)
+        except Exception as e:
+            print(f"Error loading {pdf_file}: {e}")
+    return documents
+def split_documents(documents):
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len,
+        add_start_index=True,
+    )
+    chunks = text_splitter.split_documents(documents)
+    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")
+    return chunks
+def save_to_faiss(chunks):
+    embeddings = OpenAIEmbeddings()
+    print("Creating vector database...")
+    db = FAISS.from_documents(chunks, embeddings)
+    db.save_local(DB_PATH)
+    print(f"Saved {len(chunks)} chunks to {DB_PATH}.")
+def main():
+    documents = load_documents()
+    if not documents:
+        return
+    chunks = split_documents(documents)
+    save_to_faiss(chunks)
+# Run the ingestion pipeline only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+streamlit
+openai
+langchain
+langchain-community
+langchain-openai
+pypdf
+faiss-cpu
+PyGithub
+python-dotenv
+tiktoken

test_ticket.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import os
+from dotenv import load_dotenv
+from agent import create_github_issue
+load_dotenv()
+def test_ticket_creation():
+    print("Testing TicketSystem...")
+    # Check env vars
+    if not os.getenv("GITHUB_TOKEN"):
+        print("Error: GITHUB_TOKEN not set in .env")
+        return
+    summary = "Test Ticket from Script"
+    description = "This is a test ticket to verify the TicketSystem class."
+    email = "test@example.com"
+    name = "Test User"
+    print(f"Creating ticket for project: {os.getenv('PROJECT_FOLDER', 'Default')}")
+    result = create_github_issue(summary, description, email, name)
+    print(result)
+# Run the smoke test only when this file is executed as a script.
+if __name__ == "__main__":
+    test_ticket_creation()