File size: 6,299 Bytes
c51c1e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801818c
c51c1e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143fb9d
 
c51c1e9
 
 
 
 
 
 
143fb9d
 
 
 
801818c
9d40e7f
801818c
 
 
 
 
 
a87f741
c51c1e9
 
b90135f
c51c1e9
a87f741
801818c
18333fa
143fb9d
18333fa
143fb9d
7c6a3f9
801818c
 
143fb9d
 
 
 
c51c1e9
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import gradio as gr
import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from langchain_anthropic import ChatAnthropic
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from gradio import Markdown
import os
from dotenv import load_dotenv

# Paths
INDEX_PATH = "index/index_file.index"
CSV_PATH = "csv/processed_markdown_data.csv"

# Load the prebuilt FAISS index from disk
index = faiss.read_index(INDEX_PATH)

# Load the text segments; the 'Segment' column supplies one text per CSV row
df = pd.read_csv(CSV_PATH)
all_segments = df['Segment'].tolist()

# The mapping built below pairs index position i with segment i, so both
# collections must have the same length. Raise explicitly rather than using
# `assert`, which is removed when Python runs with -O and would let a
# mismatched index/CSV pair through unnoticed.
if len(all_segments) != index.ntotal:
    raise ValueError(
        "Mismatch between number of segments and vectors in the index "
        f"({len(all_segments)} segments vs {index.ntotal} vectors)"
    )

# Embedding model used by the vector store to embed incoming queries.
# NOTE(review): presumably the same model that produced the vectors stored in
# the FAISS index -- confirm against the indexing script.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Wrap every segment in a LangChain Document tagged with its positional id
documents = [
    Document(page_content=segment, metadata={"source": f"doc_{i}"})
    for i, segment in enumerate(all_segments)
]

# In-memory docstore keyed by the same "doc_{i}" ids
docstore = InMemoryDocstore({f"doc_{i}": doc for i, doc in enumerate(documents)})

# Tie the raw FAISS index to the docstore: vector position i -> "doc_{i}"
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=docstore,
    index_to_docstore_id={i: f"doc_{i}" for i in range(index.ntotal)},
)

# Read the Anthropic API key from the environment (after loading any .env file)
load_dotenv()
api_key = os.environ.get("ANTHROPIC_API_KEY")

# Chat model (Claude 3 Haiku) shared by the multi-query retriever and the answer chain
llm = ChatAnthropic(
    model="claude-3-haiku-20240307",
    api_key=api_key,
    max_tokens_to_sample=1024,
    temperature=0.2,
)

# Base retriever: MMR search over the vector store with k=6
base_retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 6},
)

# Multi-query retriever built from the base retriever and the same LLM
multi_query_retriever = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=llm)

# System prompt; {context} is the placeholder for the retrieved documents
system_prompt = """You are an assistant with access to my notes. The notes are about different topics that are interesting to me.
Your task is to provide insights on the content I've saved in the past. You will be comprehensive and informative in your response.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, say that you don't know.

{context}"""

# Chat prompt: the system instructions followed by the user's question ({input})
prompt_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

# Combine retrieval with the document-stuffing answer chain
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(multi_query_retriever, question_answer_chain)

# Markdown description of the chatbot; rendered with gr.Markdown inside the
# collapsible info accordion of the Gradio UI further down in this file.
info_text = """
by [Mark Redito](https://markredito.com)
This chatbot has access to my browser bookmarks from 2020 to mid-2024. It covers a variety of topics I’m interested in, including Art, Technology, and Culture.

## You can use it in a few ways:
- Extract specific links. For example: "Give me the links about Ethereum"
- Get summaries of bookmarked content. Try: "Summarize 'How to do great work' by Paul Graham"
- Ask general questions on various topics. Like: "What goes into a typical music recording contract?"

## Here's a quick rundown of how it works behind the scenes:
- The system uses RAG (Retrieval-Augmented Generation) with a framework called Langchain. Basically, it helps the chatbot find and use relevant information.
- The bookmarks are stored in a database called FAISS that makes searching super fast.
- The brains of the operation is Claude 3 Haiku, a small and fast AI model by Anthropic.
- When you ask a question, the system comes up with a few more related questions to help find the right links. It then searches the database and passes the best information to Claude to craft your answer.

Keep in mind, if the chatbot can't find good information to answer your question, it'll let you know by saying something like "I don't know" or "I can't find it." And like any AI, it might make mistakes sometimes.
This is mostly a fun project I put together for my own curiosity and enjoyment. While I can't make any promises about its performance, I hope you have fun exploring and maybe discover something interesting! Enjoy!
"""

def respond(message, history, max_tokens, temperature, top_p):
    """Answer a user message by running it through the RAG chain.

    Args:
        message: The user's question as entered in the chat box.
        history: Earlier chat turns supplied by Gradio. Not used: each
            question is sent to the chain on its own.
        max_tokens: Value of the "Max new tokens" slider. Accepted so the
            signature matches the chat callback, but not forwarded to the
            chain.
        temperature: Value of the "Temperature" slider. Accepted but not
            forwarded to the chain.
        top_p: Value of the "Top-p" slider. Accepted but not forwarded to
            the chain.

    Returns:
        The chain result's ``'answer'`` entry when the result is a dict that
        contains one, otherwise the result converted with ``str``. For a
        missing or whitespace-only message, a short prompt asking for a
        question is returned and the chain is not invoked.
    """
    # Blank input: skip retrieval and the LLM round-trip entirely
    if message is None or not str(message).strip():
        return "Please enter a question."

    # Process the user message through the RAG chain
    response = rag_chain.invoke({"input": message})

    # Extract the answer when the result has the expected shape
    if isinstance(response, dict) and 'answer' in response:
        return response['answer']

    # Fall back to a string rendering of whatever the chain returned
    return str(response)

def chat_response(message, history, max_tokens, temperature, top_p):
    """Chat callback for the Gradio interface.

    Passes all arguments straight through to ``respond`` and returns its
    result unchanged.
    """
    return respond(message, history, max_tokens, temperature, top_p)

# Gradio UI: header, two collapsible panels, then the chat interface
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("# Welcome to My Bookmarks Chatbot")

    # Collapsed-by-default panel showing the description text
    with gr.Accordion("Click to view info about the chatbot", open=False):
        gr.Markdown(info_text)

    # Collapsed-by-default panel with the sliders handed to the chat callback
    with gr.Accordion("Advanced Options", open=False):
        max_tokens_slider = gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        )
        temperature_slider = gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
        )
        top_p_slider = gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        )

    # Each example row is [prompt, max tokens, temperature, top-p], using the
    # same values as the slider defaults above
    example_prompts = (
        "How to do great work by Paul Graham?",
        "Give me the links about Ethereum from the notes",
        "What goes into a typical music recording contract?",
    )
    example_rows = [[text, 512, 0.7, 0.95] for text in example_prompts]

    # Chat widget; the sliders are passed to chat_response as extra arguments
    gr.ChatInterface(
        fn=chat_response,
        additional_inputs=[max_tokens_slider, temperature_slider, top_p_slider],
        examples=example_rows,
        retry_btn="Retry",
        undo_btn="Undo",
        clear_btn="Clear",
    )

if __name__ == "__main__":
    demo.launch()