mafzaal commited on
Commit
ac0eae8
·
1 Parent(s): 52daacf

Implement initial project structure and setup

Browse files
Files changed (8) hide show
  1. .env.sample +5 -0
  2. .gitignore +6 -0
  3. Dockerfile +29 -0
  4. app.py +183 -0
  5. chainlit.md +94 -0
  6. data/paul_graham_essays.txt +0 -0
  7. pyproject.toml +22 -0
  8. uv.lock +0 -0
.env.sample ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # !!! DO NOT UPDATE THIS FILE DIRECTLY. MAKE A COPY AND RENAME IT `.env` TO PROCEED !!! #
2
+ HF_LLM_ENDPOINT="YOUR_LLM_ENDPOINT_URL_HERE"
3
+ HF_EMBED_ENDPOINT="YOUR_EMBED_MODEL_ENDPOINT_URL_HERE"
4
+ HF_TOKEN="YOUR_HF_TOKEN_HERE"
5
+ # !!! DO NOT UPDATE THIS FILE DIRECTLY. MAKE A COPY AND RENAME IT `.env` TO PROCEED !!! #
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ __pycache__/
3
+ .chainlit
4
+ *.faiss
5
+ *.pkl
6
+ .files
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Get a distribution that has uv already installed
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Add user - this is the user that will run the app
# If you do not set user, the app will run as root (undesirable)
RUN useradd -m -u 1000 user
USER user

# Set the home directory and path so user-level installs are found first
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# NOTE(review): presumably forces uvicorn to use the `websockets` protocol
# implementation for Chainlit's live connection — confirm against deployment docs
ENV UVICORN_WS_PROTOCOL=websockets

# Set the working directory
WORKDIR $HOME/app

# Copy the app to the container (chown so the non-root user can read/write it)
COPY --chown=user . $HOME/app

# Install the dependencies
# RUN uv sync --frozen
RUN uv sync

# Expose the port (7860 is the port the CMD below binds to)
EXPOSE 7860

# Run the app
CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import chainlit as cl
3
+ from dotenv import load_dotenv
4
+ from operator import itemgetter
5
+ from langchain_huggingface import HuggingFaceEndpoint
6
+ from langchain_community.document_loaders import TextLoader
7
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_huggingface import HuggingFaceEndpointEmbeddings
10
+ from langchain_core.prompts import PromptTemplate
11
+ from langchain.schema.output_parser import StrOutputParser
12
+ from langchain.schema.runnable import RunnablePassthrough
13
+ from langchain.schema.runnable.config import RunnableConfig
14
+ from tqdm.asyncio import tqdm_asyncio
15
+ import asyncio
16
+ from tqdm.asyncio import tqdm
17
+
# GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
# ---- ENV VARIABLES ---- #
"""
This function will load our environment file (.env) if it is present.

NOTE: Make sure that .env is in your .gitignore file - it is by default, but please ensure it remains there.
"""
load_dotenv()

"""
We will load our environment variables here.
"""
# Required configuration. Using os.environ[...] (not .get) is deliberate:
# a missing variable raises KeyError at import time, failing fast instead of
# producing a confusing error deep inside a request.
HF_LLM_ENDPOINT = os.environ["HF_LLM_ENDPOINT"]
HF_EMBED_ENDPOINT = os.environ["HF_EMBED_ENDPOINT"]
HF_TOKEN = os.environ["HF_TOKEN"]
# ---- GLOBAL DECLARATIONS ---- #

# -- RETRIEVAL -- #
"""
1. Load Documents from Text File
2. Split Documents into Chunks
3. Load HuggingFace Embeddings (remember to use the URL we set above)
4. Index Files if they do not exist, otherwise load the vectorstore
"""
# Load the whole essay corpus from a single text file.
document_loader = TextLoader("./data/paul_graham_essays.txt")
documents = document_loader.load()

# 1000-character chunks with a 30-character overlap so sentences straddling a
# chunk boundary still appear (partially) in both neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
split_documents = text_splitter.split_documents(documents)

# Embeddings are computed remotely by a HuggingFace inference endpoint
# (HF_EMBED_ENDPOINT) rather than locally.
hf_embeddings = HuggingFaceEndpointEmbeddings(
    model=HF_EMBED_ENDPOINT,
    task="feature-extraction",
    huggingfacehub_api_token=HF_TOKEN,
)
async def add_documents_async(vectorstore, documents):
    """Append *documents* to an existing *vectorstore* via its async bulk API."""
    await vectorstore.aadd_documents(documents)
async def process_batch(vectorstore, batch, is_first_batch, pbar):
    """Embed one batch of document chunks.

    The very first batch creates a brand-new FAISS index; every subsequent
    batch is appended to the index that first call produced.  Advances *pbar*
    by the batch size and returns the (possibly newly created) vectorstore.
    """
    if is_first_batch:
        # Bootstrap: build the index from scratch using this batch.
        store = await FAISS.afrom_documents(batch, hf_embeddings)
    else:
        # Grow the existing index in place.
        await add_documents_async(vectorstore, batch)
        store = vectorstore
    pbar.update(len(batch))
    return store
# Rename async def main() to async def build_retriever() to avoid name conflict
async def build_retriever():
    """Index every document chunk into FAISS and return a retriever over it.

    The first batch is awaited sequentially because it creates the
    vectorstore; all remaining batches are embedded concurrently via
    asyncio.gather and appended to that store.
    """
    print("Indexing Files")
    vectorstore = None
    batch_size = 32
    # Slice the pre-split chunks into fixed-size batches for the embedding endpoint.
    batches = [split_documents[i:i+batch_size] for i in range(0, len(split_documents), batch_size)]
    async def process_all_batches():
        # `nonlocal` lets the first batch publish the newly created store
        # back to the enclosing scope.
        nonlocal vectorstore
        tasks = []
        pbars = []
        for i, batch in enumerate(batches):
            # One tqdm bar per batch; position=i stacks the bars in the terminal.
            pbar = tqdm(total=len(batch), desc=f"Batch {i+1}/{len(batches)}", position=i)
            pbars.append(pbar)
            if i == 0:
                # Must be awaited here: later batches need the vectorstore
                # this call creates before they can be scheduled against it.
                vectorstore = await process_batch(None, batch, True, pbar)
            else:
                # Deferred — all non-first batches run concurrently below.
                tasks.append(process_batch(vectorstore, batch, False, pbar))
        if tasks:
            await asyncio.gather(*tasks)
        for pbar in pbars:
            pbar.close()
    await process_all_batches()
    if vectorstore is None:
        # Only possible when split_documents was empty (no batches at all).
        raise RuntimeError("Vectorstore was not created.")
    hf_retriever = vectorstore.as_retriever()
    print("\nIndexing complete. Vectorstore is ready for use.")
    return hf_retriever
# Update run() to use build_retriever
async def run():
    """Startup coroutine: build the retriever exactly once at import time."""
    return await build_retriever()

hf_retriever = asyncio.run(run())
# -- AUGMENTED -- #
"""
1. Define a String Template
2. Create a Prompt Template from the String Template
"""
### 1. DEFINE STRING TEMPLATE
# Llama-3-style chat template: system rules first, then the user's query plus
# the retrieved context, ending at the assistant header where generation begins.
# NOTE(review): the <|...|> special tokens assume the serving model uses the
# Llama 3 chat format — confirm against the deployed endpoint's model.
RAG_PROMPT_TEMPLATE = """
<|start_header_id|>system<|end_header_id|>
You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>

<|start_header_id|>user<|end_header_id|>
User Query:
{query}

Context:
{context}<|eot_id|>

<|start_header_id|>assistant<|end_header_id|>
"""

### 2. CREATE PROMPT TEMPLATE
# from_template infers the input variables ({query}, {context}) automatically.
rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
# -- GENERATION -- #
"""
1. Create a HuggingFaceEndpoint for the LLM
"""
### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
hf_llm = HuggingFaceEndpoint(
    endpoint_url=HF_LLM_ENDPOINT,
    max_new_tokens=512,       # cap on generated tokens per response
    top_k=10,                 # sample from only the 10 most likely tokens
    top_p=0.95,               # nucleus-sampling probability mass cutoff
    temperature=0.3,          # low temperature -> mostly focused, stable answers
    repetition_penalty=1.15,  # discourage the model from repeating itself
    huggingfacehub_api_token=HF_TOKEN,
)
@cl.author_rename
def rename(original_author: str):
    """
    This function can be used to rename the 'author' of a message.

    In this case, we're overriding the 'Assistant' author to be 'Paul Graham Essay Bot'.
    Any other author name passes through unchanged.
    """
    if original_author == "Assistant":
        return "Paul Graham Essay Bot"
    return original_author
@cl.on_chat_start
async def start_chat():
    """
    This function will be called at the start of every user session.

    We will build our LCEL RAG chain here, and store it in the user session.

    The user session is a dictionary that is unique to each user session, and is stored in the memory of the server.
    """
    # Parallel step: fetch context for the query while passing the query through.
    retrieval_step = {
        "context": itemgetter("query") | hf_retriever,
        "query": itemgetter("query"),
    }
    lcel_rag_chain = retrieval_step | rag_prompt | hf_llm
    cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
@cl.on_message
async def main(message: cl.Message):
    """
    This function will be called every time a message is received from a session.

    We will use the LCEL RAG chain to generate a response to the user query.

    The LCEL RAG chain is stored in the user session, and is unique to each user session - this is why we can access it here.
    """
    lcel_rag_chain = cl.user_session.get("lcel_rag_chain")
    msg = cl.Message(content="")
    # Use the chain's native async streaming API instead of wrapping the
    # blocking sync `.stream()` generator with `cl.make_async`: iterating a
    # sync generator inside this coroutine would block the event loop between
    # chunks, stalling every other user session while tokens arrive.
    async for chunk in lcel_rag_chain.astream(
        {"query": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)
    await msg.send()
chainlit.md ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Paul Graham Essays RAG Application
2
+
3
+ This application is a Retrieval-Augmented Generation (RAG) system that allows users to ask questions about Paul Graham's essays. The system uses semantic search to find relevant passages from the essays and generates responses based on the retrieved context.
4
+
5
+ ## Features
6
+
7
+ - Semantic search using HuggingFace embeddings
8
+ - Context-aware response generation
9
+ - Chunk-based document processing
10
+ - Interactive chat interface
11
+ - Support for multiple questions in a single session
12
+
13
+ ## How It Works
14
+
15
+ 1. The application processes Paul Graham's essays by splitting them into chunks of 1000 characters with 30 character overlap
16
+ 2. When a user asks a question, the system:
17
+ - Converts the question into an embedding
18
+ - Finds the most relevant passages from the essays
19
+ - Combines the question and context
20
+ - Generates a response using a HuggingFace LLM
21
+
22
+ ## Example Questions
23
+
24
+ Here are some example questions you can ask the system:
25
+
26
+ 1. "What are Paul Graham's views on startup funding and when should founders raise money?"
27
+
28
+ 2. "How does Paul Graham define a good startup idea and what are the key characteristics he looks for?"
29
+
30
+ 3. "What does Paul Graham say about the relationship between programming languages and productivity?"
31
+
32
+ 4. "What are Paul Graham's thoughts on the importance of focus and how it relates to startup success?"
33
+
34
+ 5. "How does Paul Graham describe the process of finding product-market fit?"
35
+
36
+ 6. "What are Paul Graham's views on the role of luck in startup success?"
37
+
38
+ 7. "How does Paul Graham define a 'good' programmer and what qualities does he emphasize?"
39
+
40
+ 8. "What does Paul Graham say about the importance of user feedback in the early stages of a startup?"
41
+
42
+ 9. "How does Paul Graham describe the relationship between founders and investors?"
43
+
44
+ 10. "What are Paul Graham's thoughts on the role of competition in the startup ecosystem?"
45
+
46
+ ## Technical Details
47
+
48
+ ### Embedding Model
49
+ - Uses [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5)
50
+ - A powerful English language embedding model optimized for semantic search
51
+ - Provides high-quality vector representations for text chunks
52
+ - Enables efficient similarity search across the essay corpus
53
+
54
+ ### Language Model
55
+ - Uses [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct)
56
+ - An 8B parameter instruction-tuned model
57
+ - Optimized for dialogue and instruction following
58
+ - Capable of generating detailed, context-aware responses
59
+
60
+ ### System Architecture
61
+ - Document chunks are stored in a vector database for efficient retrieval
62
+ - The RAG system uses a custom prompt template that combines the user's question with retrieved context
63
+ - Responses are generated using the HuggingFace LLM endpoint
64
+ - Each user session maintains its own RAG chain instance (no cross-message conversation history is stored)
65
+
66
+ ## Getting Started
67
+
68
+ 1. Make sure you have the required dependencies installed
69
+ 2. Place Paul Graham's essays in the `data/paul_graham_essays.txt` file
70
+ 3. Run the application using `chainlit run app.py`
71
+ 4. Open your browser to the provided local URL
72
+ 5. Start asking questions about Paul Graham's essays
73
+
74
+ ## Best Practices
75
+
76
+ - Ask specific questions to get more focused answers
77
+ - Use natural language - the system understands conversational queries
78
+ - You can ask follow-up questions to dive deeper into topics
79
+ - The system works best with questions that have clear answers in the essays
80
+
81
+ ## Limitations
82
+
83
+ - The system can only answer questions based on the content in Paul Graham's essays
84
+ - Very specific or technical questions might not have enough context in the essays
85
+ - The quality of answers depends on the relevance of the retrieved passages
86
+ - The system might not always provide complete answers for complex topics
87
+
88
+ ## Contributing
89
+
90
+ Feel free to contribute to this project by:
91
+ - Adding more example questions
92
+ - Improving the prompt template
93
+ - Enhancing the document processing pipeline
94
+ - Adding new features to the chat interface
data/paul_graham_essays.txt ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "15-app"
3
+ version = "0.1.0"
4
+ description = "Session 15 - Open Source Endpoints"
5
+ readme = "README.md"
6
+ requires-python = ">=3.9"
7
+ dependencies = [
8
+ "asyncio===3.4.3",
9
+ "chainlit==2.2.1",
10
+ "huggingface-hub==0.27.0",
11
+ "langchain-huggingface==0.1.2",
12
+ "langchain==0.3.19",
13
+ "langchain-community==0.3.18",
14
+ "langsmith==0.3.11",
15
+ "python-dotenv==1.0.1",
16
+ "tqdm==4.67.1",
17
+ "langchain-openai==0.3.7",
18
+ "langchain-text-splitters==0.3.6",
19
+ "jupyter>=1.1.1",
20
+ "faiss-cpu>=1.10.0",
21
+ "websockets>=15.0",
22
+ ]
uv.lock ADDED
The diff for this file is too large to render. See raw diff