mafzaal commited on
Commit
ac0eae8
·
1 Parent(s): 52daacf

Implement initial project structure and setup

Browse files
Files changed (8) hide show
  1. .env.sample +5 -0
  2. .gitignore +6 -0
  3. Dockerfile +29 -0
  4. app.py +183 -0
  5. chainlit.md +94 -0
  6. data/paul_graham_essays.txt +0 -0
  7. pyproject.toml +22 -0
  8. uv.lock +0 -0
.env.sample ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # !!! DO NOT UPDATE THIS FILE DIRECTLY. MAKE A COPY AND RENAME IT `.env` TO PROCEED !!! #
2
+ HF_LLM_ENDPOINT="YOUR_LLM_ENDPOINT_URL_HERE"
3
+ HF_EMBED_ENDPOINT="YOUR_EMBED_MODEL_ENDPOINT_URL_HERE"
4
+ HF_TOKEN="YOUR_HF_TOKEN_HERE"
5
+ # !!! DO NOT UPDATE THIS FILE DIRECTLY. MAKE A COPY AND RENAME IT `.env` TO PROCEED !!! #
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ __pycache__/
3
+ .chainlit
4
+ *.faiss
5
+ *.pkl
6
+ .files
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Get a distribution that has uv already installed
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Add user - this is the user that will run the app
# If you do not set user, the app will run as root (undesirable)
RUN useradd -m -u 1000 user
USER user

# Set the home directory and path so user-level installs are found first
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# NOTE(review): presumably forces uvicorn to use the `websockets` protocol
# implementation for Chainlit's live connection — confirm against deployment docs
ENV UVICORN_WS_PROTOCOL=websockets

# Set the working directory
WORKDIR $HOME/app

# Copy the app to the container (chown so the non-root user can read/write it)
COPY --chown=user . $HOME/app

# Install the dependencies
# RUN uv sync --frozen
RUN uv sync

# Expose the port (7860 is the port the CMD below binds to)
EXPOSE 7860

# Run the app
CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import chainlit as cl
3
+ from dotenv import load_dotenv
4
+ from operator import itemgetter
5
+ from langchain_huggingface import HuggingFaceEndpoint
6
+ from langchain_community.document_loaders import TextLoader
7
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_huggingface import HuggingFaceEndpointEmbeddings
10
+ from langchain_core.prompts import PromptTemplate
11
+ from langchain.schema.output_parser import StrOutputParser
12
+ from langchain.schema.runnable import RunnablePassthrough
13
+ from langchain.schema.runnable.config import RunnableConfig
14
+ from tqdm.asyncio import tqdm_asyncio
15
+ import asyncio
16
+ from tqdm.asyncio import tqdm
17
+
# GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
# ---- ENV VARIABLES ---- #
"""
This function will load our environment file (.env) if it is present.

NOTE: Make sure that .env is in your .gitignore file - it is by default, but please ensure it remains there.
"""
load_dotenv()

"""
We will load our environment variables here.
"""
# Required configuration. Using os.environ[...] (not .get) is deliberate:
# a missing variable raises KeyError at import time, failing fast instead of
# producing a confusing error deep inside a request.
HF_LLM_ENDPOINT = os.environ["HF_LLM_ENDPOINT"]
HF_EMBED_ENDPOINT = os.environ["HF_EMBED_ENDPOINT"]
HF_TOKEN = os.environ["HF_TOKEN"]
# ---- GLOBAL DECLARATIONS ---- #

# -- RETRIEVAL -- #
"""
1. Load Documents from Text File
2. Split Documents into Chunks
3. Load HuggingFace Embeddings (remember to use the URL we set above)
4. Index Files if they do not exist, otherwise load the vectorstore
"""
# Load the whole essay corpus from a single text file.
document_loader = TextLoader("./data/paul_graham_essays.txt")
documents = document_loader.load()

# 1000-character chunks with a 30-character overlap so sentences straddling a
# chunk boundary still appear (partially) in both neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
split_documents = text_splitter.split_documents(documents)

# Embeddings are computed remotely by a HuggingFace inference endpoint
# (HF_EMBED_ENDPOINT) rather than locally.
hf_embeddings = HuggingFaceEndpointEmbeddings(
    model=HF_EMBED_ENDPOINT,
    task="feature-extraction",
    huggingfacehub_api_token=HF_TOKEN,
)
async def add_documents_async(vectorstore, documents):
    """Append *documents* to an existing *vectorstore* via its async bulk API."""
    await vectorstore.aadd_documents(documents)
async def process_batch(vectorstore, batch, is_first_batch, pbar):
    """Embed one batch of document chunks.

    The very first batch creates a brand-new FAISS index; every subsequent
    batch is appended to the index that first call produced.  Advances *pbar*
    by the batch size and returns the (possibly newly created) vectorstore.
    """
    if is_first_batch:
        # Bootstrap: build the index from scratch using this batch.
        store = await FAISS.afrom_documents(batch, hf_embeddings)
    else:
        # Grow the existing index in place.
        await add_documents_async(vectorstore, batch)
        store = vectorstore
    pbar.update(len(batch))
    return store
# Rename async def main() to async def build_retriever() to avoid name conflict
async def build_retriever():
    """Index every document chunk into FAISS and return a retriever over it.

    The first batch is awaited sequentially because it creates the
    vectorstore; all remaining batches are embedded concurrently via
    asyncio.gather and appended to that store.
    """
    print("Indexing Files")
    vectorstore = None
    batch_size = 32
    # Slice the pre-split chunks into fixed-size batches for the embedding endpoint.
    batches = [split_documents[i:i+batch_size] for i in range(0, len(split_documents), batch_size)]
    async def process_all_batches():
        # `nonlocal` lets the first batch publish the newly created store
        # back to the enclosing scope.
        nonlocal vectorstore
        tasks = []
        pbars = []
        for i, batch in enumerate(batches):
            # One tqdm bar per batch; position=i stacks the bars in the terminal.
            pbar = tqdm(total=len(batch), desc=f"Batch {i+1}/{len(batches)}", position=i)
            pbars.append(pbar)
            if i == 0:
                # Must be awaited here: later batches need the vectorstore
                # this call creates before they can be scheduled against it.
                vectorstore = await process_batch(None, batch, True, pbar)
            else:
                # Deferred — all non-first batches run concurrently below.
                tasks.append(process_batch(vectorstore, batch, False, pbar))
        if tasks:
            await asyncio.gather(*tasks)
        for pbar in pbars:
            pbar.close()
    await process_all_batches()
    if vectorstore is None:
        # Only possible when split_documents was empty (no batches at all).
        raise RuntimeError("Vectorstore was not created.")
    hf_retriever = vectorstore.as_retriever()
    print("\nIndexing complete. Vectorstore is ready for use.")
    return hf_retriever
# Update run() to use build_retriever
async def run():
    """Startup coroutine: build the retriever exactly once at import time."""
    return await build_retriever()

hf_retriever = asyncio.run(run())
# -- AUGMENTED -- #
"""
1. Define a String Template
2. Create a Prompt Template from the String Template
"""
### 1. DEFINE STRING TEMPLATE
# Llama-3-style chat template: system rules first, then the user's query plus
# the retrieved context, ending at the assistant header where generation begins.
# NOTE(review): the <|...|> special tokens assume the serving model uses the
# Llama 3 chat format — confirm against the deployed endpoint's model.
RAG_PROMPT_TEMPLATE = """
<|start_header_id|>system<|end_header_id|>
You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>

<|start_header_id|>user<|end_header_id|>
User Query:
{query}

Context:
{context}<|eot_id|>

<|start_header_id|>assistant<|end_header_id|>
"""

### 2. CREATE PROMPT TEMPLATE
# from_template infers the input variables ({query}, {context}) automatically.
rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
# -- GENERATION -- #
"""
1. Create a HuggingFaceEndpoint for the LLM
"""
### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
hf_llm = HuggingFaceEndpoint(
    endpoint_url=HF_LLM_ENDPOINT,
    max_new_tokens=512,       # cap on generated tokens per response
    top_k=10,                 # sample from only the 10 most likely tokens
    top_p=0.95,               # nucleus-sampling probability mass cutoff
    temperature=0.3,          # low temperature -> mostly focused, stable answers
    repetition_penalty=1.15,  # discourage the model from repeating itself
    huggingfacehub_api_token=HF_TOKEN,
)
@cl.author_rename
def rename(original_author: str):
    """
    This function can be used to rename the 'author' of a message.

    In this case, we're overriding the 'Assistant' author to be 'Paul Graham Essay Bot'.
    Any other author name passes through unchanged.
    """
    if original_author == "Assistant":
        return "Paul Graham Essay Bot"
    return original_author
@cl.on_chat_start
async def start_chat():
    """
    This function will be called at the start of every user session.

    We will build our LCEL RAG chain here, and store it in the user session.

    The user session is a dictionary that is unique to each user session, and is stored in the memory of the server.
    """
    # Parallel step: fetch context for the query while passing the query through.
    retrieval_step = {
        "context": itemgetter("query") | hf_retriever,
        "query": itemgetter("query"),
    }
    lcel_rag_chain = retrieval_step | rag_prompt | hf_llm
    cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
@cl.on_message
async def main(message: cl.Message):
    """
    This function will be called every time a message is received from a session.

    We will use the LCEL RAG chain to generate a response to the user query.

    The LCEL RAG chain is stored in the user session, and is unique to each user session - this is why we can access it here.
    """
    lcel_rag_chain = cl.user_session.get("lcel_rag_chain")
    msg = cl.Message(content="")
    # Use the chain's native async streaming API instead of wrapping the
    # blocking sync `.stream()` generator with `cl.make_async`: iterating a
    # sync generator inside this coroutine would block the event loop between
    # chunks, stalling every other user session while tokens arrive.
    async for chunk in lcel_rag_chain.astream(
        {"query": message.content},
        config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
    ):
        await msg.stream_token(chunk)
    await msg.send()
chainlit.md ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Paul Graham Essays RAG Application
2
+
3
+ This application is a Retrieval-Augmented Generation (RAG) system that allows users to ask questions about Paul Graham's essays. The system uses semantic search to find relevant passages from the essays and generates responses based on the retrieved context.
4
+
5
+ ## Features
6
+
7
+ - Semantic search using HuggingFace embeddings
8
+ - Context-aware response generation
9
+ - Chunk-based document processing
10
+ - Interactive chat interface
11
+ - Support for multiple questions in a single session
12
+
13
+ ## How It Works
14
+
15
+ 1. The application processes Paul Graham's essays by splitting them into chunks of 1000 characters with 30 character overlap
16
+ 2. When a user asks a question, the system:
17
+ - Converts the question into an embedding
18
+ - Finds the most relevant passages from the essays
19
+ - Combines the question and context
20
+ - Generates a response using a HuggingFace LLM
21
+
22
+ ## Example Questions
23
+
24
+ Here are some example questions you can ask the system:
25
+
26
+ 1. "What are Paul Graham's views on startup funding and when should founders raise money?"
27
+
28
+ 2. "How does Paul Graham define a good startup idea and what are the key characteristics he looks for?"
29
+
30
+ 3. "What does Paul Graham say about the relationship between programming languages and productivity?"
31
+
32
+ 4. "What are Paul Graham's thoughts on the importance of focus and how it relates to startup success?"
33
+
34
+ 5. "How does Paul Graham describe the process of finding product-market fit?"
35
+
36
+ 6. "What are Paul Graham's views on the role of luck in startup success?"
37
+
38
+ 7. "How does Paul Graham define a 'good' programmer and what qualities does he emphasize?"
39
+
40
+ 8. "What does Paul Graham say about the importance of user feedback in the early stages of a startup?"
41
+
42
+ 9. "How does Paul Graham describe the relationship between founders and investors?"
43
+
44
+ 10. "What are Paul Graham's thoughts on the role of competition in the startup ecosystem?"
45
+
46
+ ## Technical Details
47
+
48
+ ### Embedding Model
49
+ - Uses [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5)
50
+ - A powerful English language embedding model optimized for semantic search
51
+ - Provides high-quality vector representations for text chunks
52
+ - Enables efficient similarity search across the essay corpus
53
+
54
+ ### Language Model
55
+ - Uses [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct)
56
+ - An 8B parameter instruction-tuned model
57
+ - Optimized for dialogue and instruction following
58
+ - Capable of generating detailed, context-aware responses
59
+
60
+ ### System Architecture
61
+ - Document chunks are stored in a vector database for efficient retrieval
62
+ - The RAG system uses a custom prompt template that combines the user's question with retrieved context
63
+ - Responses are generated using the HuggingFace LLM endpoint
64
+ - Each user session maintains its own RAG chain instance (no cross-message conversation history is stored)
65
+
66
+ ## Getting Started
67
+
68
+ 1. Make sure you have the required dependencies installed
69
+ 2. Place Paul Graham's essays in the `data/paul_graham_essays.txt` file
70
+ 3. Run the application using `chainlit run app.py`
71
+ 4. Open your browser to the provided local URL
72
+ 5. Start asking questions about Paul Graham's essays
73
+
74
+ ## Best Practices
75
+
76
+ - Ask specific questions to get more focused answers
77
+ - Use natural language - the system understands conversational queries
78
+ - You can ask follow-up questions to dive deeper into topics
79
+ - The system works best with questions that have clear answers in the essays
80
+
81
+ ## Limitations
82
+
83
+ - The system can only answer questions based on the content in Paul Graham's essays
84
+ - Very specific or technical questions might not have enough context in the essays
85
+ - The quality of answers depends on the relevance of the retrieved passages
86
+ - The system might not always provide complete answers for complex topics
87
+
88
+ ## Contributing
89
+
90
+ Feel free to contribute to this project by:
91
+ - Adding more example questions
92
+ - Improving the prompt template
93
+ - Enhancing the document processing pipeline
94
+ - Adding new features to the chat interface
data/paul_graham_essays.txt ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "15-app"
3
+ version = "0.1.0"
4
+ description = "Session 15 - Open Source Endpoints"
5
+ readme = "README.md"
6
+ requires-python = ">=3.9"
7
+ dependencies = [
8
+ "asyncio===3.4.3",
9
+ "chainlit==2.2.1",
10
+ "huggingface-hub==0.27.0",
11
+ "langchain-huggingface==0.1.2",
12
+ "langchain==0.3.19",
13
+ "langchain-community==0.3.18",
14
+ "langsmith==0.3.11",
15
+ "python-dotenv==1.0.1",
16
+ "tqdm==4.67.1",
17
+ "langchain-openai==0.3.7",
18
+ "langchain-text-splitters==0.3.6",
19
+ "jupyter>=1.1.1",
20
+ "faiss-cpu>=1.10.0",
21
+ "websockets>=15.0",
22
+ ]
uv.lock ADDED
The diff for this file is too large to render. See raw diff