Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files- Dockerfile +18 -0
- README.md +21 -5
- app.py +101 -0
- combined_data.json +0 -0
- pyproject.toml +42 -0
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image: slim Debian bookworm with Python 3.13 and the uv package manager preinstalled.
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Run as a non-root user (uid 1000 is the Hugging Face Spaces convention).
RUN useradd -m -u 1000 user
USER user

# HOME/PATH for the non-root user; UVICORN_WS_PROTOCOL pins the websocket
# backend used by Chainlit's uvicorn server.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    UVICORN_WS_PROTOCOL=websockets

WORKDIR $HOME/app

# Copy the application source, owned by the non-root user.
COPY --chown=user . $HOME/app

# Install project dependencies declared in pyproject.toml into the uv venv.
RUN uv sync

# Hugging Face Spaces routes HTTP traffic to port 7860.
EXPOSE 7860

CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,10 +1,26 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Pilates App Fine_Tuned
|
| 3 |
+
emoji: 📚
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: red
|
| 6 |
sdk: docker
|
| 7 |
+
app_file: app.py
|
| 8 |
pinned: false
|
| 9 |
+
license: mit
|
| 10 |
+
short_description: Tool to provide users reformer exercises
|
| 11 |
+
startup_duration_timeout: 1h
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Pilates Reformer RAG App Fine_Tuned
|
| 15 |
+
|
| 16 |
+
This Chainlit app answers questions using Pilates reformer videos and textbooks. All data is preloaded from `combined_data.json`.
|
| 17 |
+
|
| 18 |
+
## Run Locally
|
| 19 |
+
|
| 20 |
+
```bash
|
| 21 |
+
uv run chainlit run app.py
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
## Or Deploy to Hugging Face Space with Docker
|
| 25 |
+
Just upload this directory and you're done.
|
| 26 |
+
|
app.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
from langchain_core.documents import Document
|
| 4 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 5 |
+
from langchain_community.vectorstores import FAISS
|
| 6 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 7 |
+
from langchain_openai import ChatOpenAI
|
| 8 |
+
from langchain.chains import RetrievalQA
|
| 9 |
+
import chainlit as cl
|
| 10 |
+
|
| 11 |
+
from operator import itemgetter
|
| 12 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 13 |
+
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
|
| 14 |
+
|
| 15 |
+
# === Load and prepare data ===
# combined_data.json is a list of {"content": ..., "metadata": ...} records
# exported ahead of time from the Pilates video/textbook pipeline.
with open("combined_data.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

all_docs = [
    Document(page_content=entry["content"], metadata=entry["metadata"])
    for entry in raw_data
]

# Chunk documents for retrieval: 750 chars with 100 overlap keeps chunks
# small while preserving continuity across boundaries.
# (Uses the RecursiveCharacterTextSplitter already imported at the top of the
# file; the redundant second import and the stray `len(split_documents)`
# notebook leftover were removed.)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=100)
split_documents = text_splitter.split_documents(all_docs)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# Embedding model used both to index and to query the collection.
# NOTE(review): a second, fine-tuned model
# ("AneetaXavier/reformer-pilates-embed-ft-49fc1835-...") was previously
# instantiated here but never passed to the vector store — it only cost a
# model download at startup, so it has been removed. If the fine-tuned
# embeddings are the intended ones, pass them as `embedding=` below and make
# sure their vector size matches the collection's VectorParams.
embeddings = HuggingFaceEmbeddings(model_name="bsmith3715/legal-ft-demo_final")

from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# In-memory Qdrant: the index lives in this process and is rebuilt on every
# startup from combined_data.json.
client = QdrantClient(":memory:")

# size=768 assumes the embedding model emits 768-dim vectors — TODO confirm
# against the model card; a mismatch fails at insert time.
client.create_collection(
    collection_name="reformer_docs",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
)

vector_store_ft = QdrantVectorStore(
    client=client,
    collection_name="reformer_docs",
    embedding=embeddings,
)

# Index every chunk, then expose a top-5 similarity retriever.
_ = vector_store_ft.add_documents(documents=split_documents)

retriever_finetune = vector_store_ft.as_retriever(search_kwargs={"k": 5})
|
| 60 |
+
|
| 61 |
+
# === Load LLM ===
# Deterministic answers (temperature=0) from a small GPT-4.1 variant.
llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0)

from langchain.prompts import ChatPromptTemplate

# Grounded-answering prompt: the model may only use the retrieved context.
RAG_PROMPT = """\
You are a helpful assistant who answers questions based on provided context. You must only use the provided context, and cannot use your own knowledge.

### Question
{question}

### Context
{context}
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

# RAG pipeline, built in named stages:
#   1. retrieve: fan the question out to the retriever while passing the
#      question through unchanged;
#   2. re-assert the context key on the mapping;
#   3. produce {"response": parsed answer string, "context": retrieved docs}.
_retrieve = RunnableParallel(
    context=itemgetter("question") | retriever_finetune,
    question=itemgetter("question"),
)
_answer = {
    "response": rag_prompt | llm | StrOutputParser(),
    "context": itemgetter("context"),
}
finetune_rag_chain = (
    _retrieve
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | _answer
)
|
| 83 |
+
|
| 84 |
+
# === Chainlit start event ===
@cl.on_chat_start
async def start():
    """Greet the user and stash the RAG chain in the user session.

    BUG FIX: the chain was stored only under "qa_chain" while the message
    handler reads "chain", so every query crashed on a None chain. Store it
    under both keys so either lookup works.
    """
    await cl.Message("🤸 Ready! Ask me anything about Reformer Pilates.").send()
    cl.user_session.set("chain", finetune_rag_chain)
    cl.user_session.set("qa_chain", finetune_rag_chain)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
@cl.on_message
async def main(message):
    """Answer a user message with the RAG chain stored at chat start.

    BUG FIXES:
    - this handler read session key "chain" but chat start stored "qa_chain";
      fall back to "qa_chain" so either key works.
    - LCEL runnables have no `arun_pipeline`; use `ainvoke` with the
      {"question": ...} input the chain expects. Its "response" value is the
      fully parsed answer string, not an async stream, so send it directly.
    """
    chain = cl.user_session.get("chain") or cl.user_session.get("qa_chain")

    msg = cl.Message(content="")
    result = await chain.ainvoke({"question": message.content})

    msg.content = result["response"]
    await msg.send()
|
combined_data.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
name = "pilates_fine_tuned"
version = "0.1.0"
description = "A fine-tuned pilates project."
# Matches the python3.13 base image in the Dockerfile; uv needs this field
# to resolve and sync the project environment.
requires-python = ">=3.13"
dependencies = [
    "accelerate>=1.3.0",
    "arxiv>=2.2.0",
    "beautifulsoup4>=4.13.3",
    "chainlit>=2.5.5",
    "datasets>=3.6.0",
    "faiss-cpu>=1.11.0",
    "ipykernel>=6.29.5",
    "ipywidgets>=8.1.5",
    "langchain>=0.3.25",
    "langchain-cohere>=0.4.4",
    "langchain-community>=0.3.24",
    "langchain-core>=0.3.34",
    "langchain-huggingface>=0.1.2",
    "langchain-openai>=0.3.16",
    "langchain-qdrant>=0.2.0",
    "langchain-text-splitters>=0.3.8",
    "langgraph>=0.4.3",
    "libmagic>=1.0",
    "lxml>=5.3.1",
    "nltk==3.9.1",
    "pandas>=2.2.3",
    "pyarrow>=20.0.0",
    "pymupdf>=1.25.5",
    "python-pptx==1.0.2",
    "pytube>=15.0.0",
    "ragas>=0.2.15",
    "sentence-transformers>=3.4.1",
    "torch>=2.7.0",
    "tqdm>=4.67.1",
    "transformers[torch]>=4.48.3",
    "unstructured>=0.17.2",
    "wandb>=0.19.6",
    "websockets==11.0.3",
    "youtube-transcript-api>=1.0.3",
]
|
| 41 |
+
|
| 42 |
+
|