bsmith3715 commited on
Commit
1fbe708
·
verified ·
1 Parent(s): 2b9c81e

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +18 -0
  2. README.md +21 -5
  3. app.py +101 -0
  4. combined_data.json +0 -0
  5. pyproject.toml +42 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# uv-based base image: Python 3.13 on Debian bookworm-slim with uv preinstalled.
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Create and switch to a non-root user (uid 1000) — the convention for
# Hugging Face Spaces Docker images.
RUN useradd -m -u 1000 user
USER user

# UVICORN_WS_PROTOCOL=websockets forces uvicorn's websockets backend
# (pyproject pins websockets==11.0.3 — presumably for compatibility; confirm).
ENV HOME=/home/user \
PATH=/home/user/.local/bin:$PATH \
UVICORN_WS_PROTOCOL=websockets

WORKDIR $HOME/app

# Copy the app source, owned by the non-root user so uv can write the venv.
COPY --chown=user . $HOME/app

# Resolve and install dependencies declared in pyproject.toml.
RUN uv sync

# Hugging Face Spaces routes external traffic to port 7860.
EXPOSE 7860

# Launch the chainlit app bound to all interfaces on the Spaces port.
CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,26 @@
1
  ---
2
- title: DEMO 6 3
3
- emoji: 📉
4
- colorFrom: green
5
- colorTo: blue
6
  sdk: docker
 
7
  pinned: false
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Pilates App Fine_Tuned
3
+ emoji: 📚
4
+ colorFrom: red
5
+ colorTo: red
6
  sdk: docker
7
+ app_file: app.py
8
  pinned: false
9
+ license: mit
10
+ short_description: Tool to provide users reformer exercises
11
+ startup_duration_timeout: 1h
12
  ---
13
 
14
+ # Pilates Reformer RAG App Fine_Tuned
15
+
16
+ This Chainlit app answers questions using Pilates reformer videos and textbooks. All data is preloaded from `combined_data.json`.
17
+
18
+ ## Run Locally
19
+
20
+ ```bash
21
+ uv run chainlit run app.py
22
+ ```
23
+
24
+ ## Or Deploy to Hugging Face Space with Docker
25
+ Just upload this directory and you're done.
26
+
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Chainlit RAG app: answers Pilates reformer questions from preloaded data.

All source documents are loaded from combined_data.json at startup, chunked,
embedded with a fine-tuned HuggingFace model, and indexed into an in-memory
Qdrant collection. An LCEL chain retrieves the top-5 chunks and answers with
gpt-4.1-mini, restricted to the retrieved context.
"""

import json
import os
from operator import itemgetter

import chainlit as cl
from langchain.chains import RetrievalQA  # imported by the original; kept
from langchain.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS  # imported by the original; kept
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_qdrant import QdrantVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# === Load and prepare data ===
# Each entry is expected to carry "content" and "metadata" keys.
with open("combined_data.json", "r") as f:
    raw_data = json.load(f)

all_docs = [
    Document(page_content=entry["content"], metadata=entry["metadata"])
    for entry in raw_data
]

# Chunk documents for retrieval; 100-char overlap preserves context at
# chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=100)
split_documents = text_splitter.split_documents(all_docs)

# Fine-tuned embedding model used to index and query the vector store.
embeddings = HuggingFaceEmbeddings(model_name="bsmith3715/legal-ft-demo_final")

# === Set up the in-memory Qdrant vector store ===
# size=768 must match the embedding model's output dimension — TODO confirm.
client = QdrantClient(":memory:")

client.create_collection(
    collection_name="reformer_docs",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
)

vector_store_ft = QdrantVectorStore(
    client=client,
    collection_name="reformer_docs",
    embedding=embeddings,
)

# NOTE(review): embedding_model is loaded but never used below — the store
# above is bound to `embeddings`. Confirm which fine-tuned model is intended;
# loading both downloads two models at startup.
embedding_model = HuggingFaceEmbeddings(
    model_name="AneetaXavier/reformer-pilates-embed-ft-49fc1835-9968-433d-9c45-1538ea91dcc9"
)

# Index the chunks and expose a top-5 retriever.
_ = vector_store_ft.add_documents(documents=split_documents)

retriever_finetune = vector_store_ft.as_retriever(search_kwargs={"k": 5})

# === Load LLM ===
# temperature=0 for deterministic, context-grounded answers.
llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0)

RAG_PROMPT = """\
You are a helpful assistant who answers questions based on provided context. You must only use the provided context, and cannot use your own knowledge.

### Question
{question}

### Context
{context}
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

# LCEL chain: fetch context for the question, then answer; the final dict
# exposes both the model's text ("response") and the retrieved docs ("context").
finetune_rag_chain = (
    {"context": itemgetter("question") | retriever_finetune, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": rag_prompt | llm | StrOutputParser(), "context": itemgetter("context")}
)
84
# === Chainlit start event ===
@cl.on_chat_start
async def start():
    """Greet the user and stash the RAG chain in the user session."""
    await cl.Message("🤸 Ready! Ask me anything about Reformer Pilates.").send()
    # Bug fix: the message handler looked the chain up under "chain" while it
    # was only stored under "qa_chain", so it always got None. Store under
    # both keys so either reader works.
    cl.user_session.set("qa_chain", finetune_rag_chain)
    cl.user_session.set("chain", finetune_rag_chain)
91
@cl.on_message
async def main(message):
    """Answer one user message with the RAG chain stored at chat start."""
    # Bug fix: the chain was stored under "qa_chain" at chat start but fetched
    # here as "chain", yielding None. Check the correct key first, keeping the
    # old key as a fallback.
    chain = cl.user_session.get("qa_chain") or cl.user_session.get("chain")
    if chain is None:
        await cl.Message(content="Session not initialized — please reload.").send()
        return

    # Bug fix: LCEL runnables have no `arun_pipeline`; invoke asynchronously
    # with the {"question": ...} input shape the chain was built for. The
    # chain's output dict carries the answer text under "response".
    result = await chain.ainvoke({"question": message.content})
    await cl.Message(content=result["response"]).send()
combined_data.json ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Project metadata and dependencies for the Pilates reformer RAG app.
[project]
name = "pilates_fine_tuned"
version = "0.1.0"
description = "A fine-tuned pilates project."
# Floors (>=) for most packages; exact pins (==) where a specific version is
# required — e.g. websockets==11.0.3 alongside UVICORN_WS_PROTOCOL=websockets
# in the Dockerfile (presumably a compatibility pin; confirm).
dependencies = [
    "accelerate>=1.3.0",
    "arxiv>=2.2.0",
    "beautifulsoup4>=4.13.3",
    # Chat UI framework the app is served with.
    "chainlit>=2.5.5",
    "datasets>=3.6.0",
    "faiss-cpu>=1.11.0",
    "ipykernel>=6.29.5",
    "ipywidgets>=8.1.5",
    # LangChain stack: core, integrations, and the Qdrant vector store
    # used by app.py.
    "langchain>=0.3.25",
    "langchain-cohere>=0.4.4",
    "langchain-community>=0.3.24",
    "langchain-core>=0.3.34",
    "langchain-huggingface>=0.1.2",
    "langchain-openai>=0.3.16",
    "langchain-qdrant>=0.2.0",
    "langchain-text-splitters>=0.3.8",
    "langgraph>=0.4.3",
    "libmagic>=1.0",
    "lxml>=5.3.1",
    "nltk==3.9.1",
    "pandas>=2.2.3",
    "pyarrow>=20.0.0",
    "pymupdf>=1.25.5",
    "python-pptx==1.0.2",
    "pytube>=15.0.0",
    "ragas>=0.2.15",
    # Embedding model runtime for the fine-tuned HuggingFace embeddings.
    "sentence-transformers>=3.4.1",
    "torch>=2.7.0",
    "tqdm>=4.67.1",
    "transformers[torch]>=4.48.3",
    "unstructured>=0.17.2",
    "wandb>=0.19.6",
    "websockets==11.0.3",
    "youtube-transcript-api>=1.0.3",
]