Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files- Dockerfile +18 -0
- README.md +21 -5
- app.py +101 -0
- combined_data.json +0 -0
- pyproject.toml +42 -0
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image: slim Debian bookworm with Python 3.13 and the uv package manager preinstalled.
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Run as a non-root user (uid 1000 is the Hugging Face Spaces convention).
RUN useradd -m -u 1000 user
USER user

# HOME/PATH for the non-root user; UVICORN_WS_PROTOCOL pins the websocket
# backend used by Chainlit's uvicorn server.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    UVICORN_WS_PROTOCOL=websockets

WORKDIR $HOME/app

# Copy the application source, owned by the non-root user.
COPY --chown=user . $HOME/app

# Install project dependencies declared in pyproject.toml into the uv venv.
RUN uv sync

# Hugging Face Spaces routes HTTP traffic to port 7860.
EXPOSE 7860

CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,10 +1,26 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Pilates App Fine_Tuned
|
| 3 |
+
emoji: 📚
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: red
|
| 6 |
sdk: docker
|
| 7 |
+
app_file: app.py
|
| 8 |
pinned: false
|
| 9 |
+
license: mit
|
| 10 |
+
short_description: Tool to provide users reformer exercises
|
| 11 |
+
startup_duration_timeout: 1h
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Pilates Reformer RAG App Fine_Tuned
|
| 15 |
+
|
| 16 |
+
This Chainlit app answers questions using Pilates reformer videos and textbooks. All data is preloaded from `combined_data.json`.
|
| 17 |
+
|
| 18 |
+
## Run Locally
|
| 19 |
+
|
| 20 |
+
```bash
|
| 21 |
+
uv run chainlit run app.py
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
## Or Deploy to Hugging Face Space with Docker
|
| 25 |
+
Just upload this directory and you're done.
|
| 26 |
+
|
app.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
from langchain_core.documents import Document
|
| 4 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 5 |
+
from langchain_community.vectorstores import FAISS
|
| 6 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 7 |
+
from langchain_openai import ChatOpenAI
|
| 8 |
+
from langchain.chains import RetrievalQA
|
| 9 |
+
import chainlit as cl
|
| 10 |
+
|
| 11 |
+
from operator import itemgetter
|
| 12 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 13 |
+
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
|
| 14 |
+
|
| 15 |
+
# === Load and prepare data ===
# combined_data.json is a list of {"content": ..., "metadata": ...} records
# exported ahead of time from the Pilates video/textbook pipeline.
with open("combined_data.json", "r", encoding="utf-8") as f:
    raw_data = json.load(f)

all_docs = [
    Document(page_content=entry["content"], metadata=entry["metadata"])
    for entry in raw_data
]

# Chunk documents for retrieval: 750 chars with 100 overlap keeps chunks
# small while preserving continuity across boundaries.
# (Uses the RecursiveCharacterTextSplitter already imported at the top of the
# file; the redundant second import and the stray `len(split_documents)`
# notebook leftover were removed.)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=100)
split_documents = text_splitter.split_documents(all_docs)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# Embedding model used both to index and to query the collection.
# NOTE(review): a second, fine-tuned model
# ("AneetaXavier/reformer-pilates-embed-ft-49fc1835-...") was previously
# instantiated here but never passed to the vector store — it only cost a
# model download at startup, so it has been removed. If the fine-tuned
# embeddings are the intended ones, pass them as `embedding=` below and make
# sure their vector size matches the collection's VectorParams.
embeddings = HuggingFaceEmbeddings(model_name="bsmith3715/legal-ft-demo_final")

from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# In-memory Qdrant: the index lives in this process and is rebuilt on every
# startup from combined_data.json.
client = QdrantClient(":memory:")

# size=768 assumes the embedding model emits 768-dim vectors — TODO confirm
# against the model card; a mismatch fails at insert time.
client.create_collection(
    collection_name="reformer_docs",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
)

vector_store_ft = QdrantVectorStore(
    client=client,
    collection_name="reformer_docs",
    embedding=embeddings,
)

# Index every chunk, then expose a top-5 similarity retriever.
_ = vector_store_ft.add_documents(documents=split_documents)

retriever_finetune = vector_store_ft.as_retriever(search_kwargs={"k": 5})
|
| 60 |
+
|
| 61 |
+
# === Load LLM ===
# Deterministic answers (temperature=0) from a small GPT-4.1 variant.
llm = ChatOpenAI(model_name="gpt-4.1-mini", temperature=0)

from langchain.prompts import ChatPromptTemplate

# Grounded-answering prompt: the model may only use the retrieved context.
RAG_PROMPT = """\
You are a helpful assistant who answers questions based on provided context. You must only use the provided context, and cannot use your own knowledge.

### Question
{question}

### Context
{context}
"""

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

# RAG pipeline, built in named stages:
#   1. retrieve: fan the question out to the retriever while passing the
#      question through unchanged;
#   2. re-assert the context key on the mapping;
#   3. produce {"response": parsed answer string, "context": retrieved docs}.
_retrieve = RunnableParallel(
    context=itemgetter("question") | retriever_finetune,
    question=itemgetter("question"),
)
_answer = {
    "response": rag_prompt | llm | StrOutputParser(),
    "context": itemgetter("context"),
}
finetune_rag_chain = (
    _retrieve
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | _answer
)
|
| 83 |
+
|
| 84 |
+
# === Chainlit start event ===
@cl.on_chat_start
async def start():
    """Greet the user and stash the RAG chain in the user session.

    BUG FIX: the chain was stored only under "qa_chain" while the message
    handler reads "chain", so every query crashed on a None chain. Store it
    under both keys so either lookup works.
    """
    await cl.Message("🤸 Ready! Ask me anything about Reformer Pilates.").send()
    cl.user_session.set("chain", finetune_rag_chain)
    cl.user_session.set("qa_chain", finetune_rag_chain)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
@cl.on_message
async def main(message):
    """Answer a user message with the RAG chain stored at chat start.

    BUG FIXES:
    - this handler read session key "chain" but chat start stored "qa_chain";
      fall back to "qa_chain" so either key works.
    - LCEL runnables have no `arun_pipeline`; use `ainvoke` with the
      {"question": ...} input the chain expects. Its "response" value is the
      fully parsed answer string, not an async stream, so send it directly.
    """
    chain = cl.user_session.get("chain") or cl.user_session.get("qa_chain")

    msg = cl.Message(content="")
    result = await chain.ainvoke({"question": message.content})

    msg.content = result["response"]
    await msg.send()
|
combined_data.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
name = "pilates_fine_tuned"
version = "0.1.0"
description = "A fine-tuned pilates project."
# Matches the python3.13 base image in the Dockerfile; uv needs this field
# to resolve and sync the project environment.
requires-python = ">=3.13"
dependencies = [
    "accelerate>=1.3.0",
    "arxiv>=2.2.0",
    "beautifulsoup4>=4.13.3",
    "chainlit>=2.5.5",
    "datasets>=3.6.0",
    "faiss-cpu>=1.11.0",
    "ipykernel>=6.29.5",
    "ipywidgets>=8.1.5",
    "langchain>=0.3.25",
    "langchain-cohere>=0.4.4",
    "langchain-community>=0.3.24",
    "langchain-core>=0.3.34",
    "langchain-huggingface>=0.1.2",
    "langchain-openai>=0.3.16",
    "langchain-qdrant>=0.2.0",
    "langchain-text-splitters>=0.3.8",
    "langgraph>=0.4.3",
    "libmagic>=1.0",
    "lxml>=5.3.1",
    "nltk==3.9.1",
    "pandas>=2.2.3",
    "pyarrow>=20.0.0",
    "pymupdf>=1.25.5",
    "python-pptx==1.0.2",
    "pytube>=15.0.0",
    "ragas>=0.2.15",
    "sentence-transformers>=3.4.1",
    "torch>=2.7.0",
    "tqdm>=4.67.1",
    "transformers[torch]>=4.48.3",
    "unstructured>=0.17.2",
    "wandb>=0.19.6",
    "websockets==11.0.3",
    "youtube-transcript-api>=1.0.3",
]
|
| 41 |
+
|
| 42 |
+
|