VcRlAgent commited on
Commit
91cdd71
·
1 Parent(s): e893fb4

all updates

Browse files
Files changed (6) hide show
  1. README.md +11 -14
  2. app.py +109 -0
  3. data/notes.txt +8 -0
  4. requirements.txt +17 -3
  5. src/streamlit_app copy.py +40 -0
  6. src/streamlit_app.py +102 -34
README.md CHANGED
@@ -1,20 +1,17 @@
1
  ---
2
- title: LangApp2
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
  pinned: false
11
- short_description: Language App using LLMs
12
- license: unknown
13
  ---
14
 
15
- # Welcome to Streamlit!
16
 
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
 
18
 
19
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
1
  ---
2
+ title: Tiny LLM Starter – LangChain + LlamaIndex
3
+ emoji: 🧪
4
+ colorFrom: purple
5
+ colorTo: indigo
6
+ sdk: streamlit
7
+ sdk_version: 1.36.0
8
+ app_file: app.py
 
9
  pinned: false
10
+ license: mit
 
11
  ---
12
 
13
+ Two minimal demos that run on **free CPU**:
14
 
15
+ 1) **LangChain Chat** using a local tiny HF model
16
+ 2) **LlamaIndex mini-RAG** over a tiny text file
17
 
 
 
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

import streamlit as st

# LangChain (local HF pipeline)
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser

# LlamaIndex (modular imports)
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

# Page chrome must be configured before any other Streamlit call.
st.set_page_config(
    page_title="Tiny LLM Starter",
    page_icon="🧪",
    layout="centered",
)
st.title("🧪 Tiny LLM Starter – LangChain + LlamaIndex")

# ---- Sidebar config ----
# These module-level names are read by the tab code below; keep them as-is.
st.sidebar.header("Model Settings")
MODEL_ID = st.sidebar.text_input("HF model id (seq2seq)", value="google/flan-t5-small")
MAX_NEW_TOKENS = st.sidebar.slider("max_new_tokens", 32, 512, 256, 32)
TEMP = st.sidebar.slider("temperature", 0.0, 1.0, 0.2, 0.1)

st.sidebar.markdown(
    """
**Tips**
- Uses local CPU (no key required)
- Small model → lower memory, faster cold start
- You can later add an `HF_TOKEN` secret for hosted inference
"""
)

# ---- Cache helpers to avoid reloading on every interaction ----
@st.cache_resource(show_spinner=True)
def load_langchain_pipeline(model_id: str, max_new_tokens: int):
    """Build and cache a LangChain LLM backed by a local HF seq2seq pipeline.

    Cached by (model_id, max_new_tokens) via st.cache_resource so the model
    is only downloaded/loaded once per distinct configuration.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    hf_pipe = pipeline(
        "text2text-generation",
        model=seq2seq_model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
    )
    return HuggingFacePipeline(pipeline=hf_pipe)
@st.cache_resource(show_spinner=True)
def load_llamaindex_stack(model_id: str, max_new_tokens: int, temperature: float):
    """Build and cache a LlamaIndex query engine over the ./data directory.

    Configures global LlamaIndex Settings with a tiny sentence-transformers
    embedding model and the same small HF model used by the LangChain tab,
    then indexes every file under ./data.

    Returns the index's query engine (top-3 similarity retrieval).
    """
    # Tiny, fast sentence-transformers model for embeddings
    embed = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Wrap the same tiny HF model for LlamaIndex
    llm = HuggingFaceLLM(
        model_name=model_id,
        tokenizer_name=model_id,
        context_window=2048,
        generate_kwargs={"max_new_tokens": max_new_tokens, "temperature": temperature},
        device_map="cpu",
    )

    Settings.embed_model = embed
    Settings.llm = llm

    # BUG FIX: SimpleDirectoryReader accepts `input_dir` (a single directory
    # path) or `input_files` — `input_dirs` is not a valid keyword and raised
    # a TypeError, so the RAG tab could never build an index.
    docs = SimpleDirectoryReader(input_dir="data").load_data()
    index = VectorStoreIndex.from_documents(docs)
    query_engine = index.as_query_engine(similarity_top_k=3)
    return query_engine
tab1, tab2 = st.tabs(["🟣 LangChain Chat", "🟡 LlamaIndex mini-RAG"])

# -------- Tab 1: LangChain Chat --------
with tab1:
    st.subheader("LangChain (local HF pipeline)")
    lc_llm = load_langchain_pipeline(MODEL_ID, MAX_NEW_TOKENS)

    user_q = st.text_input("Ask anything:", value="What is this app?")
    if st.button("Generate (LangChain)", type="primary"):
        prompt = PromptTemplate.from_template(
            "You are a concise, helpful assistant.\n\nQuestion: {q}\nAnswer:"
        )
        # LCEL: prompt -> LLM -> plain-string output
        chain = prompt | lc_llm | StrOutputParser()
        with st.spinner("Thinking..."):
            out = chain.invoke({"q": user_q})
        st.write(out)

# -------- Tab 2: LlamaIndex mini-RAG --------
with tab2:
    st.subheader("LlamaIndex over a tiny text file")
    st.caption("Uploads are optional; otherwise it uses ./data/notes.txt")
    uploaded = st.file_uploader("Upload a .txt file to index (optional)", type=["txt"])

    # If user uploads a file, write it into ./data and rebuild the index
    if uploaded is not None:
        os.makedirs("data", exist_ok=True)
        with open(os.path.join("data", "user.txt"), "wb") as f:
            f.write(uploaded.read())
        # BUG FIX: the query engine is cached by st.cache_resource keyed only
        # on (MODEL_ID, MAX_NEW_TOKENS, TEMP), so a newly uploaded file was
        # never indexed. Clear the cache so the next call re-reads ./data.
        load_llamaindex_stack.clear()

    qe = load_llamaindex_stack(MODEL_ID, MAX_NEW_TOKENS, TEMP)

    rag_q = st.text_input("Ask about the indexed text:", value="What does the notes file say?")
    if st.button("Search + Answer (LlamaIndex)"):
        with st.spinner("Searching + generating..."):
            ans = qe.query(rag_q)
        st.write(ans.response)
        with st.expander("Show retrieved nodes"):
            for n in ans.source_nodes:
                st.markdown(f"**Score:** {n.score:.3f}")
                st.code(n.node.get_content()[:500])
data/notes.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Welcome to your first LlamaIndex demo!
2
+ This file is deliberately small. Ask things like:
3
+ - What does this demo do?
4
+ - Which libraries does it use?
5
+ - How do I switch models?
6
+
7
+ Answer should mention Streamlit, LangChain, and LlamaIndex.
8
+
requirements.txt CHANGED
@@ -1,3 +1,17 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit>=1.36
2
+ transformers>=4.42
3
+ torch>=2.2
4
+ huggingface_hub>=0.23
5
+
6
+ # LangChain (modular imports)
7
+ langchain>=0.2.8
8
+ langchain-community>=0.2.8
9
+ langchain-huggingface>=0.0.3
10
+
11
+ # LlamaIndex (modular packages)
12
+ llama-index>=0.10.35
13
+ llama-index-llms-huggingface>=0.2.1
14
+ llama-index-embeddings-huggingface>=0.2.0
15
+
16
+ # Small, fast embeddings
17
+ sentence-transformers>=2.6.1
src/streamlit_app copy.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

# Interactive controls for the spiral's resolution and winding count.
point_count = st.slider("Number of points in spiral", 1, 10000, 1100)
turn_count = st.slider("Number of turns in spiral", 1, 300, 31)

# Parametric spiral: radius grows linearly with t while the angle winds.
t = np.linspace(0, 1, point_count)
angle = 2 * np.pi * turn_count * t
x = t * np.cos(angle)
y = t * np.sin(angle)

spiral = pd.DataFrame({
    "x": x,
    "y": y,
    "idx": t,
    "rand": np.random.randn(point_count),
})

# Color follows position along the spiral; point size is random jitter.
chart = (
    alt.Chart(spiral, height=700, width=700)
    .mark_point(filled=True)
    .encode(
        x=alt.X("x", axis=None),
        y=alt.Y("y", axis=None),
        color=alt.Color("idx", legend=None, scale=alt.Scale()),
        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
    )
)
st.altair_chart(chart)
src/streamlit_app.py CHANGED
@@ -1,40 +1,108 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
5
 
6
- """
7
- # Welcome to Streamlit!
 
 
 
 
 
 
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
 
 
 
 
 
 
 
 
 
 
 
14
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
import os

import streamlit as st

# LangChain (local HF pipeline)
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser

# LlamaIndex (modular imports)
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

st.set_page_config(page_title="Tiny LLM Starter", page_icon="🧪", layout="centered")
st.title("🧪 Tiny LLM Starter LangChain + LlamaIndex")

# ---- Sidebar config ----
st.sidebar.header("Model Settings")
MODEL_ID = st.sidebar.text_input("HF model id (seq2seq)", value="google/flan-t5-small")
MAX_NEW_TOKENS = st.sidebar.slider("max_new_tokens", 32, 512, 256, 32)
TEMP = st.sidebar.slider("temperature", 0.0, 1.0, 0.2, 0.1)

st.sidebar.markdown(
    """
**Tips**
- Uses local CPU (no key required)
- Small model → lower memory, faster cold start
- You can later add an `HF_TOKEN` secret for hosted inference
"""
)

# ---- Cache helpers to avoid reloading on every interaction ----
@st.cache_resource(show_spinner=True)
def load_langchain_pipeline(model_id: str, max_new_tokens: int):
    """Build and cache a LangChain LLM backed by a local HF seq2seq pipeline."""
    tok = AutoTokenizer.from_pretrained(model_id)
    mdl = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    gen = pipeline(
        task="text2text-generation",
        model=mdl,
        tokenizer=tok,
        max_new_tokens=max_new_tokens,
    )
    return HuggingFacePipeline(pipeline=gen)

@st.cache_resource(show_spinner=True)
def load_llamaindex_stack(model_id: str, max_new_tokens: int, temperature: float):
    """Build and cache a LlamaIndex query engine over the ./data directory.

    Returns the index's query engine (top-3 similarity retrieval).
    """
    # Tiny, fast sentence-transformers model for embeddings
    embed = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Wrap the same tiny HF model for LlamaIndex
    llm = HuggingFaceLLM(
        model_name=model_id,
        tokenizer_name=model_id,
        context_window=2048,
        generate_kwargs={"max_new_tokens": max_new_tokens, "temperature": temperature},
        device_map="cpu",
    )

    Settings.embed_model = embed
    Settings.llm = llm

    # BUG FIX: SimpleDirectoryReader accepts `input_dir` (a single directory
    # path) or `input_files` — `input_dirs` is not a valid keyword and raised
    # a TypeError, so the RAG tab could never build an index.
    docs = SimpleDirectoryReader(input_dir="data").load_data()
    index = VectorStoreIndex.from_documents(docs)
    query_engine = index.as_query_engine(similarity_top_k=3)
    return query_engine

tab1, tab2 = st.tabs(["🟣 LangChain Chat", "🟡 LlamaIndex mini-RAG"])

# -------- Tab 1: LangChain Chat --------
with tab1:
    st.subheader("LangChain (local HF pipeline)")
    lc_llm = load_langchain_pipeline(MODEL_ID, MAX_NEW_TOKENS)

    user_q = st.text_input("Ask anything:", value="What is this app?")
    if st.button("Generate (LangChain)", type="primary"):
        prompt = PromptTemplate.from_template(
            "You are a concise, helpful assistant.\n\nQuestion: {q}\nAnswer:"
        )
        chain = prompt | lc_llm | StrOutputParser()
        with st.spinner("Thinking..."):
            out = chain.invoke({"q": user_q})
        st.write(out)

# -------- Tab 2: LlamaIndex mini-RAG --------
with tab2:
    st.subheader("LlamaIndex over a tiny text file")
    st.caption("Uploads are optional; otherwise it uses ./data/notes.txt")
    uploaded = st.file_uploader("Upload a .txt file to index (optional)", type=["txt"])

    # If user uploads a file, write it into ./data and rebuild the index
    if uploaded is not None:
        os.makedirs("data", exist_ok=True)
        with open(os.path.join("data", "user.txt"), "wb") as f:
            f.write(uploaded.read())
        # BUG FIX: the query engine is cached by st.cache_resource keyed only
        # on (MODEL_ID, MAX_NEW_TOKENS, TEMP), so a newly uploaded file was
        # never indexed. Clear the cache so the next call re-reads ./data.
        load_llamaindex_stack.clear()

    qe = load_llamaindex_stack(MODEL_ID, MAX_NEW_TOKENS, TEMP)

    rag_q = st.text_input("Ask about the indexed text:", value="What does the notes file say?")
    if st.button("Search + Answer (LlamaIndex)"):
        with st.spinner("Searching + generating..."):
            ans = qe.query(rag_q)
        st.write(ans.response)
        with st.expander("Show retrieved nodes"):
            for n in ans.source_nodes:
                st.markdown(f"**Score:** {n.score:.3f}")
                st.code(n.node.get_content()[:500])