VcRlAgent commited on
Commit
91cdd71
·
1 Parent(s): e893fb4

all updates

Browse files
Files changed (6) hide show
  1. README.md +11 -14
  2. app.py +109 -0
  3. data/notes.txt +8 -0
  4. requirements.txt +17 -3
  5. src/streamlit_app copy.py +40 -0
  6. src/streamlit_app.py +102 -34
README.md CHANGED
@@ -1,20 +1,17 @@
1
  ---
2
- title: LangApp2
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
  pinned: false
11
- short_description: Language App using LLMs
12
- license: unknown
13
  ---
14
 
15
- # Welcome to Streamlit!
16
 
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
 
18
 
19
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
1
  ---
2
+ title: Tiny LLM Starter – LangChain + LlamaIndex
3
+ emoji: 🧪
4
+ colorFrom: purple
5
+ colorTo: indigo
6
+ sdk: streamlit
7
+ sdk_version: 1.36.0
8
+ app_file: app.py
 
9
  pinned: false
10
+ license: mit
 
11
  ---
12
 
13
+ Two minimal demos that run on **free CPU**:
14
 
15
+ 1) **LangChain Chat** using a local tiny HF model
16
+ 2) **LlamaIndex mini-RAG** over a tiny text file
17
 
 
 
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

import streamlit as st

# LangChain (local HF pipeline)
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser

# LlamaIndex (modular imports)
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

# Page chrome must be configured before any other Streamlit call.
st.set_page_config(
    page_title="Tiny LLM Starter",
    page_icon="🧪",
    layout="centered",
)
st.title("🧪 Tiny LLM Starter – LangChain + LlamaIndex")

# ---- Sidebar config ----
# These module-level names are read by the tab code below; keep them as-is.
st.sidebar.header("Model Settings")
MODEL_ID = st.sidebar.text_input("HF model id (seq2seq)", value="google/flan-t5-small")
MAX_NEW_TOKENS = st.sidebar.slider("max_new_tokens", 32, 512, 256, 32)
TEMP = st.sidebar.slider("temperature", 0.0, 1.0, 0.2, 0.1)

st.sidebar.markdown(
    """
**Tips**
- Uses local CPU (no key required)
- Small model → lower memory, faster cold start
- You can later add an `HF_TOKEN` secret for hosted inference
"""
)

# ---- Cache helpers to avoid reloading on every interaction ----
@st.cache_resource(show_spinner=True)
def load_langchain_pipeline(model_id: str, max_new_tokens: int):
    """Build and cache a LangChain LLM backed by a local HF seq2seq pipeline.

    Cached by (model_id, max_new_tokens) via st.cache_resource so the model
    is only downloaded/loaded once per distinct configuration.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    hf_pipe = pipeline(
        "text2text-generation",
        model=seq2seq_model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
    )
    return HuggingFacePipeline(pipeline=hf_pipe)
@st.cache_resource(show_spinner=True)
def load_llamaindex_stack(model_id: str, max_new_tokens: int, temperature: float):
    """Build and cache a LlamaIndex query engine over the ./data directory.

    Configures global LlamaIndex Settings with a tiny sentence-transformers
    embedding model and the same small HF model used by the LangChain tab,
    then indexes every file under ./data.

    Returns the index's query engine (top-3 similarity retrieval).
    """
    # Tiny, fast sentence-transformers model for embeddings
    embed = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Wrap the same tiny HF model for LlamaIndex
    llm = HuggingFaceLLM(
        model_name=model_id,
        tokenizer_name=model_id,
        context_window=2048,
        generate_kwargs={"max_new_tokens": max_new_tokens, "temperature": temperature},
        device_map="cpu",
    )

    Settings.embed_model = embed
    Settings.llm = llm

    # BUG FIX: SimpleDirectoryReader accepts `input_dir` (a single directory
    # path) or `input_files` — `input_dirs` is not a valid keyword and raised
    # a TypeError, so the RAG tab could never build an index.
    docs = SimpleDirectoryReader(input_dir="data").load_data()
    index = VectorStoreIndex.from_documents(docs)
    query_engine = index.as_query_engine(similarity_top_k=3)
    return query_engine
tab1, tab2 = st.tabs(["🟣 LangChain Chat", "🟡 LlamaIndex mini-RAG"])

# -------- Tab 1: LangChain Chat --------
with tab1:
    st.subheader("LangChain (local HF pipeline)")
    lc_llm = load_langchain_pipeline(MODEL_ID, MAX_NEW_TOKENS)

    user_q = st.text_input("Ask anything:", value="What is this app?")
    if st.button("Generate (LangChain)", type="primary"):
        prompt = PromptTemplate.from_template(
            "You are a concise, helpful assistant.\n\nQuestion: {q}\nAnswer:"
        )
        # LCEL: prompt -> LLM -> plain-string output
        chain = prompt | lc_llm | StrOutputParser()
        with st.spinner("Thinking..."):
            out = chain.invoke({"q": user_q})
        st.write(out)

# -------- Tab 2: LlamaIndex mini-RAG --------
with tab2:
    st.subheader("LlamaIndex over a tiny text file")
    st.caption("Uploads are optional; otherwise it uses ./data/notes.txt")
    uploaded = st.file_uploader("Upload a .txt file to index (optional)", type=["txt"])

    # If user uploads a file, write it into ./data and rebuild the index
    if uploaded is not None:
        os.makedirs("data", exist_ok=True)
        with open(os.path.join("data", "user.txt"), "wb") as f:
            f.write(uploaded.read())
        # BUG FIX: the query engine is cached by st.cache_resource keyed only
        # on (MODEL_ID, MAX_NEW_TOKENS, TEMP), so a newly uploaded file was
        # never indexed. Clear the cache so the next call re-reads ./data.
        load_llamaindex_stack.clear()

    qe = load_llamaindex_stack(MODEL_ID, MAX_NEW_TOKENS, TEMP)

    rag_q = st.text_input("Ask about the indexed text:", value="What does the notes file say?")
    if st.button("Search + Answer (LlamaIndex)"):
        with st.spinner("Searching + generating..."):
            ans = qe.query(rag_q)
        st.write(ans.response)
        with st.expander("Show retrieved nodes"):
            for n in ans.source_nodes:
                st.markdown(f"**Score:** {n.score:.3f}")
                st.code(n.node.get_content()[:500])
data/notes.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Welcome to your first LlamaIndex demo!
2
+ This file is deliberately small. Ask things like:
3
+ - What does this demo do?
4
+ - Which libraries does it use?
5
+ - How do I switch models?
6
+
7
+ Answer should mention Streamlit, LangChain, and LlamaIndex.
8
+
requirements.txt CHANGED
@@ -1,3 +1,17 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit>=1.36
2
+ transformers>=4.42
3
+ torch>=2.2
4
+ huggingface_hub>=0.23
5
+
6
+ # LangChain (modular imports)
7
+ langchain>=0.2.8
8
+ langchain-community>=0.2.8
9
+ langchain-huggingface>=0.0.3
10
+
11
+ # LlamaIndex (modular packages)
12
+ llama-index>=0.10.35
13
+ llama-index-llms-huggingface>=0.2.1
14
+ llama-index-embeddings-huggingface>=0.2.0
15
+
16
+ # Small, fast embeddings
17
+ sentence-transformers>=2.6.1
src/streamlit_app copy.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

# Interactive controls for the spiral's resolution and winding count.
point_count = st.slider("Number of points in spiral", 1, 10000, 1100)
turn_count = st.slider("Number of turns in spiral", 1, 300, 31)

# Parametric spiral: radius grows linearly with t while the angle winds.
t = np.linspace(0, 1, point_count)
angle = 2 * np.pi * turn_count * t
x = t * np.cos(angle)
y = t * np.sin(angle)

spiral = pd.DataFrame({
    "x": x,
    "y": y,
    "idx": t,
    "rand": np.random.randn(point_count),
})

# Color follows position along the spiral; point size is random jitter.
chart = (
    alt.Chart(spiral, height=700, width=700)
    .mark_point(filled=True)
    .encode(
        x=alt.X("x", axis=None),
        y=alt.Y("y", axis=None),
        color=alt.Color("idx", legend=None, scale=alt.Scale()),
        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
    )
)
st.altair_chart(chart)
src/streamlit_app.py CHANGED
@@ -1,40 +1,108 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
5
 
6
- """
7
- # Welcome to Streamlit!
 
 
 
 
 
 
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
 
 
 
 
 
 
 
 
 
 
 
14
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
import os

import streamlit as st

# LangChain (local HF pipeline)
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser

# LlamaIndex (modular imports)
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

st.set_page_config(page_title="Tiny LLM Starter", page_icon="🧪", layout="centered")
st.title("🧪 Tiny LLM Starter LangChain + LlamaIndex")

# ---- Sidebar config ----
st.sidebar.header("Model Settings")
MODEL_ID = st.sidebar.text_input("HF model id (seq2seq)", value="google/flan-t5-small")
MAX_NEW_TOKENS = st.sidebar.slider("max_new_tokens", 32, 512, 256, 32)
TEMP = st.sidebar.slider("temperature", 0.0, 1.0, 0.2, 0.1)

st.sidebar.markdown(
    """
**Tips**
- Uses local CPU (no key required)
- Small model → lower memory, faster cold start
- You can later add an `HF_TOKEN` secret for hosted inference
"""
)

# ---- Cache helpers to avoid reloading on every interaction ----
@st.cache_resource(show_spinner=True)
def load_langchain_pipeline(model_id: str, max_new_tokens: int):
    """Build and cache a LangChain LLM backed by a local HF seq2seq pipeline."""
    tok = AutoTokenizer.from_pretrained(model_id)
    mdl = AutoModelForSeq2SeqLM.from_pretrained(model_id)
    gen = pipeline(
        task="text2text-generation",
        model=mdl,
        tokenizer=tok,
        max_new_tokens=max_new_tokens,
    )
    return HuggingFacePipeline(pipeline=gen)

@st.cache_resource(show_spinner=True)
def load_llamaindex_stack(model_id: str, max_new_tokens: int, temperature: float):
    """Build and cache a LlamaIndex query engine over the ./data directory.

    Returns the index's query engine (top-3 similarity retrieval).
    """
    # Tiny, fast sentence-transformers model for embeddings
    embed = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Wrap the same tiny HF model for LlamaIndex
    llm = HuggingFaceLLM(
        model_name=model_id,
        tokenizer_name=model_id,
        context_window=2048,
        generate_kwargs={"max_new_tokens": max_new_tokens, "temperature": temperature},
        device_map="cpu",
    )

    Settings.embed_model = embed
    Settings.llm = llm

    # BUG FIX: SimpleDirectoryReader accepts `input_dir` (a single directory
    # path) or `input_files` — `input_dirs` is not a valid keyword and raised
    # a TypeError, so the RAG tab could never build an index.
    docs = SimpleDirectoryReader(input_dir="data").load_data()
    index = VectorStoreIndex.from_documents(docs)
    query_engine = index.as_query_engine(similarity_top_k=3)
    return query_engine

tab1, tab2 = st.tabs(["🟣 LangChain Chat", "🟡 LlamaIndex mini-RAG"])

# -------- Tab 1: LangChain Chat --------
with tab1:
    st.subheader("LangChain (local HF pipeline)")
    lc_llm = load_langchain_pipeline(MODEL_ID, MAX_NEW_TOKENS)

    user_q = st.text_input("Ask anything:", value="What is this app?")
    if st.button("Generate (LangChain)", type="primary"):
        prompt = PromptTemplate.from_template(
            "You are a concise, helpful assistant.\n\nQuestion: {q}\nAnswer:"
        )
        chain = prompt | lc_llm | StrOutputParser()
        with st.spinner("Thinking..."):
            out = chain.invoke({"q": user_q})
        st.write(out)

# -------- Tab 2: LlamaIndex mini-RAG --------
with tab2:
    st.subheader("LlamaIndex over a tiny text file")
    st.caption("Uploads are optional; otherwise it uses ./data/notes.txt")
    uploaded = st.file_uploader("Upload a .txt file to index (optional)", type=["txt"])

    # If user uploads a file, write it into ./data and rebuild the index
    if uploaded is not None:
        os.makedirs("data", exist_ok=True)
        with open(os.path.join("data", "user.txt"), "wb") as f:
            f.write(uploaded.read())
        # BUG FIX: the query engine is cached by st.cache_resource keyed only
        # on (MODEL_ID, MAX_NEW_TOKENS, TEMP), so a newly uploaded file was
        # never indexed. Clear the cache so the next call re-reads ./data.
        load_llamaindex_stack.clear()

    qe = load_llamaindex_stack(MODEL_ID, MAX_NEW_TOKENS, TEMP)

    rag_q = st.text_input("Ask about the indexed text:", value="What does the notes file say?")
    if st.button("Search + Answer (LlamaIndex)"):
        with st.spinner("Searching + generating..."):
            ans = qe.query(rag_q)
        st.write(ans.response)
        with st.expander("Show retrieved nodes"):
            for n in ans.source_nodes:
                st.markdown(f"**Score:** {n.score:.3f}")
                st.code(n.node.get_content()[:500])