dejanseo committed on
Commit
aef81ea
·
verified ·
1 Parent(s): 787875d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +13 -20
src/streamlit_app.py CHANGED
@@ -14,13 +14,18 @@ MODEL_NAME = "cross-encoder/ms-marco-electra-base"
14
  MAX_SNIPPET_CHARS = 450
15
  MAX_SENTENCES = 5
16
 
 
 
 
 
 
 
17
  st.set_page_config(
18
- page_title="Snippet Generator",
19
  page_icon="✂️",
20
  layout="centered"
21
  )
22
 
23
-
24
  @st.cache_resource
25
  def load_model():
26
  """Load CrossEncoder model."""
@@ -117,35 +122,26 @@ def generate_snippet(query: str, document: str, model, max_chars: int, max_sents
117
 
118
 
119
  # --- Streamlit UI ---
120
- st.title("✂️ Snippet Generator")
121
- st.caption("Recreates Google Vertex AI / Gemini grounding-style snippets")
122
 
123
  st.markdown("""
124
- This tool generates extractive snippets from documents using a Cross-Encoder model trained on MS MARCO search relevance data.
125
-
126
- **How it works:**
127
- 1. Segments document into sentences
128
- 2. Scores each sentence against your query using `cross-encoder/ms-marco-electra-base`
129
- 3. Selects top-scoring sentences within budget
130
- 4. Stitches them in document order with `...` for gaps
131
  """)
132
 
133
- st.markdown("---")
134
-
135
- query = st.text_input("🔍 Query", value="best prostate cancer treatment in the world")
136
 
137
  document = st.text_area(
138
- "📄 Document",
139
  height=250,
140
  placeholder="Paste document content here..."
141
  )
142
 
143
- with st.expander("⚙️ Settings"):
144
  max_chars = st.slider("Max snippet characters", 200, 1500, MAX_SNIPPET_CHARS, 50)
145
  max_sents = st.slider("Max sentences", 2, 15, MAX_SENTENCES)
146
  show_debug = st.checkbox("Show debug info", value=True)
147
 
148
- if st.button("Generate Snippet", type="primary"):
149
  if query and document:
150
  with st.spinner("Loading model & scoring sentences..."):
151
  model = load_model()
@@ -161,6 +157,3 @@ if st.button("Generate Snippet", type="primary"):
161
  st.text(f"{score:.4f}: {sent[:80]}...")
162
  else:
163
  st.warning("Please enter both a query and document.")
164
-
165
- st.markdown("---")
166
- st.caption("Model: `cross-encoder/ms-marco-electra-base` | [GitHub](https://github.com/UKPLab/sentence-transformers)")
 
14
  MAX_SNIPPET_CHARS = 450
15
  MAX_SENTENCES = 5
16
 
17
+ st.logo(
18
+ image="https://dejan.ai/wp-content/uploads/2024/02/dejan-300x103.png",
19
+ link="https://dejan.ai/",
20
+ size="large"
21
+ )
22
+
23
  st.set_page_config(
24
+ page_title="Snippet Generator by DEJAN AI",
25
  page_icon="✂️",
26
  layout="centered"
27
  )
28
 
 
29
  @st.cache_resource
30
  def load_model():
31
  """Load CrossEncoder model."""
 
122
 
123
 
124
  # --- Streamlit UI ---
125
+ st.title("Grounding Snippet Generator", help="cross-encoder/ms-marco-electra-base")
 
126
 
127
  st.markdown("""
128
+ How much of your page will be used to ground the model for a particular fanout query?
 
 
 
 
 
 
129
  """)
130
 
131
+ query = st.text_input("Query", value="best prostate cancer treatment in the world")
 
 
132
 
133
  document = st.text_area(
134
+ "Web Page Text",
135
  height=250,
136
  placeholder="Paste document content here..."
137
  )
138
 
139
+ with st.expander("Settings"):
140
  max_chars = st.slider("Max snippet characters", 200, 1500, MAX_SNIPPET_CHARS, 50)
141
  max_sents = st.slider("Max sentences", 2, 15, MAX_SENTENCES)
142
  show_debug = st.checkbox("Show debug info", value=True)
143
 
144
+ if st.button("Generate Snippet", help="cross-encoder/ms-marco-electra-base"):
145
  if query and document:
146
  with st.spinner("Loading model & scoring sentences..."):
147
  model = load_model()
 
157
  st.text(f"{score:.4f}: {sent[:80]}...")
158
  else:
159
  st.warning("Please enter both a query and document.")