ChatBotsTA committed on
Commit
ab1429e
·
verified ·
1 Parent(s): fb60b9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -8
app.py CHANGED
@@ -1,5 +1,3 @@
1
- # Cell 9
2
- app_code = r'''
3
  import os, io, re, json, base64, requests, numpy as np
4
  import streamlit as st
5
  from pypdf import PdfReader
@@ -123,7 +121,6 @@ def extract_text_from_pdf(file) -> str:
123
 
124
  def make_word_freq_chart(text: str, top_k=20):
125
  text = text.lower()
126
- # lightweight stopword list
127
  stop = set(("the a an and of to in is are for with on by as at this that from be was were it its it’s into or if not your you we they their our can may such more most other also than which".split()))
128
  tokens = re.findall(r"[a-zA-Z]{3,}", text)
129
  freq = {}
@@ -203,14 +200,12 @@ if uploaded:
203
  st.session_state.chunks = split_into_chunks(st.session_state.doc_text)
204
  with st.spinner("Thinking..."):
205
  try:
206
- # embed once/cache
207
  if st.session_state.chunk_vecs is None:
208
  vecs = embed_texts(st.session_state.chunks)
209
  st.session_state.chunk_vecs = vecs
210
  else:
211
  vecs = st.session_state.chunk_vecs
212
 
213
- # question embedding
214
  q_vec = embed_texts([question])
215
  sims = cosine_sim(q_vec, vecs).flatten()
216
  top_idx = np.argsort(sims)[::-1][:3]
@@ -235,6 +230,3 @@ if uploaded:
235
 
236
  else:
237
  st.info("Upload a PDF to get started.")
238
- '''
239
- Path("app.py").write_text(app_code, encoding="utf-8")
240
- print("Wrote app.py")
 
 
 
1
  import os, io, re, json, base64, requests, numpy as np
2
  import streamlit as st
3
  from pypdf import PdfReader
 
121
 
122
  def make_word_freq_chart(text: str, top_k=20):
123
  text = text.lower()
 
124
  stop = set(("the a an and of to in is are for with on by as at this that from be was were it its it’s into or if not your you we they their our can may such more most other also than which".split()))
125
  tokens = re.findall(r"[a-zA-Z]{3,}", text)
126
  freq = {}
 
200
  st.session_state.chunks = split_into_chunks(st.session_state.doc_text)
201
  with st.spinner("Thinking..."):
202
  try:
 
203
  if st.session_state.chunk_vecs is None:
204
  vecs = embed_texts(st.session_state.chunks)
205
  st.session_state.chunk_vecs = vecs
206
  else:
207
  vecs = st.session_state.chunk_vecs
208
 
 
209
  q_vec = embed_texts([question])
210
  sims = cosine_sim(q_vec, vecs).flatten()
211
  top_idx = np.argsort(sims)[::-1][:3]
 
230
 
231
  else:
232
  st.info("Upload a PDF to get started.")