Xingyuan Ding commited on
Commit
5e809ca
·
1 Parent(s): 0056882

test chromadb

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +46 -22
  3. requirements.txt +3 -3
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .DS_Store
app.py CHANGED
@@ -1,31 +1,31 @@
1
- import streamlit as st
2
- from PIL import Image
3
- import easyocr
4
- import numpy as np
5
- import cv2
6
 
7
- reader = easyocr.Reader(['en'])
8
 
9
- uploaded_file = st.file_uploader("Please upload your image file", type=["jpg", "jpeg", "png"])
10
 
11
- if uploaded_file is not None:
12
- image = Image.open(uploaded_file)
13
 
14
- st.image(image, caption="Uploaded Image", use_column_width=True)
15
 
16
- image_np = np.array(image)
17
- image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
18
- with st.spinner("Performing OCR..."):
19
- result = reader.readtext(image_np)
20
 
21
- if result:
22
- st.subheader("OCR Results:")
23
- extracted_text = "\n".join([text[1] for text in result])
24
- st.text_area("Extracted Text", extracted_text, height=200)
25
- else:
26
- st.warning("No text found in the image.")
27
- else:
28
- st.info("Please upload an image file")
29
 
30
 
31
  # import streamlit as st
@@ -37,3 +37,27 @@ else:
37
  # if text:
38
  # out = pipe(text)
39
  # st.json(out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import streamlit as st
2
+ # from PIL import Image
3
+ # import easyocr
4
+ # import numpy as np
5
+ # import cv2
6
 
7
+ # reader = easyocr.Reader(['en'])
8
 
9
+ # uploaded_file = st.file_uploader("Please upload your image file", type=["jpg", "jpeg", "png"])
10
 
11
+ # if uploaded_file is not None:
12
+ # image = Image.open(uploaded_file)
13
 
14
+ # st.image(image, caption="Uploaded Image", use_column_width=True)
15
 
16
+ # image_np = np.array(image)
17
+ # image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
18
+ # with st.spinner("Performing OCR..."):
19
+ # result = reader.readtext(image_np)
20
 
21
+ # if result:
22
+ # st.subheader("OCR Results:")
23
+ # extracted_text = "\n".join([text[1] for text in result])
24
+ # st.text_area("Extracted Text", extracted_text, height=200)
25
+ # else:
26
+ # st.warning("No text found in the image.")
27
+ # else:
28
+ # st.info("Please upload an image file")
29
 
30
 
31
  # import streamlit as st
 
37
  # if text:
38
  # out = pipe(text)
39
  # st.json(out)
40
+
41
import streamlit as st
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Directories holding the two pre-built Chroma vector stores queried below.
CHROMA_GOOD_PATH = "chroma_good"
CHROMA_BAD_PATH = "chroma_bad"

text = st.text_area("enter some text")
embedding_function = HuggingFaceEmbeddings()
db_good = Chroma(persist_directory=CHROMA_GOOD_PATH, embedding_function=embedding_function)
db_bad = Chroma(persist_directory=CHROMA_BAD_PATH, embedding_function=embedding_function)

if text:
    # Top-3 nearest neighbours with relevance scores; results are
    # (Document, score) pairs, so results_good[0][1] is the best score.
    results_good = db_good.similarity_search_with_relevance_scores(text, k=3)
    if len(results_good) == 0 or results_good[0][1] < 0.7:
        # BUG FIX: the original used `return` at module level, which is a
        # SyntaxError outside a function. st.stop() is the Streamlit way to
        # abort the current script run early.
        print("Unable to find matching results_good.")
        st.stop()
    st.json(results_good)

    results_bad = db_bad.similarity_search_with_relevance_scores(text, k=3)
    if len(results_bad) == 0 or results_bad[0][1] < 0.7:
        print("Unable to find matching results_bad.")
        st.stop()
    st.json(results_bad)
requirements.txt CHANGED
@@ -2,11 +2,11 @@ streamlit
2
  chromadb
3
  easyocr
4
  langchain
5
- langchain-community
6
- langchain-openai
7
  numpy
8
  opencv-python-headless
9
  openai
10
  Pillow
 
11
  transformers
12
- torch
 
2
  chromadb
3
  easyocr
4
  langchain
5
+ langchain-huggingface
 
6
  numpy
7
  opencv-python-headless
8
  openai
9
  Pillow
10
+ sentence-transformers
11
  transformers
12
+ torch