mkoot007 committed on
Commit
ceb18fb
·
1 Parent(s): 9923f90

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -28
app.py CHANGED
@@ -1,39 +1,49 @@
1
  import streamlit as st
2
  from PIL import Image
3
- import torch
4
- from transformers import AutoFeatureExtractor, AutoModelForCausalLM, AutoTokenizer
 
5
 
6
- # Load the image captioning model
7
- model_name = "Salesforce/blip-image-captioning-large"
8
- feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
9
- tokenizer = AutoTokenizer.from_pretrained(model_name)
10
- model = AutoModelForCausalLM.from_pretrained(model_name)
11
 
12
- # Streamlit app title and description
13
- st.title("Image Information Extractor")
14
- st.write("Upload an image, and this app will extract information from it.")
15
 
16
- # Upload image
17
- image = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
 
18
 
19
- if image:
20
- # Display the uploaded image
21
- st.image(image, use_column_width=True, caption="Uploaded Image")
 
22
 
23
- # Process the image and extract information
24
- image = Image.open(image)
25
- st.write("Extracting information from the image...")
 
 
 
26
 
27
- # Use the image captioning model to generate a description
28
- inputs = tokenizer(image, return_tensors="pt")
29
- with st.spinner("Generating caption..."):
30
- captions = model.generate(**inputs)
31
- caption = tokenizer.decode(captions[0], skip_special_tokens=True)
32
 
33
- # Display the image description
34
- st.write("Image Description:")
35
- st.write(caption)
36
 
37
- # You can add more processing or explanations here if needed
 
 
 
 
38
 
39
- st.sidebar.text("Built with ❤️ by Your Name")
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from PIL import Image
3
+ import io
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
5
+ from easyocr import Reader
6
 
7
+ # Load the OCR model and text explanation model
8
+ ocr_reader = Reader(['en'])
 
 
 
9
 
10
+ # Load the text explanation model
11
+ text_generator = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")
12
+ text_tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")
13
 
14
# OCR helper. NOTE: easyocr's Reader.readtext accepts a file path, raw image
# bytes, or a numpy array -- not a PIL Image object -- so the image is
# serialized to PNG bytes before being handed to the reader.
def extract_text(image):
    """Run OCR on a PIL image.

    Parameters
    ----------
    image : PIL.Image.Image
        The uploaded image to read text from.

    Returns
    -------
    list
        easyocr results: a list of (bounding_box, text, confidence) tuples.
    """
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")  # lossless; preserves text fidelity for OCR
    return ocr_reader.readtext(buffer.getvalue())
17
 
18
# Turn OCR results into a natural-language explanation using the language model.
def explain_text(text, text_generator, text_tokenizer):
    """Generate an explanation of OCR output with a causal language model.

    Parameters
    ----------
    text : list
        easyocr ``readtext`` results: (bounding_box, text, confidence) tuples;
        only the text component (index 1) of each tuple is used.
    text_generator :
        Model exposing ``generate(input_ids, ...)``.
    text_tokenizer :
        Tokenizer exposing ``encode`` / ``decode``.

    Returns
    -------
    str
        Decoded model output (prompt plus generated continuation).
    """
    # Join the recognized text fragments into a single prompt string.
    extracted_text = " ".join([res[1] for res in text])

    input_ids = text_tokenizer.encode(extracted_text, return_tensors="pt")
    # Use max_new_tokens rather than max_length: max_length counts the prompt
    # tokens too, so a long OCR prompt would leave no room for generation.
    explanation_ids = text_generator.generate(
        input_ids, max_new_tokens=100, num_return_sequences=1
    )
    return text_tokenizer.decode(explanation_ids[0], skip_special_tokens=True)
29
 
30
# ---- Streamlit UI ----
st.title("Text Extraction and Explanation")

# Restrict uploads to image formats PIL can open; without the type filter a
# non-image upload would crash Image.open below.
uploaded_file = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])

# Extract text from the uploaded image and explain it.
if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, use_column_width=True, caption="Uploaded Image")

    # OCR and generation are slow; show spinners so the app doesn't look hung.
    with st.spinner("Extracting text..."):
        ocr_results = extract_text(image)

    if not ocr_results:
        # Nothing recognized -- don't feed an empty prompt to the model.
        st.markdown("No text could be detected in the image.")
    else:
        with st.spinner("Generating explanation..."):
            explanation = explain_text(ocr_results, text_generator, text_tokenizer)

        st.markdown("**Extracted text:**")
        st.markdown(" ".join([res[1] for res in ocr_results]))

        st.markdown("**Explanation:**")
        st.markdown(explanation)
else:
    st.markdown("Please upload an image to extract text and get an explanation.")