ProfRom committed on
Commit
cb6c6b2
·
verified ·
1 Parent(s): 4d6c27c

Turner - Unit 8 Assignment - Final submission

Browse files
Files changed (2) hide show
  1. app.py +44 -131
  2. requirements.txt +5 -8
app.py CHANGED
@@ -1,131 +1,44 @@
1
- # Caption Generator w/English-to-Spanish Translation
2
- # A. Harper | ARIN 460 | December 2025
3
-
4
- # Load into Hugging Face Space (using the Gradio Framework)
5
- # Include requirements.txt file (list: gradio, pandas, torch, sentencepiece, tensorflow, Image, transformers)
6
-
7
- # To run, navigate to the App tab. Click the red Generate button.
8
- # The app will randomly select image, generate (English) caption,
9
- # then generate Spanish translation.
10
-
11
-
12
- # Import gradio - app framework
13
- import gradio as gr
14
-
15
-
16
- # Two image datasources are available.
17
- # Minor adjustments (add/remove # to deactivate/activate) to switch between datasources.
18
- # AA comments refer to images in the DataFrame / from Coco database
19
- # BB comments refer to images stored in local Gradio app folder
20
-
21
-
22
- # Import os and random to support random selection of image (from folder)
23
- import os
24
- import random
25
-
26
-
27
- # Import pandas datasets, transformers, torch
28
- import pandas as pd
29
-
30
- from datasets import load_dataset
31
-
32
- from transformers import (
33
- BlipProcessor,
34
- BlipForConditionalGeneration,
35
- AutoTokenizer,
36
- AutoModelForSeq2SeqLM,
37
- MarianMTModel,
38
- MarianTokenizer
39
- )
40
-
41
- from PIL import Image
42
- import torch
43
-
44
-
45
- # AA: Load dataset. Initial image source.
46
- #Load dataset (henryscheible/coco_val2014_tiny)
47
- dataset = load_dataset("henryscheible/coco_val2014_tiny", split="validation")
48
-
49
-
50
- # Reduce dataset to 20 rows, i.e., get sample
51
- samples = dataset.select(range(20))
52
-
53
-
54
- #Convert to dataframe
55
- df = pd.DataFrame(samples)
56
-
57
-
58
- # BB: Direct to Photos folder
59
- IMAGE_FOLDER = "Photos"
60
-
61
- image_paths = [
62
- os.path.join(IMAGE_FOLDER, f)
63
- for f in os.listdir(IMAGE_FOLDER)
64
- if f.lower().endswith((".jpg", ".jpeg", ".png"))
65
- ]
66
-
67
- #Load the image captioning model (Salesforce/blip-image-captioning-large)
68
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
69
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
70
-
71
-
72
- #Load transformer for translating captions from English to Spanish
73
- model_name = "Helsinki-NLP/opus-mt-en-es"
74
- trans_tokenizer = MarianTokenizer.from_pretrained(model_name)
75
- trans_model = MarianMTModel.from_pretrained(model_name)
76
-
77
-
78
- #Configure captioning function
79
-
80
- def caption_random_image():
81
-
82
-
83
- # AA: pick random row - from DF
84
- ##sample = df.sample(1).iloc[0]
85
-
86
-
87
- # BB: Pick a random image path - image from folder
88
- img_path = random.choice(image_paths)
89
-
90
-
91
- # BB: Load into PIL - image from folder - image from folder
92
- image = Image.open(img_path).convert("RGB")
93
-
94
-
95
- # AA: Image - for DF
96
- ##image = sample["image"]
97
-
98
-
99
- # Unconditional image captioning
100
- inputs = processor(image, return_tensors="pt")
101
-
102
-
103
- out = model.generate(**inputs)
104
- caption_eng = processor.decode(out[0], skip_special_tokens=True)
105
-
106
-
107
- # Translate caption from English to Spanish
108
- trans_inputs = trans_tokenizer.encode(caption_eng, return_tensors="pt")
109
- trans_out = trans_model.generate(trans_inputs)
110
- caption_es = trans_tokenizer.decode(trans_out[0], skip_special_tokens=True)
111
-
112
-
113
- return image, caption_eng, caption_es
114
-
115
-
116
-
117
-
118
- demo = gr.Interface(
119
- fn=caption_random_image,
120
- inputs=None,
121
- outputs=[
122
- gr.Image(type="pil", label="Random Image"),
123
- gr.Textbox(label="Caption (English)"),
124
- gr.Textbox(label="Caption (Spanish)")
125
- ],
126
- title="Image Captioning (with English to Spanish translation)",
127
- description="Selects a random image (from either the local folder or henryscheible/coco data subset); generates a BLIP caption; then translates the (English) caption to Spanish."
128
- )
129
-
130
-
131
- demo.launch()
 
1
+ # Import Modules
2
+ import gradio as gr
3
+ import pytesseract
4
+ from PIL import Image
5
+ from transformers import pipeline
6
+
7
# Summarization pipeline, created once at module import and reused by every call
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
9
+
10
# OCR output must be longer than this (after trimming whitespace) to be worth summarizing.
MIN_CHARS_TO_SUMMARIZE = 50


def process_document(image):
    """Extract text from a document image with OCR and summarize it.

    Args:
        image: PIL image supplied by the Gradio ``Image`` input, or ``None``
            when the form is submitted without an upload.

    Returns:
        A ``(extracted_text, summary_text)`` tuple of strings. When a step
        cannot produce a result, the affected slot carries a human-readable
        status or error message instead.
    """
    # An empty Gradio image input arrives as None; OCR would raise on it.
    if image is None:
        return "No image provided.", "Summary Not Available"

    # OCR to extract string values from the image. A missing Tesseract binary
    # or an OCR failure is reported to the user instead of crashing the request.
    try:
        extracted_text = pytesseract.image_to_string(image)
    except (pytesseract.TesseractError, pytesseract.TesseractNotFoundError) as e:
        return f"Error during OCR: {e}", "Summary Not Available"

    # Judge emptiness and length on the trimmed text so whitespace-only or
    # whitespace-padded OCR output does not count as content.
    stripped_text = extracted_text.strip()
    if not stripped_text:
        return "No text detected in the image.", "Summary Not Available"

    if len(stripped_text) <= MIN_CHARS_TO_SUMMARIZE:
        return extracted_text, "Text is too short to summarize."

    try:
        # truncation=True keeps long documents within the model's input limit
        # instead of failing inside the model; min_length keeps the summary
        # from being trivially short.
        summary_result = summarizer(
            extracted_text,
            max_length=100,
            min_length=30,
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.8,
            truncation=True,
        )
        summary_text = summary_result[0]["summary_text"]
    except Exception as e:  # Boundary handler: surface the error in the UI
        summary_text = f"Error during summarization: {e}"

    return extracted_text, summary_text
30
+
31
# Gradio UI: a single uploaded image in, OCR text and its summary out
_document_input = gr.Image(type="pil", label="Upload Document Image")
_result_boxes = [
    gr.Textbox(label="Extracted Text (OCR)", lines=10),
    gr.Textbox(label="Summary", lines=5),
]

interface = gr.Interface(
    fn=process_document,
    inputs=_document_input,
    outputs=_result_boxes,
    title="Multimodal Document Intelligence",
    description="Upload a receipt, invoice, or article. The model will extract the text and provide a summary.",
)

# Start the app server
interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,8 +1,5 @@
1
- gradio
2
- pandas
3
- torch
4
- sentencepiece
5
- tensorflow
6
- Image
7
- transformers
8
-
 
1
+ transformers
2
+ gradio
3
+ pytesseract
4
+ pillow
5
+ torch