Spaces:

ProfRom
/

TestSpace3

Sleeping

App Files Files Community

ProfRom committed on Dec 9, 2025

Commit

cb6c6b2

verified ·

1 Parent(s): 4d6c27c

Turner - Unit 8 Assignment - Final submission

Browse files

Files changed (2) hide show

app.py +44 -131
requirements.txt +5 -8

app.py CHANGED Viewed

@@ -1,131 +1,44 @@
-# Caption Generator w/English-to-Spanish Translation
-# A. Harper | ARIN 460 | December 2025
-# Load into Hugging Face Space (using the Gradio Framework)
-# Include requirements.txt file (list: gradio, pandas, torch, sentencepiece, tensorflow, Image, transformers)
-# To run, navigate to the App tab. Click the red Generate button.
-# The app will randomly select image, generate (English) caption,
-    # then generate Spanish translation.
-# Import gradio - app framework
-import gradio as gr
-# Two image datasources are available.
-# Minor adjustments (add/remove # to deactivate/activate) to switch between datasources.
-# AA comments refer to images in the DataFrame / from Coco database
-# BB comments refer to images stored in local Gradio app folder
-# Import os and random to support random selection of image (from folder)
-import os
-import random
-# Import pandas datasets, transformers, torch
-import pandas as pd
-from datasets import load_dataset
-from transformers import (
-    BlipProcessor,
-    BlipForConditionalGeneration,
-    AutoTokenizer,
-    AutoModelForSeq2SeqLM,
-    MarianMTModel,
-    MarianTokenizer
-)
-from PIL import Image
-import torch
-# AA: Load dataset. Initial image source.
-#Load dataset (henryscheible/coco_val2014_tiny)
-dataset = load_dataset("henryscheible/coco_val2014_tiny", split="validation")
-# Reduce dataset to 20 rows, i.e., get sample
-samples = dataset.select(range(20))
-#Convert to dataframe
-df = pd.DataFrame(samples)
-# BB: Direct to Photos folder
-IMAGE_FOLDER = "Photos"
-image_paths = [
-    os.path.join(IMAGE_FOLDER, f)
-    for f in os.listdir(IMAGE_FOLDER)
-    if f.lower().endswith((".jpg", ".jpeg", ".png"))
-]
-#Load the image captioning model (Salesforce/blip-image-captioning-large)
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
-#Load transformer for translating captions from English to Spanish
-model_name = "Helsinki-NLP/opus-mt-en-es"
-trans_tokenizer = MarianTokenizer.from_pretrained(model_name)
-trans_model = MarianMTModel.from_pretrained(model_name)
-#Configure captioning function
-def caption_random_image():
-    # AA: pick random row - from DF
-    ##sample = df.sample(1).iloc[0]
-    # BB: Pick a random image path - image from folder
-    img_path = random.choice(image_paths)
-    # BB: Load into PIL - image from folder
-    image = Image.open(img_path).convert("RGB")
-    # AA: Image - for DF
-    ##image = sample["image"]
-    # Unconditional image captioning
-    inputs = processor(image, return_tensors="pt")
-    out = model.generate(**inputs)
-    caption_eng = processor.decode(out[0], skip_special_tokens=True)
-    # Translate caption from English to Spanish
-    trans_inputs = trans_tokenizer.encode(caption_eng, return_tensors="pt")
-    trans_out = trans_model.generate(trans_inputs)
-    caption_es = trans_tokenizer.decode(trans_out[0], skip_special_tokens=True)
-    return image, caption_eng, caption_es
-demo = gr.Interface(
-    fn=caption_random_image,
-    inputs=None,
-    outputs=[
-        gr.Image(type="pil", label="Random Image"),
-        gr.Textbox(label="Caption (English)"),
-        gr.Textbox(label="Caption (Spanish)")
-    ],
-    title="Image Captioning (with English to Spanish translation)",
-    description="Selects a random image (from either the local folder or henryscheible/coco data subset); generates a BLIP caption; then translates the (English) caption to Spanish."
-)
-demo.launch()

+# Import Modules
+import gradio as gr
+import pytesseract
+from PIL import Image
+from transformers import pipeline
+# Instantiate summarization pipeline
+summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
+def process_document(image):
+    # OCR to extract string values from image
+    extracted_text = pytesseract.image_to_string(image)
+    # If no text was found, return placeholder messages for both outputs
+    if not extracted_text.strip():
+        return "No text detected in the image.", "Summary Not Available"
+    # Summarize only when the extracted text is longer than 50 characters; shorter text is not worth summarizing
+    if len(extracted_text) > 50:
+        try:
+            # Generate summary (min_length ensures it's not too short)
+            summary_result = summarizer(extracted_text, max_length=100, min_length=30, do_sample=True, temperature=.7, repetition_penalty=1.8)
+            summary_text = summary_result[0]['summary_text']
+        except Exception as e: # On summarization error, return error message
+            summary_text = f"Error during summarization: {str(e)}"
+    else:
+        summary_text = "Text is too short to summarize."
+    return extracted_text, summary_text
+# Create Gradio interface
+interface = gr.Interface(
+    fn=process_document,
+    inputs=gr.Image(type="pil", label="Upload Document Image"),
+    outputs=[
+        gr.Textbox(label="Extracted Text (OCR)", lines=10),
+        gr.Textbox(label="Summary", lines=5)
+    ],
+    title="Multimodal Document Intelligence",
+    description="Upload a receipt, invoice, or article. The model will extract the text and provide a summary."
+)
+# Launch
+interface.launch()

requirements.txt CHANGED Viewed

@@ -1,8 +1,5 @@
-gradio
-pandas
-torch
-sentencepiece
-tensorflow
-Image
-transformers

+transformers
+gradio
+pytesseract
+pillow
+torch