Spaces:

CR7CAD
/

Assignment1

Sleeping

App Files Files Community

CR7CAD committed on Mar 8

Commit

5b9e396

verified ·

1 Parent(s): 8525085

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -28

app.py CHANGED Viewed

@@ -1,36 +1,119 @@
-import streamlit as st
-from transformers import pipeline
 from PIL import Image
-# Set the title of the app
-st.title("Image-to-Text Converter using Donut")
-# Description of the app
-st.write("Upload an image to extract text using the Donut model (naver-clova-ix/donut-base).")
-# Create a file uploader for image files
-uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
-# Initialize the pipeline
-@st.cache_resource(show_spinner=False)
-def load_pipeline():
-    return pipeline("image-to-text", model="naver-clova-ix/donut-base")
-pipe = load_pipeline()
-if uploaded_file is not None:
     try:
-        # Open the image file and convert to RGB (if necessary)
-        image = Image.open(uploaded_file).convert("RGB")
-        st.image(image, caption="Uploaded Image", use_column_width=True)
-        # Process the image through the pipeline
-        result = pipe(image)
-        # Extract generated text from the result list
-        generated_text = result[0].get("generated_text", "No text generated.")
-        st.subheader("Extracted Text")
-        st.text_area("Result", generated_text, height=200)
     except Exception as e:
-        st.error(f"An error occurred: {e}")

+import os
+import argparse
 from PIL import Image
+from transformers import pipeline
def load_model():
    """Build and return the Donut image-to-text pipeline.

    Progress messages are printed to stdout. If anything inside the
    construction step raises, the error is printed and the same exception
    is re-raised to the caller.

    Returns:
        The ``transformers`` pipeline created for the "image-to-text" task
        with the ``naver-clova-ix/donut-base`` checkpoint.
    """
    print("Loading image-to-text model...")
    try:
        donut_pipeline = pipeline(
            "image-to-text",
            model="naver-clova-ix/donut-base",
        )
        print("Model loaded successfully")
    except Exception as exc:
        # Report, then let the caller decide what to do with the failure.
        print(f"Error loading model: {exc}")
        raise
    return donut_pipeline
def extract_text_from_image(image_path, model):
    """Extract text from an image using the loaded model.

    Args:
        image_path (str): Path to the image file.
        model: The loaded image-to-text pipeline. It is called with a PIL
            image and is expected to return a sequence of dicts that carry
            a ``generated_text`` key.

    Returns:
        str: The ``generated_text`` of the first result; the fixed message
        "No text detected in the image" when the model returns nothing; or
        "Error: <details>" when anything fails (missing file, unreadable
        image, model failure). Failures are printed and never raised.
    """
    try:
        # Check if the file exists
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")

        # Image.open() is lazy and keeps the underlying file open. convert()
        # forces the pixel data to load and returns an independent image, so
        # the file handle is released before the model call. Normalising to
        # RGB also gives the model a consistent 3-channel input whatever the
        # file's native mode is (palette, grayscale, RGBA, ...).
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        # Extract text using the model
        result = model(image)

        # Get the generated text from the first result, if there is one
        if result:
            return result[0]['generated_text']
        return "No text detected in the image"
    except Exception as e:
        # Best-effort contract: report the problem and hand back an error
        # string so batch callers can keep going with the next image.
        print(f"Error processing image: {e}")
        return f"Error: {e}"
def process_directory(directory_path, model, output_file=None):
    """Process all images in a directory.

    Args:
        directory_path (str): Path to directory containing images
        model: The loaded image-to-text pipeline
        output_file (str, optional): Path to save results to a text file.
            The file is only written when at least one image was processed.

    Returns:
        dict | None: Mapping of image filename to the text returned by
        ``extract_text_from_image`` for it, in sorted filename order.
        ``None`` when ``directory_path`` is not an existing directory.
    """
    # isdir() rather than exists(): a path that points at a regular file
    # would otherwise pass the check and make os.listdir() raise.
    if not os.path.isdir(directory_path):
        print(f"Directory not found: {directory_path}")
        return

    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.webp')
    results = {}

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # console output and the saved report reproducible between runs.
    for filename in sorted(os.listdir(directory_path)):
        # Only files with a known image extension are processed
        if not filename.lower().endswith(image_suffixes):
            continue
        image_path = os.path.join(directory_path, filename)
        print(f"Processing {filename}...")
        text = extract_text_from_image(image_path, model)
        results[filename] = text
        print(f"Result for {filename}: {text}")

    # Save results to a file if output_file is specified
    if output_file and results:
        with open(output_file, 'w', encoding='utf-8') as f:
            for filename, text in results.items():
                f.write(f"File: {filename}\n")
                f.write(f"Text: {text}\n")
                f.write("-" * 50 + "\n")
        print(f"Results saved to {output_file}")
    return results
def main():
    """Command-line entry point.

    Options:
        --image   Path to a single image to process.
        --dir     Path to a directory of images to process.
        --output  Optional path of a text file to write the result(s) to.

    ``--image`` takes precedence when both ``--image`` and ``--dir`` are
    given. When neither is given, a hint is printed and nothing else runs.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Extract text from images using Donut model')
    parser.add_argument('--image', help='Path to an image file')
    parser.add_argument('--dir', help='Path to a directory containing images')
    parser.add_argument('--output', help='Path to save output to a text file')
    args = parser.parse_args()

    # Validate the arguments before building the model pipeline, so a call
    # with nothing to process returns immediately instead of loading a model
    # it will never use.
    if not (args.image or args.dir):
        print("Please provide either --image or --dir argument")
        return

    # Load the model
    model = load_model()

    if args.image:
        # Process a single image
        text = extract_text_from_image(args.image, model)
        print(f"Extracted text: {text}")
        # Save to file if output is specified
        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(f"File: {os.path.basename(args.image)}\n")
                f.write(f"Text: {text}\n")
            print(f"Result saved to {args.output}")
    else:
        # Process a directory of images
        process_directory(args.dir, model, args.output)


if __name__ == "__main__":
    main()