Josebert committed on
Commit
c0d9719
·
verified ·
1 Parent(s): 8aea703

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -139
app.py CHANGED
@@ -1,144 +1,29 @@
1
- import os
2
- import base64
3
  import gradio as gr
4
- import logging
5
- from huggingface_hub import InferenceClient
6
  from PIL import Image
7
- import io
8
-
9
- # Configure logging
10
- logging.basicConfig(level=logging.INFO)
11
- logger = logging.getLogger(__name__)
12
-
13
- # Optional imports with error handling
14
- try:
15
- from pdf2image import convert_from_path
16
- import easyocr
17
- HAS_OCR = True
18
- except ImportError:
19
- HAS_OCR = False
20
- print("OCR features will be disabled. Install pdf2image and easyocr for full functionality.")
21
-
22
- # Initialize Hugging Face client
23
- client = InferenceClient(model="google/gemma-7b-it")
24
-
25
- def initialize_ocr():
26
- """Initialize OCR with error handling"""
27
- if not HAS_OCR:
28
- return None
29
- try:
30
- return easyocr.Reader(['en'], gpu=True) # Enable GPU if available
31
- except Exception as e:
32
- logger.error(f"Failed to initialize OCR: {e}")
33
- return None
34
-
35
- reader = initialize_ocr()
36
-
37
- # Add custom CSS for better styling
38
- custom_css = """
39
- .container { max-width: 1200px; margin: auto; }
40
- .gradio-container { font-family: 'Arial', sans-serif; }
41
- .gr-button { background-color: #2196F3 !important; color: white !important; }
42
- .gr-button:hover { background-color: #1976D2 !important; }
43
- .feedback { margin-top: 20px; padding: 10px; border-radius: 4px; }
44
- .success { background-color: #4CAF50; color: white; }
45
- .error { background-color: #f44336; color: white; }
46
- .footer { text-align: center; margin-top: 20px; color: #666; }
47
- """
48
-
49
- def make_api_call(text_content, image_path=None, retries=3):
50
- """Enhanced API call handler with better prompting"""
51
- try:
52
- prompt = f"""Analyze this document and provide a detailed analysis with:
53
-
54
- 📝 CORRECTED TEXT:
55
- [Provide the text with proper formatting, corrected spelling and grammar]
56
-
57
- 📋 SUMMARY:
58
- [A concise 2-3 sentence summary of the main content]
59
-
60
- 🔑 KEY POINTS:
61
- [List the 3-5 most important points]
62
-
63
- 🏷️ ENTITIES DETECTED:
64
- - Dates: [List any dates found]
65
- - Names: [List any names found]
66
- - Numbers/Values: [List any significant numbers/values]
67
- - Organizations: [List any organizations mentioned]
68
-
69
- 📄 Original Content: {text_content}"""
70
-
71
- for attempt in range(retries):
72
- try:
73
- response = client.text_generation(
74
- prompt=prompt,
75
- max_new_tokens=1500, # Increased token limit
76
- temperature=0.7,
77
- top_p=0.95,
78
- )
79
- return response
80
- except Exception as e:
81
- if attempt == retries - 1:
82
- raise e
83
- logger.warning(f"Attempt {attempt + 1}/{retries} failed, retrying...")
84
- continue
85
-
86
- except Exception as e:
87
- logger.error(f"API call failed: {e}")
88
- return f"Error processing request: {str(e)}"
89
-
90
- # ... rest of your existing code ...
91
-
92
- # Create enhanced Gradio interface
93
- demo = gr.Interface(
94
- fn=process_and_analyze,
95
- inputs=[
96
- gr.File(
97
- label="📎 Upload Document",
98
- file_types=[".pdf", ".png", ".jpg", ".jpeg"],
99
- type="file"
100
- )
101
- ],
102
- outputs=[
103
- gr.Textbox(
104
- label="🔍 Analysis Results",
105
- lines=20,
106
- show_copy_button=True
107
- )
108
- ],
109
- title="🤖 Smart Document Analyzer Pro",
110
- description="""
111
- ### Upload your documents for instant AI-powered analysis!
112
-
113
- This tool can:
114
- - 📝 Extract and correct text from images and PDFs
115
- - 📊 Provide detailed summaries and key points
116
- - 🔍 Identify important entities (dates, names, numbers)
117
- - ✨ Format and structure the content
118
- """,
119
- examples=[
120
- ["example1.pdf"],
121
- ["example2.jpg"],
122
- ],
123
- theme=gr.themes.Soft().set(
124
- primary_hue="blue",
125
- secondary_hue="indigo",
126
- ),
127
- css=custom_css,
128
- allow_flagging="never",
129
  )
130
 
131
- # Add markdown for footer
132
- demo.footer = """
133
- <div class="footer">
134
- <p>🚀 Powered by Hugging Face & EasyOCR | Built with Gradio</p>
135
- <p>For optimal results, use clear images or well-scanned PDFs</p>
136
- </div>
137
- """
138
-
139
  if __name__ == "__main__":
140
- demo.launch(
141
- share=True,
142
- enable_queue=True,
143
- show_error=True,
144
- )
 
 
 
1
  import gradio as gr
2
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 
3
  from PIL import Image
4
+ import requests
5
+ from io import BytesIO
6
+
7
+ # Load TrOCR model and processor
8
+ processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
9
+ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
10
+
11
+ def extract_text_from_image(image):
12
+ """Extract text from an uploaded image using Hugging Face TrOCR model."""
13
+ image = image.convert("RGB")
14
+ pixel_values = processor(image, return_tensors="pt").pixel_values
15
+ generated_ids = model.generate(pixel_values)
16
+ extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
17
+ return extracted_text
18
+
19
+ # Create Gradio Interface
20
+ interface = gr.Interface(
21
+ fn=extract_text_from_image,
22
+ inputs=gr.Image(type="pil"),
23
+ outputs=gr.Textbox(label="Extracted Text"),
24
+ title="OCR Text Extractor",
25
+ description="Upload an image to extract text using Hugging Face's TrOCR model."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  )
27
 
 
 
 
 
 
 
 
 
28
  if __name__ == "__main__":
29
+ interface.launch(share=True)