Josebert committed on
Commit
5a52e06
·
verified ·
1 Parent(s): f819b18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -98
app.py CHANGED
@@ -2,11 +2,14 @@ import os
2
  import base64
3
  import gradio as gr
4
  import logging
5
- from datetime import datetime
6
  from huggingface_hub import InferenceClient
7
  from PIL import Image
8
  import io
9
 
 
 
 
 
10
  # Optional imports with error handling
11
  try:
12
  from pdf2image import convert_from_path
@@ -16,19 +19,11 @@ except ImportError:
16
  HAS_OCR = False
17
  print("OCR features will be disabled. Install pdf2image and easyocr for full functionality.")
18
 
19
- # Configure logging
20
- logging.basicConfig(
21
- level=logging.INFO,
22
- format='%(asctime)s - %(levelname)s - %(message)s'
23
  )
24
- logger = logging.getLogger(__name__)
25
-
26
- # API configuration
27
- api_token = os.getenv("HUGGINGFACE_TOKEN")
28
- if not api_token:
29
- raise ValueError("API token not found. Set HUGGINGFACE_TOKEN in .env file or environment variables.")
30
-
31
- client = InferenceClient(token=api_token)
32
 
33
  def initialize_ocr():
34
  """Initialize OCR with error handling"""
@@ -42,58 +37,57 @@ def initialize_ocr():
42
 
43
  reader = initialize_ocr()
44
 
45
- def encode_image_to_base64(image_path):
46
- """Convert image to base64 string"""
47
- with open(image_path, "rb") as image_file:
48
- return base64.b64encode(image_file.read()).decode('utf-8')
49
-
50
  def make_api_call(text_content, image_path=None, retries=3):
51
  """Enhanced API call handler with retries and image support"""
52
- messages = [
53
- {
 
54
  "role": "user",
55
- "content": [
56
- {
57
- "type": "text",
58
- "text": f"""Analyze this document content and provide:
59
- 1. Corrected text with proper formatting
60
- 2. Brief summary
61
- 3. Key points or important information
62
- 4. Any detected entities (dates, names, numbers)
63
 
64
- Content: {text_content}"""
65
- }
66
- ]
67
- }
68
- ]
69
-
70
- if image_path and os.path.exists(image_path):
71
- base64_image = encode_image_to_base64(image_path)
72
- messages[0]["content"].append({
73
- "type": "image_url",
74
- "image_url": {
75
- "url": f"data:image/jpeg;base64,{base64_image}"
76
- }
77
- })
78
-
79
- for attempt in range(retries):
80
- try:
81
- response = client.text_generation(
82
- model="google/gemma-7b-it",
83
- prompt=str(messages),
84
- max_new_tokens=1000,
85
- temperature=0.7,
86
- top_p=0.95,
87
- )
88
- return response
89
- except Exception as e:
90
- logger.error(f"API Error (attempt {attempt + 1}/{retries}): {e}")
91
- if attempt == retries - 1:
92
- return f"Error processing request: {str(e)}"
93
- continue
 
 
 
 
94
 
95
  def process_document(file_path):
96
- """Process document with improved error handling"""
97
  if not HAS_OCR or not reader:
98
  return "OCR functionality is not available. Please install required packages."
99
 
@@ -105,7 +99,6 @@ def process_document(file_path):
105
  images = convert_from_path(file_path)
106
  extracted_text = ""
107
  for i, image in enumerate(images):
108
- logger.info(f"Processing page {i+1}/{len(images)}")
109
  temp_path = f"temp_page_{i}.jpg"
110
  image.save(temp_path)
111
  ocr_results = reader.readtext(temp_path, detail=0)
@@ -137,44 +130,26 @@ def process_and_analyze(file):
137
  logger.error(f"Error in processing: {e}")
138
  return f"Error: {str(e)}"
139
 
140
- # Interface styling
141
- css = """
142
- .gradio-container { font-family: 'Arial', sans-serif !important; max-width: 1200px !important; }
143
- .gr-button { background-color: #2e5090 !important; color: white !important; }
144
- .gr-button:hover { opacity: 0.9 !important; }
145
- .gr-form { background-color: #f8f9fa !important; border-radius: 10px !important; }
146
- """
147
-
148
- # Create interface
149
- with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
150
- gr.Markdown("# 📄 Smart Document Analyzer")
151
-
152
- with gr.Row():
153
- with gr.Column():
154
- file_input = gr.File(
155
- label="Upload Document (PDF, PNG, JPG)",
156
- file_types=[".pdf", ".png", ".jpg", ".jpeg"]
157
- )
158
- process_btn = gr.Button("📝 Analyze Document", variant="primary")
159
-
160
- with gr.Column():
161
- output = gr.Textbox(
162
- label="Analysis Results",
163
- lines=15,
164
- show_copy_button=True
165
- )
166
-
167
- gr.Markdown("### 📋 Instructions\n" +
168
- "1. Upload a PDF or image file\n" +
169
- "2. Click 'Analyze Document'\n" +
170
- "3. Wait for the analysis results\n")
171
-
172
- process_btn.click(
173
- fn=process_and_analyze,
174
- inputs=file_input,
175
- outputs=output,
176
- api_name="analyze"
177
- )
178
 
179
  if __name__ == "__main__":
180
  demo.launch()
 
2
  import base64
3
  import gradio as gr
4
  import logging
 
5
  from huggingface_hub import InferenceClient
6
  from PIL import Image
7
  import io
8
 
9
+ # Configure logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
  # Optional imports with error handling
14
  try:
15
  from pdf2image import convert_from_path
 
19
  HAS_OCR = False
20
  print("OCR features will be disabled. Install pdf2image and easyocr for full functionality.")
21
 
22
+ # Initialize Hugging Face client with default token for Spaces
23
+ client = InferenceClient(
24
+ model="google/gemma-7b-it",
25
+ token=None # Will use HF_TOKEN from secrets in Spaces
26
  )
 
 
 
 
 
 
 
 
27
 
28
  def initialize_ocr():
29
  """Initialize OCR with error handling"""
 
37
 
38
  reader = initialize_ocr()
39
 
 
 
 
 
 
40
  def make_api_call(text_content, image_path=None, retries=3):
41
  """Enhanced API call handler with retries and image support"""
42
+ try:
43
+ # Prepare message for chat completion
44
+ messages = [{
45
  "role": "user",
46
+ "content": [{
47
+ "type": "text",
48
+ "text": f"""Analyze this document and provide:
49
+ 1. Corrected text
50
+ 2. Brief summary
51
+ 3. Key points
52
+ 4. Detected entities (dates, names, numbers)
 
53
 
54
+ Content: {text_content}"""
55
+ }]
56
+ }]
57
+
58
+ # Add image if available
59
+ if image_path and os.path.exists(image_path):
60
+ with open(image_path, "rb") as img_file:
61
+ base64_image = base64.b64encode(img_file.read()).decode('utf-8')
62
+ messages[0]["content"].append({
63
+ "type": "image_url",
64
+ "image_url": {
65
+ "url": f"data:image/jpeg;base64,{base64_image}"
66
+ }
67
+ })
68
+
69
+ # Make API call with retries
70
+ for attempt in range(retries):
71
+ try:
72
+ completion = client.chat.completions.create(
73
+ model="google/gemma-7b-it",
74
+ messages=messages,
75
+ max_tokens=1000,
76
+ temperature=0.7
77
+ )
78
+ return completion.choices[0].message.content
79
+ except Exception as e:
80
+ if attempt == retries - 1:
81
+ raise e
82
+ logger.warning(f"Attempt {attempt + 1} failed, retrying...")
83
+ continue
84
+
85
+ except Exception as e:
86
+ logger.error(f"API call failed: {e}")
87
+ return f"Error processing request: {str(e)}"
88
 
89
  def process_document(file_path):
90
+ """Process document with OCR"""
91
  if not HAS_OCR or not reader:
92
  return "OCR functionality is not available. Please install required packages."
93
 
 
99
  images = convert_from_path(file_path)
100
  extracted_text = ""
101
  for i, image in enumerate(images):
 
102
  temp_path = f"temp_page_{i}.jpg"
103
  image.save(temp_path)
104
  ocr_results = reader.readtext(temp_path, detail=0)
 
130
  logger.error(f"Error in processing: {e}")
131
  return f"Error: {str(e)}"
132
 
133
+ # Create Gradio interface
134
+ demo = gr.Interface(
135
+ fn=process_and_analyze,
136
+ inputs=gr.File(
137
+ label="Upload Document (PDF, PNG, JPG)",
138
+ file_types=[".pdf", ".png", ".jpg", ".jpeg"]
139
+ ),
140
+ outputs=gr.Textbox(
141
+ label="Analysis Results",
142
+ lines=15,
143
+ show_copy_button=True
144
+ ),
145
+ title="📄 Smart Document Analyzer",
146
+ description="Upload a document to analyze its content using AI.",
147
+ theme=gr.themes.Soft(),
148
+ css="""
149
+ .gradio-container { max-width: 1200px !important; }
150
+ .gr-button { background-color: #2e5090 !important; }
151
+ """
152
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  if __name__ == "__main__":
155
  demo.launch()