Josebert committed on
Commit
c531117
·
verified ·
1 Parent(s): 43dd50a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -111
app.py CHANGED
@@ -1,10 +1,15 @@
1
  import os
 
2
  import gradio as gr
3
- import requests
4
- import json
5
  import logging
6
  from datetime import datetime
7
- import random
 
 
 
 
 
 
8
 
9
  # Optional imports with error handling
10
  try:
@@ -23,13 +28,11 @@ logging.basicConfig(
23
  logger = logging.getLogger(__name__)
24
 
25
  # API configuration
26
- api_token = os.getenv("API_TOKEN")
27
  if not api_token:
28
- raise ValueError("API token not found. Make sure 'API_TOKEN' is set in the Secrets.")
29
 
30
- API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
31
- HEADERS = {"Authorization": f"Bearer {api_token}"}
32
- TIMEOUT = 30 # seconds
33
 
34
  def initialize_ocr():
35
  """Initialize OCR with error handling"""
@@ -41,34 +44,56 @@ def initialize_ocr():
41
  logger.error(f"Failed to initialize OCR: {e}")
42
  return None
43
 
44
- # Initialize OCR reader
45
  reader = initialize_ocr()
46
 
47
- def make_api_call(prompt, params, retries=3):
48
- """Enhanced API call handler with retries"""
49
- payload = {
50
- "inputs": f"{prompt} [ts:{params['timestamp']}]",
51
- "parameters": {
52
- "temperature": params["temperature"],
53
- "top_p": params["top_p"],
54
- "max_tokens": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  }
56
- }
 
 
 
 
 
 
 
 
 
57
 
58
  for attempt in range(retries):
59
  try:
60
- response = requests.post(
61
- API_URL,
62
- headers=HEADERS,
63
- json=payload,
64
- timeout=TIMEOUT
 
65
  )
66
- response.raise_for_status()
67
- return response.json()
68
- except requests.exceptions.RequestException as e:
69
  logger.error(f"API Error (attempt {attempt + 1}/{retries}): {e}")
70
  if attempt == retries - 1:
71
- return None
72
  continue
73
 
74
  def process_document(file_path):
@@ -85,8 +110,11 @@ def process_document(file_path):
85
  extracted_text = ""
86
  for i, image in enumerate(images):
87
  logger.info(f"Processing page {i+1}/{len(images)}")
88
- ocr_results = reader.readtext(image, detail=0)
 
 
89
  extracted_text += " ".join(ocr_results) + "\n"
 
90
  else:
91
  ocr_results = reader.readtext(file_path, detail=0)
92
  extracted_text = " ".join(ocr_results)
@@ -96,100 +124,61 @@ def process_document(file_path):
96
  logger.error(f"Error processing document: {e}")
97
  return f"Error processing document: {str(e)}"
98
 
99
- def generate_response(file_path):
100
- """Generate response with better error handling"""
 
 
 
101
  try:
102
- extracted_text = process_document(file_path)
103
- if not extracted_text or extracted_text.startswith("Error"):
104
  return extracted_text
 
 
 
105
 
106
- params = {
107
- "temperature": random.uniform(0.7, 0.9),
108
- "top_p": random.uniform(0.85, 0.95),
109
- "timestamp": datetime.now().strftime("%H%M%S")
110
- }
111
-
112
- prompt = f"""Process and enhance this text:
113
- {extracted_text}
114
-
115
- Provide:
116
- 1. Corrected text
117
- 2. Summary
118
- 3. Key points
119
- """
120
-
121
- result = make_api_call(prompt, params)
122
- if result and isinstance(result, list):
123
- return result[0].get("generated_text", "No valid response from model.")
124
- return "Error: Failed to process the document."
125
  except Exception as e:
126
- logger.error(f"Error generating response: {e}")
127
  return f"Error: {str(e)}"
128
 
129
- # Interface styling with improved CSS
130
  css = """
131
- .gradio-container {
132
- font-family: 'Arial', sans-serif !important;
133
- max-width: 1200px !important;
134
- margin: auto !important;
135
- }
136
- .gr-button {
137
- background-color: #2e5090 !important;
138
- color: white !important;
139
- transition: all 0.3s ease !important;
140
- }
141
- .gr-button:hover {
142
- opacity: 0.9 !important;
143
- }
144
- .gr-input {
145
- border: 2px solid #ddd !important;
146
- border-radius: 8px !important;
147
- padding: 8px !important;
148
- }
149
- .gr-form {
150
- background-color: #f8f9fa !important;
151
- padding: 20px !important;
152
- border-radius: 10px !important;
153
- }
154
  """
155
 
156
- # Create interface with better organization
157
- with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
158
- gr.Markdown("# Document Processing with Mistral")
159
 
160
- with gr.Tabs():
161
- with gr.Tab("Document Processing"):
162
- with gr.Column():
163
- file_input = gr.File(
164
- label="Upload PDF or Image",
165
- file_types=[".pdf", ".png", ".jpg", ".jpeg"]
166
- )
167
- output_text = gr.Textbox(
168
- label="Processed Text",
169
- lines=15,
170
- show_copy_button=True
171
- )
172
- submit_btn = gr.Button(
173
- "Process Document",
174
- variant="primary"
175
- )
176
 
177
- submit_btn.click(
178
- fn=generate_response,
179
- inputs=file_input,
180
- outputs=output_text,
181
- api_name="process_document"
182
  )
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  if __name__ == "__main__":
185
- # Check dependencies
186
- if not HAS_OCR:
187
- print("Warning: OCR features are disabled. Install required packages for full functionality.")
188
-
189
- # Launch with optimized settings
190
- demo.launch(
191
- share=True,
192
- server_name="0.0.0.0",
193
- server_port=7860,
194
- show_error=True
195
- )
 
1
  import os
2
+ import base64
3
  import gradio as gr
 
 
4
  import logging
5
  from datetime import datetime
6
+ from huggingface_hub import InferenceClient
7
+ from PIL import Image
8
+ import io
9
+ from dotenv import load_dotenv
10
+
11
+ # Load environment variables
12
+ load_dotenv()
13
 
14
  # Optional imports with error handling
15
  try:
 
28
  logger = logging.getLogger(__name__)
29
 
30
  # API configuration
31
+ api_token = os.getenv("HUGGINGFACE_TOKEN")
32
  if not api_token:
33
+ raise ValueError("API token not found. Set HUGGINGFACE_TOKEN in .env file or environment variables.")
34
 
35
+ client = InferenceClient(token=api_token)
 
 
36
 
37
  def initialize_ocr():
38
  """Initialize OCR with error handling"""
 
44
  logger.error(f"Failed to initialize OCR: {e}")
45
  return None
46
 
 
47
  reader = initialize_ocr()
48
 
49
+ def encode_image_to_base64(image_path):
50
+ """Convert image to base64 string"""
51
+ with open(image_path, "rb") as image_file:
52
+ return base64.b64encode(image_file.read()).decode('utf-8')
53
+
54
+ def make_api_call(text_content, image_path=None, retries=3):
55
+ """Enhanced API call handler with retries and image support"""
56
+ messages = [
57
+ {
58
+ "role": "user",
59
+ "content": [
60
+ {
61
+ "type": "text",
62
+ "text": f"""Analyze this document content and provide:
63
+ 1. Corrected text with proper formatting
64
+ 2. Brief summary
65
+ 3. Key points or important information
66
+ 4. Any detected entities (dates, names, numbers)
67
+
68
+ Content: {text_content}"""
69
+ }
70
+ ]
71
  }
72
+ ]
73
+
74
+ if image_path and os.path.exists(image_path):
75
+ base64_image = encode_image_to_base64(image_path)
76
+ messages[0]["content"].append({
77
+ "type": "image_url",
78
+ "image_url": {
79
+ "url": f"data:image/jpeg;base64,{base64_image}"
80
+ }
81
+ })
82
 
83
  for attempt in range(retries):
84
  try:
85
+ response = client.text_generation(
86
+ model="google/gemma-7b-it",
87
+ prompt=str(messages),
88
+ max_new_tokens=1000,
89
+ temperature=0.7,
90
+ top_p=0.95,
91
  )
92
+ return response
93
+ except Exception as e:
 
94
  logger.error(f"API Error (attempt {attempt + 1}/{retries}): {e}")
95
  if attempt == retries - 1:
96
+ return f"Error processing request: {str(e)}"
97
  continue
98
 
99
  def process_document(file_path):
 
110
  extracted_text = ""
111
  for i, image in enumerate(images):
112
  logger.info(f"Processing page {i+1}/{len(images)}")
113
+ temp_path = f"temp_page_{i}.jpg"
114
+ image.save(temp_path)
115
+ ocr_results = reader.readtext(temp_path, detail=0)
116
  extracted_text += " ".join(ocr_results) + "\n"
117
+ os.remove(temp_path)
118
  else:
119
  ocr_results = reader.readtext(file_path, detail=0)
120
  extracted_text = " ".join(ocr_results)
 
124
  logger.error(f"Error processing document: {e}")
125
  return f"Error processing document: {str(e)}"
126
 
127
+ def process_and_analyze(file):
128
+ """Main processing function"""
129
+ if not file:
130
+ return "Please upload a file."
131
+
132
  try:
133
+ extracted_text = process_document(file.name)
134
+ if extracted_text.startswith("Error") or extracted_text.startswith("OCR functionality"):
135
  return extracted_text
136
+
137
+ result = make_api_call(extracted_text, file.name)
138
+ return result if result else "Failed to analyze the document."
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  except Exception as e:
141
+ logger.error(f"Error in processing: {e}")
142
  return f"Error: {str(e)}"
143
 
144
+ # Interface styling
145
  css = """
146
+ .gradio-container { font-family: 'Arial', sans-serif !important; max-width: 1200px !important; }
147
+ .gr-button { background-color: #2e5090 !important; color: white !important; }
148
+ .gr-button:hover { opacity: 0.9 !important; }
149
+ .gr-form { background-color: #f8f9fa !important; border-radius: 10px !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  """
151
 
152
+ # Create interface
153
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
154
+ gr.Markdown("# 📄 Smart Document Analyzer")
155
 
156
+ with gr.Row():
157
+ with gr.Column():
158
+ file_input = gr.File(
159
+ label="Upload Document (PDF, PNG, JPG)",
160
+ file_types=[".pdf", ".png", ".jpg", ".jpeg"]
161
+ )
162
+ process_btn = gr.Button("📝 Analyze Document", variant="primary")
 
 
 
 
 
 
 
 
 
163
 
164
+ with gr.Column():
165
+ output = gr.Textbox(
166
+ label="Analysis Results",
167
+ lines=15,
168
+ show_copy_button=True
169
  )
170
+
171
+ gr.Markdown("### 📋 Instructions\n" +
172
+ "1. Upload a PDF or image file\n" +
173
+ "2. Click 'Analyze Document'\n" +
174
+ "3. Wait for the analysis results\n")
175
+
176
+ process_btn.click(
177
+ fn=process_and_analyze,
178
+ inputs=file_input,
179
+ outputs=output,
180
+ api_name="analyze"
181
+ )
182
 
183
  if __name__ == "__main__":
184
+ demo.launch()