Josebert committed on
Commit
8e3b552
·
verified ·
1 Parent(s): 924ef97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -47
app.py CHANGED
@@ -5,11 +5,21 @@ import json
5
  import logging
6
  from datetime import datetime
7
  import random
8
- from pdf2image import convert_from_path
9
- import easyocr
 
 
 
 
 
 
 
10
 
11
  # Configure logging
12
- logging.basicConfig(level=logging.INFO)
 
 
 
13
  logger = logging.getLogger(__name__)
14
 
15
  # API configuration
@@ -19,79 +29,104 @@ if not api_token:
19
 
20
  API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
21
  HEADERS = {"Authorization": f"Bearer {api_token}"}
 
22
 
23
- # Initialize OCR reader
24
- reader = easyocr.Reader(['en'])
25
-
26
- # Define a system prompt (example)
27
- SYSTEM_PROMPT = "You are a helpful assistant that corrects and enhances text."
 
 
 
 
28
 
29
- def get_unique_parameters():
30
- """Generate unique parameters for each request"""
31
- return {
32
- "temperature": random.uniform(0.7, 0.9),
33
- "top_p": random.uniform(0.85, 0.95),
34
- "timestamp": datetime.now().strftime("%H%M%S"),
35
- }
36
 
37
- def make_api_call(prompt, params):
38
- """Unified API call handler"""
39
  payload = {
40
  "inputs": f"{prompt} [ts:{params['timestamp']}]",
41
  "parameters": {
42
  "temperature": params["temperature"],
43
- "top_p": params["top_p"]
 
44
  }
45
  }
46
 
47
- try:
48
- response = requests.post(API_URL, headers=HEADERS, json=payload)
49
- response.raise_for_status()
50
- return response.json()
51
- except Exception as e:
52
- logger.error(f"API Error: {e}")
53
- return None
 
 
 
 
 
 
 
 
54
 
55
  def process_document(file_path):
 
 
 
 
 
 
 
56
  try:
57
- # Handle PDF files
58
  if file_path.lower().endswith('.pdf'):
59
  images = convert_from_path(file_path)
60
  extracted_text = ""
61
- for image in images:
 
62
  ocr_results = reader.readtext(image, detail=0)
63
  extracted_text += " ".join(ocr_results) + "\n"
64
- # Handle image files
65
  else:
66
  ocr_results = reader.readtext(file_path, detail=0)
67
  extracted_text = " ".join(ocr_results)
68
 
69
- return extracted_text
70
  except Exception as e:
71
  logger.error(f"Error processing document: {e}")
72
- return ""
73
 
74
  def generate_response(file_path):
 
75
  try:
76
- # Extract text from document
77
  extracted_text = process_document(file_path)
 
 
 
 
 
 
 
 
78
 
79
- if not extracted_text.strip():
80
- return "No text extracted from the document."
81
 
82
- params = get_unique_parameters()
83
- prompt = f"Process this extracted text, correct any errors and enhance it:\n{extracted_text}"
 
 
 
84
 
85
  result = make_api_call(prompt, params)
86
- if result:
87
- return result[0].get("generated_text", "No response from the model.")
88
- else:
89
- return "Error processing the document."
90
  except Exception as e:
91
  logger.error(f"Error generating response: {e}")
92
- return "Error processing the document."
93
 
94
- # Interface styling
95
  css = """
96
  .gradio-container {
97
  font-family: 'Arial', sans-serif !important;
@@ -101,28 +136,60 @@ css = """
101
  .gr-button {
102
  background-color: #2e5090 !important;
103
  color: white !important;
 
 
 
 
104
  }
105
  .gr-input {
106
  border: 2px solid #ddd !important;
107
  border-radius: 8px !important;
 
 
 
 
 
 
108
  }
109
  """
110
 
111
- # Create interface
112
  with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
113
  gr.Markdown("# Document Processing with Mistral")
114
 
115
  with gr.Tabs():
116
  with gr.Tab("Document Processing"):
117
- file_input = gr.File(label="Upload PDF or Image")
118
- output_text = gr.Textbox(label="Processed Text", lines=10)
119
- submit_btn = gr.Button("Process Document")
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  submit_btn.click(
122
  fn=generate_response,
123
  inputs=file_input,
124
- outputs=output_text
 
125
  )
126
 
127
  if __name__ == "__main__":
128
- demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
5
  import logging
6
  from datetime import datetime
7
  import random
8
+
9
+ # Optional imports with error handling
10
+ try:
11
+ from pdf2image import convert_from_path
12
+ import easyocr
13
+ HAS_OCR = True
14
+ except ImportError:
15
+ HAS_OCR = False
16
+ print("OCR features will be disabled. Install pdf2image and easyocr for full functionality.")
17
 
18
  # Configure logging
19
+ logging.basicConfig(
20
+ level=logging.INFO,
21
+ format='%(asctime)s - %(levelname)s - %(message)s'
22
+ )
23
  logger = logging.getLogger(__name__)
24
 
25
  # API configuration
 
29
 
30
  API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
31
  HEADERS = {"Authorization": f"Bearer {api_token}"}
32
+ TIMEOUT = 30 # seconds
33
 
34
def initialize_ocr():
    """Build the EasyOCR reader, or return None when it cannot be built.

    Returns:
        An ``easyocr.Reader`` created for English (``['en']``), or ``None``
        when ``HAS_OCR`` is false or the reader constructor raised.
    """
    ocr_reader = None
    if HAS_OCR:
        try:
            ocr_reader = easyocr.Reader(['en'])
        except Exception as e:
            # Construction failures are logged instead of propagated, so the
            # caller only has to deal with a missing reader.
            logger.error(f"Failed to initialize OCR: {e}")
    return ocr_reader
43
 
44
+ # Initialize OCR reader
45
+ reader = initialize_ocr()
 
 
 
 
 
46
 
47
def make_api_call(prompt, params, retries=3):
    """POST a text-generation request to ``API_URL``, retrying on failure.

    Args:
        prompt: Text sent as the request's ``inputs``. A ``[ts:...]`` suffix
            built from ``params["timestamp"]`` is appended to it.
        params: Mapping providing ``"temperature"``, ``"top_p"`` and
            ``"timestamp"``.
        retries: Maximum number of attempts.

    Returns:
        The decoded JSON body of the first successful response, or ``None``
        when every attempt failed (or ``retries`` is not positive).
    """
    payload = {
        "inputs": f"{prompt} [ts:{params['timestamp']}]",
        "parameters": {
            "temperature": params["temperature"],
            "top_p": params["top_p"],
            # The text-generation task limits output length through
            # ``max_new_tokens``; ``max_tokens`` is not one of its parameters.
            "max_new_tokens": 1000,
        },
    }

    for attempt in range(retries):
        try:
            response = requests.post(
                API_URL,
                headers=HEADERS,
                json=payload,
                timeout=TIMEOUT,
            )
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a successful response whose body is not valid
            # JSON on requests versions where that is not a RequestException.
            logger.error(f"API Error (attempt {attempt + 1}/{retries}): {e}")

    # Every attempt failed, or no attempt was made at all.
    return None
73
 
74
  def process_document(file_path):
75
+ """Process document with improved error handling"""
76
+ if not HAS_OCR or not reader:
77
+ return "OCR functionality is not available. Please install required packages."
78
+
79
+ if not os.path.exists(file_path):
80
+ return "File not found."
81
+
82
  try:
 
83
  if file_path.lower().endswith('.pdf'):
84
  images = convert_from_path(file_path)
85
  extracted_text = ""
86
+ for i, image in enumerate(images):
87
+ logger.info(f"Processing page {i+1}/{len(images)}")
88
  ocr_results = reader.readtext(image, detail=0)
89
  extracted_text += " ".join(ocr_results) + "\n"
 
90
  else:
91
  ocr_results = reader.readtext(file_path, detail=0)
92
  extracted_text = " ".join(ocr_results)
93
 
94
+ return extracted_text.strip() or "No text extracted from the document."
95
  except Exception as e:
96
  logger.error(f"Error processing document: {e}")
97
+ return f"Error processing document: {str(e)}"
98
 
99
  def generate_response(file_path):
100
+ """Generate response with better error handling"""
101
  try:
 
102
  extracted_text = process_document(file_path)
103
+ if not extracted_text or extracted_text.startswith("Error"):
104
+ return extracted_text
105
+
106
+ params = {
107
+ "temperature": random.uniform(0.7, 0.9),
108
+ "top_p": random.uniform(0.85, 0.95),
109
+ "timestamp": datetime.now().strftime("%H%M%S")
110
+ }
111
 
112
+ prompt = f"""Process and enhance this text:
113
+ {extracted_text}
114
 
115
+ Provide:
116
+ 1. Corrected text
117
+ 2. Summary
118
+ 3. Key points
119
+ """
120
 
121
  result = make_api_call(prompt, params)
122
+ if result and isinstance(result, list):
123
+ return result[0].get("generated_text", "No valid response from model.")
124
+ return "Error: Failed to process the document."
 
125
  except Exception as e:
126
  logger.error(f"Error generating response: {e}")
127
+ return f"Error: {str(e)}"
128
 
129
+ # Interface styling with improved CSS
130
  css = """
131
  .gradio-container {
132
  font-family: 'Arial', sans-serif !important;
 
136
  .gr-button {
137
  background-color: #2e5090 !important;
138
  color: white !important;
139
+ transition: all 0.3s ease !important;
140
+ }
141
+ .gr-button:hover {
142
+ opacity: 0.9 !important;
143
  }
144
  .gr-input {
145
  border: 2px solid #ddd !important;
146
  border-radius: 8px !important;
147
+ padding: 8px !important;
148
+ }
149
+ .gr-form {
150
+ background-color: #f8f9fa !important;
151
+ padding: 20px !important;
152
+ border-radius: 10px !important;
153
  }
154
  """
155
 
156
+ # Create interface with better organization
157
  with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
158
  gr.Markdown("# Document Processing with Mistral")
159
 
160
  with gr.Tabs():
161
  with gr.Tab("Document Processing"):
162
+ with gr.Column():
163
+ file_input = gr.File(
164
+ label="Upload PDF or Image",
165
+ file_types=[".pdf", ".png", ".jpg", ".jpeg"]
166
+ )
167
+ output_text = gr.Textbox(
168
+ label="Processed Text",
169
+ lines=15,
170
+ show_copy_button=True
171
+ )
172
+ submit_btn = gr.Button(
173
+ "Process Document",
174
+ variant="primary"
175
+ )
176
 
177
  submit_btn.click(
178
  fn=generate_response,
179
  inputs=file_input,
180
+ outputs=output_text,
181
+ api_name="process_document"
182
  )
183
 
184
  if __name__ == "__main__":
185
+ # Check dependencies
186
+ if not HAS_OCR:
187
+ print("Warning: OCR features are disabled. Install required packages for full functionality.")
188
+
189
+ # Launch with optimized settings
190
+ demo.launch(
191
+ share=True,
192
+ server_name="0.0.0.0",
193
+ server_port=7860,
194
+ show_error=True
195
+ )