SebastianAndreu committed on
Commit
730ef27
·
verified ·
1 Parent(s): c7b31da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -10
app.py CHANGED
@@ -768,21 +768,40 @@ def extract_text_from_receipt(file_path):
768
  return None, []
769
 
770
  try:
771
- # Set tesseract path for Hugging Face Spaces
772
- pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
 
 
 
 
 
 
 
 
 
773
 
774
  text = ""
775
 
776
  if file_path.name.lower().endswith('.pdf'):
777
- # Set poppler path for Hugging Face Spaces
778
- try:
779
- images = convert_from_path(file_path.name, poppler_path='/usr/bin')
780
- except Exception as e:
781
- # If poppler fails, try without path specification
 
782
  try:
783
- images = convert_from_path(file_path.name)
784
- except Exception as e2:
785
- return [[True, f"PDF Error: {str(e2)}", "", ""]], []
 
 
 
 
 
 
 
 
 
786
 
787
  for img in images:
788
  text += pytesseract.image_to_string(img) + "\n"
 
768
  return None, []
769
 
770
  try:
771
+ # Try to find tesseract automatically
772
+ import shutil
773
+ tesseract_path = shutil.which('tesseract')
774
+ if tesseract_path:
775
+ pytesseract.pytesseract.tesseract_cmd = tesseract_path
776
+ else:
777
+ # Try common paths
778
+ for path in ['/usr/bin/tesseract', '/usr/local/bin/tesseract', '/bin/tesseract']:
779
+ if os.path.exists(path):
780
+ pytesseract.pytesseract.tesseract_cmd = path
781
+ break
782
 
783
  text = ""
784
 
785
  if file_path.name.lower().endswith('.pdf'):
786
+ # Try multiple possible poppler paths for Hugging Face
787
+ poppler_paths = [None, '/usr/bin', '/usr/local/bin', '/bin']
788
+ images = None
789
+ last_error = None
790
+
791
+ for poppler_path in poppler_paths:
792
  try:
793
+ if poppler_path:
794
+ images = convert_from_path(file_path.name, poppler_path=poppler_path)
795
+ else:
796
+ images = convert_from_path(file_path.name)
797
+ break # Success! Exit loop
798
+ except Exception as e:
799
+ last_error = str(e)
800
+ continue
801
+
802
+ if images is None:
803
+ # If all paths failed, return helpful error
804
+ return [[True, f"PDF processing unavailable. Please upload images (PNG/JPG) instead. Error: {last_error}", "", ""]], []
805
 
806
  for img in images:
807
  text += pytesseract.image_to_string(img) + "\n"