nexusbert committed on
Commit
30507a7
·
1 Parent(s): bc13d1b
Files changed (1) hide show
  1. app.py +21 -7
app.py CHANGED
@@ -3,12 +3,12 @@ import json
3
  import os
4
  import tempfile
5
  import io
 
6
  from pathlib import Path
7
  from typing import Optional, Tuple
8
  from fastapi import FastAPI, UploadFile, File, HTTPException
9
  from fastapi.responses import JSONResponse
10
  from transformers import AutoTokenizer, AutoModelForCausalLM
11
- from pdfminer.high_level import extract_text as extract_pdf_text
12
  from docx import Document as DocxDocument
13
  from pptx import Presentation
14
  import logging
@@ -17,9 +17,14 @@ import pytesseract
17
  from pdf2image import convert_from_path
18
  import easyocr
19
 
 
 
 
20
  logging.basicConfig(level=logging.INFO)
21
  logger = logging.getLogger(__name__)
22
 
 
 
23
  app = FastAPI(
24
  title="Deckgpt",
25
  description="Upload your startup pitch deck (PDF, PPT, DOCX) and get an investor-style review",
@@ -49,13 +54,22 @@ async def load_model():
49
  try:
50
  ocr_model_dir = os.path.join(tempfile.gettempdir(), '.EasyOCR')
51
  os.makedirs(ocr_model_dir, exist_ok=True)
 
52
 
53
- ocr_reader = easyocr.Reader(
54
- ['en'],
55
- gpu=torch.cuda.is_available(),
56
- model_storage_directory=ocr_model_dir
57
- )
58
- logger.info("✅ OCR reader loaded successfully!")
 
 
 
 
 
 
 
 
59
  except Exception as e:
60
  logger.warning(f"⚠️ EasyOCR failed to load, will use pytesseract fallback: {e}")
61
  ocr_reader = None
 
3
  import os
4
  import tempfile
5
  import io
6
+ import warnings
7
  from pathlib import Path
8
  from typing import Optional, Tuple
9
  from fastapi import FastAPI, UploadFile, File, HTTPException
10
  from fastapi.responses import JSONResponse
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
12
  from docx import Document as DocxDocument
13
  from pptx import Presentation
14
  import logging
 
17
  from pdf2image import convert_from_path
18
  import easyocr
19
 
20
+ warnings.filterwarnings("ignore", category=UserWarning, module="pdfminer")
21
+ warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")
22
+
23
  logging.basicConfig(level=logging.INFO)
24
  logger = logging.getLogger(__name__)
25
 
26
+ from pdfminer.high_level import extract_text as extract_pdf_text
27
+
28
  app = FastAPI(
29
  title="Deckgpt",
30
  description="Upload your startup pitch deck (PDF, PPT, DOCX) and get an investor-style review",
 
54
  try:
55
  ocr_model_dir = os.path.join(tempfile.gettempdir(), '.EasyOCR')
56
  os.makedirs(ocr_model_dir, exist_ok=True)
57
+ os.chmod(ocr_model_dir, 0o777)
58
 
59
+ old_home = os.environ.get('HOME')
60
+ try:
61
+ os.environ['HOME'] = tempfile.gettempdir()
62
+ ocr_reader = easyocr.Reader(
63
+ ['en'],
64
+ gpu=torch.cuda.is_available(),
65
+ model_storage_directory=ocr_model_dir
66
+ )
67
+ logger.info("✅ OCR reader loaded successfully!")
68
+ finally:
69
+ if old_home:
70
+ os.environ['HOME'] = old_home
71
+ elif 'HOME' in os.environ:
72
+ del os.environ['HOME']
73
  except Exception as e:
74
  logger.warning(f"⚠️ EasyOCR failed to load, will use pytesseract fallback: {e}")
75
  ocr_reader = None