parthnuwal7 committed on
Commit
5b0f561
·
1 Parent(s): 99cc145

Adding git and git-lfs to the HF spaces

Browse files
Files changed (3) hide show
  1. Dockerfile +6 -1
  2. PYABSA_FIX.md +109 -0
  3. src/utils/data_processor.py +36 -5
Dockerfile CHANGED
@@ -3,10 +3,12 @@ FROM python:3.10-slim
3
  # Set working directory
4
  WORKDIR /app
5
 
6
- # Install system dependencies
7
  RUN apt-get update && apt-get install -y \
8
  build-essential \
9
  curl \
 
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # Copy all files first
@@ -15,6 +17,9 @@ COPY . .
15
  # Install Python dependencies
16
  RUN pip install --no-cache-dir -r requirements.txt
17
 
 
 
 
18
  # Expose port
19
  EXPOSE 7860
20
 
 
3
  # Set working directory
4
  WORKDIR /app
5
 
6
+ # Install system dependencies including git for PyABSA
7
  RUN apt-get update && apt-get install -y \
8
  build-essential \
9
  curl \
10
+ git \
11
+ git-lfs \
12
  && rm -rf /var/lib/apt/lists/*
13
 
14
  # Copy all files first
 
17
  # Install Python dependencies
18
  RUN pip install --no-cache-dir -r requirements.txt
19
 
20
+ # Pre-download PyABSA checkpoint during build to avoid runtime git issues
21
+ RUN python -c "import pyabsa; from pyabsa import ATEPCCheckpointManager; ATEPCCheckpointManager.get_checkpoint('multilingual')" || echo "PyABSA checkpoint download failed, will retry at runtime"
22
+
23
  # Expose port
24
  EXPOSE 7860
25
 
PYABSA_FIX.md ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PyABSA Loading Fix - Deployment Checklist
2
+
3
+ ## Changes Made
4
+
5
+ ### 1. Dockerfile Updates ✅
6
+ - Added `git` and `git-lfs` to system dependencies
7
+ - Added PyABSA checkpoint pre-download during Docker build
8
+ - This ensures the model is ready before the app starts
9
+
10
+ ### 2. Data Processor Improvements ✅
11
+ - Added git availability check with detailed logging
12
+ - Better error handling with full traceback
13
+ - Attempts local checkpoint first, then downloads if needed
14
+ - Clear status messages (✅, ❌, ⚠️) for easy debugging
15
+
16
+ ### 3. Environment Configuration ✅
17
+ - Set `GIT_PYTHON_REFRESH=quiet` to suppress git warnings
18
+ - Suppressed deprecation warnings from PyABSA
19
+
20
+ ## Deployment Steps
21
+
22
+ ### Option A: Redeploy to HuggingFace Spaces
23
+
24
+ 1. **Commit changes:**
25
+ ```bash
26
+ cd c:\Users\Lenovo\insights\ABSA
27
+ git add Dockerfile src/utils/data_processor.py
28
+ git commit -m "Fix PyABSA loading with git installation"
29
+ git push
30
+ ```
31
+
32
+ 2. **HF Spaces will automatically rebuild** with the new Dockerfile
33
+ 3. **Check logs** to verify PyABSA loads:
34
+ - Look for: `✅ PyABSA model loaded successfully`
35
+ - Should NOT see: `⚠️ Using fallback method`
36
+
37
+ ### Option B: Test Locally First
38
+
39
+ 1. **Rebuild Docker image:**
40
+ ```bash
41
+ cd c:\Users\Lenovo\insights\ABSA
42
+ docker build -t absa-backend .
43
+ ```
44
+
45
+ 2. **Run container:**
46
+ ```bash
47
+ docker run -p 7860:7860 --env-file .env absa-backend
48
+ ```
49
+
50
+ 3. **Check logs for:**
51
+ ```
52
+ Git available: git version X.X.X
53
+ ✅ PyABSA model loaded successfully
54
+ ```
55
+
56
+ ## Expected Behavior
57
+
58
+ ### Before Fix ❌
59
+ ```
60
+ WARNING:utils.data_processor:Failed to load PyABSA model: Bad git executable
61
+ ⚠️ Using fallback method
62
+ ```
63
+
64
+ ### After Fix ✅
65
+ ```
66
+ INFO:utils.data_processor:Git available: git version 2.X.X
67
+ INFO:utils.data_processor:This may take a few minutes on first run...
68
+ ✅ PyABSA model loaded successfully from downloaded checkpoint
69
+ ```
70
+
71
+ ## Troubleshooting
72
+
73
+ ### If PyABSA still fails to load:
74
+
75
+ 1. **Check Docker build logs:**
76
+ - Verify git was installed successfully
77
+ - Check if PyABSA checkpoint download during build succeeded
78
+
79
+ 2. **Check runtime logs:**
80
+ - Look for full traceback after "❌ Failed to load PyABSA model"
81
+ - Verify git command is available
82
+
83
+ 3. **Manual checkpoint download** (if needed):
84
+ ```python
85
+ # Run this once to cache the checkpoint
86
+ import pyabsa
87
+ from pyabsa import ATEPCCheckpointManager
88
+ checkpoint = ATEPCCheckpointManager.get_checkpoint('multilingual')
89
+ print(f"Checkpoint saved to: {checkpoint}")
90
+ ```
91
+
92
+ ## What This Fixes
93
+
94
+ - ✅ PyABSA can now download checkpoints (git available)
95
+ - ✅ Better error messages for debugging
96
+ - ✅ Checkpoint pre-downloaded during build (faster startup)
97
+ - ✅ Fallback still works if PyABSA fails
98
+ - ✅ No more cryptic git errors in production
99
+
100
+ ## Next Steps
101
+
102
+ After successful deployment, verify in HF Spaces logs:
103
+ 1. Git is detected
104
+ 2. PyABSA loads without errors
105
+ 3. Reviews are processed with actual aspect extraction (not fallback)
106
+
107
+ ---
108
+
109
+ **Deploy now:** Push changes to trigger HF Spaces rebuild! 🚀
src/utils/data_processor.py CHANGED
@@ -18,6 +18,9 @@ import requests
18
  import os
19
  import time
20
 
 
 
 
21
  # Set up logging
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
@@ -207,16 +210,44 @@ class ABSAProcessor:
207
  def _load_pyabsa_model(self):
208
  """Load PyABSA multilingual model with caching."""
209
  try:
 
 
 
 
210
  import pyabsa
211
  from pyabsa import ATEPCCheckpointManager
212
 
213
- # Use multilingual checkpoint - works for English and some Hindi
214
- checkpoint = ATEPCCheckpointManager.get_checkpoint('multilingual')
215
- self.model = pyabsa.load_aspect_extractor(checkpoint=checkpoint)
216
- logger.info("PyABSA model loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
  except Exception as e:
219
- logger.warning(f"Failed to load PyABSA model: {str(e)}. Using fallback.")
 
 
 
220
  self.model = None
221
 
222
  def set_task_manager(self, task_manager):
 
18
  import os
19
  import time
20
 
21
+ # Suppress GitPython warnings for PyABSA
22
+ os.environ['GIT_PYTHON_REFRESH'] = 'quiet'
23
+
24
  # Set up logging
25
  logging.basicConfig(level=logging.INFO)
26
  logger = logging.getLogger(__name__)
 
210
  def _load_pyabsa_model(self):
211
  """Load PyABSA multilingual model with caching."""
212
  try:
213
+ # Suppress additional git warnings
214
+ import warnings
215
+ warnings.filterwarnings('ignore', category=DeprecationWarning)
216
+
217
  import pyabsa
218
  from pyabsa import ATEPCCheckpointManager
219
 
220
+ # Check if git is available
221
+ try:
222
+ import subprocess
223
+ git_check = subprocess.run(['git', '--version'], capture_output=True, timeout=5)
224
+ if git_check.returncode == 0:
225
+ logger.info(f"Git available: {git_check.stdout.decode().strip()}")
226
+ else:
227
+ logger.warning("Git command failed, PyABSA may have issues downloading checkpoints")
228
+ except Exception as git_err:
229
+ logger.warning(f"Git not found or not executable: {str(git_err)}")
230
+
231
+ # Try local checkpoint first (if exists in checkpoints/ directory)
232
+ local_checkpoint = os.path.join(os.path.dirname(__file__), '..', '..', 'checkpoints', 'ATEPC_MULTILINGUAL_CHECKPOINT')
233
+
234
+ if os.path.exists(local_checkpoint):
235
+ logger.info(f"Loading PyABSA from local checkpoint: {local_checkpoint}")
236
+ self.model = pyabsa.load_aspect_extractor(checkpoint=local_checkpoint)
237
+ logger.info("βœ… PyABSA model loaded successfully from local checkpoint")
238
+ else:
239
+ # Fallback to downloading checkpoint (requires git)
240
+ logger.info("Local checkpoint not found, downloading multilingual checkpoint...")
241
+ logger.info("This may take a few minutes on first run...")
242
+ checkpoint = ATEPCCheckpointManager.get_checkpoint('multilingual')
243
+ self.model = pyabsa.load_aspect_extractor(checkpoint=checkpoint)
244
+ logger.info("βœ… PyABSA model loaded successfully from downloaded checkpoint")
245
 
246
  except Exception as e:
247
+ import traceback
248
+ logger.error(f"❌ Failed to load PyABSA model: {str(e)}")
249
+ logger.error(f"Traceback: {traceback.format_exc()}")
250
+ logger.warning("⚠️ Using fallback method for aspect extraction")
251
  self.model = None
252
 
253
  def set_task_manager(self, task_manager):