Noha90 committed on
Commit
e4358a7
·
1 Parent(s): ae52f76

Fix tokenizer loading and file permissions

Browse files
Files changed (2) hide show
  1. Dockerfile +9 -4
  2. app.py +22 -6
Dockerfile CHANGED
@@ -2,29 +2,34 @@ FROM python:3.9-slim
2
 
3
  WORKDIR /app
4
 
 
5
  RUN apt-get update && \
6
  apt-get install -y --no-install-recommends \
7
  build-essential \
8
  && rm -rf /var/lib/apt/lists/*
9
 
 
10
  RUN useradd -m -u 1000 user && \
11
  chown -R user:user /app
12
 
 
13
  ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
14
  ENV HF_HOME=/app/.cache/huggingface
15
  ENV PORT=7860
16
 
 
17
  RUN mkdir -p /app/.cache/huggingface && \
18
  chown -R user:user /app/.cache
19
 
20
- USER user
 
21
 
22
- COPY --chown=user:user requirements.txt .
 
23
 
 
24
  RUN pip install --no-cache-dir --user -r requirements.txt
25
 
26
- COPY --chown=user:user . .
27
-
28
  EXPOSE 7860
29
 
30
  CMD ["python", "app.py"]
 
2
 
3
  WORKDIR /app
4
 
5
+ # Install system dependencies
6
  RUN apt-get update && \
7
  apt-get install -y --no-install-recommends \
8
  build-essential \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
+ # Create non-root user
12
  RUN useradd -m -u 1000 user && \
13
  chown -R user:user /app
14
 
15
+ # Set environment variables
16
  ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
17
  ENV HF_HOME=/app/.cache/huggingface
18
  ENV PORT=7860
19
 
20
+ # Create cache directory with proper permissions
21
  RUN mkdir -p /app/.cache/huggingface && \
22
  chown -R user:user /app/.cache
23
 
24
+ # Copy files first
25
+ COPY --chown=user:user . .
26
 
27
+ # Switch to non-root user
28
+ USER user
29
 
30
+ # Install dependencies
31
  RUN pip install --no-cache-dir --user -r requirements.txt
32
 
 
 
33
  EXPOSE 7860
34
 
35
  CMD ["python", "app.py"]
app.py CHANGED
@@ -1,14 +1,29 @@
1
  from flask import Flask, request, render_template
2
- from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
3
  from collections import Counter
4
  import datetime, json
 
5
 
6
  app = Flask(__name__)
7
 
8
- model = AutoModelForTokenClassification.from_pretrained("roberta_model", local_files_only=True)
9
- tokenizer = AutoTokenizer.from_pretrained("roberta_model", local_files_only=True)
10
-
11
- nlp = pipeline("token-classification", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  @app.route("/", methods=["GET", "POST"])
14
  def index():
@@ -37,4 +52,5 @@ def index():
37
  tag_labels=tag_labels, tag_counts=tag_counts)
38
 
39
  if __name__ == "__main__":
40
- app.run(debug=True)
 
 
from flask import Flask, request, render_template
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, RobertaTokenizerFast
from collections import Counter
import datetime, json
import os

app = Flask(__name__)

# Single source of truth for the local checkpoint directory
# (was previously hard-coded in four separate string literals).
MODEL_DIR = "roberta_model"

try:
    # Load the fine-tuned token-classification model from local files only —
    # no network fetch is attempted inside the container.
    model = AutoModelForTokenClassification.from_pretrained(MODEL_DIR, local_files_only=True)

    # AutoTokenizer fails on this checkpoint's tokenizer config, so build a
    # RobertaTokenizerFast directly from its vocab/merges/tokenizer files.
    tokenizer = RobertaTokenizerFast(
        vocab_file=os.path.join(MODEL_DIR, "vocab.json"),
        merges_file=os.path.join(MODEL_DIR, "merges.txt"),
        tokenizer_file=os.path.join(MODEL_DIR, "tokenizer.json"),
    )

    # aggregation_strategy="simple" merges sub-word pieces into whole-word entity spans.
    nlp = pipeline("token-classification", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
    print("Model and tokenizer loaded successfully!")

except Exception as e:
    # Surface the failure in the container logs, then abort startup —
    # the app cannot serve requests without the model.
    print(f"Error loading model or tokenizer: {str(e)}")
    raise
 
28
  @app.route("/", methods=["GET", "POST"])
29
  def index():
 
52
  tag_labels=tag_labels, tag_counts=tag_counts)
53
 
54
if __name__ == "__main__":
    # Bind to all interfaces on the port the platform injects (default 7860).
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))