Spaces:

kohlin
/

nlp-project

Configuration error

App Files Files Community

kohlin committed on Feb 17, 2025

Commit

978a5b4

0 Parent(s):

Initial Commit

Browse files

Files changed (8) hide show

.github/workflows/ci-cd.yml +50 -0
.github/workflows/deploy.yml +28 -0
.github/workflows/docker-build.yml +30 -0
Dockerfile +10 -0
README.md +0 -0
app.py +18 -0
requirements.txt +7 -0
train.py +56 -0

.github/workflows/ci-cd.yml ADDED Viewed

	@@ -0,0 +1,50 @@

+# Runs the test suite on every push, then promotes the pushed branch one
+# stage: feature/* is merged into develop, develop is merged into main.
+#
+# NOTE: pushes made with the default GITHUB_TOKEN do not start new workflow
+# runs, so the develop -> main promotion only happens for direct pushes to
+# develop, not for the merge commit created by merge-to-develop.
+name: CI/CD Pipeline
+on:
+  push:
+    branches:
+      - 'feature/*'
+      - develop
+      - main
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          # Quoted so YAML keeps the version a string (3.10 would become 3.1).
+          python-version: '3.9'
+      - name: Install Dependencies
+        run: pip install -r requirements.txt
+      - name: Run Tests
+        run: python -m unittest discover -s tests
+  merge-to-develop:
+    needs: build
+    runs-on: ubuntu-latest
+    # `==` compares literally and never expands `*`, so match on the prefix.
+    if: startsWith(github.ref, 'refs/heads/feature/')
+    # The job pushes a merge commit, so the token needs write access.
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v3
+        with:
+          # Full history of all branches is needed to merge across them.
+          fetch-depth: 0
+      - name: Merge feature branch to develop
+        run: |
+          # --no-ff always creates a commit, which requires an identity.
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git fetch origin
+          git checkout develop
+          git merge --no-ff "origin/${GITHUB_REF#refs/heads/}"
+          git push origin develop
+  merge-to-main:
+    # Depends on build only: merge-to-develop is skipped on develop, and a
+    # skipped dependency would cause this job to be skipped as well.
+    needs: build
+    runs-on: ubuntu-latest
+    if: github.ref == 'refs/heads/develop'
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - name: Merge develop branch to main
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git fetch origin
+          git checkout main
+          git merge --no-ff origin/develop
+          git push origin main

.github/workflows/deploy.yml ADDED Viewed

	@@ -0,0 +1,28 @@

+# Mirrors the main branch to the Hugging Face Space kohlin/nlp-project on
+# every push to main. Requires the HF_TOKEN repository secret.
+name: Deploy to Hugging Face
+on:
+  push:
+    branches:
+      - main
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          # The default checkout is a shallow clone, which the Space remote
+          # rejects on push; fetch the complete history instead.
+          fetch-depth: 0
+          # Also download LFS objects so large files are pushed intact.
+          lfs: true
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.9'
+      - name: Push to Hugging Face
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          git config --global user.email "nikita.datascience@gmail.com"
+          git config --global user.name "nkofficial-1005"
+          # Quote the URL so characters in the token are not shell-expanded.
+          git remote add hf "https://kohlin:${HF_TOKEN}@huggingface.co/spaces/kohlin/nlp-project"
+          git push hf main

.github/workflows/docker-build.yml ADDED Viewed

	@@ -0,0 +1,30 @@

+# Builds the image from the repository root and publishes it to Docker Hub
+# as kohlin/nlp-project:latest each time main receives a push.
+# Requires the DOCKER_USERNAME and DOCKER_PASSWORD repository secrets.
+name: 'Docker Build and Push'
+on:
+  push:
+    branches: [main]
+jobs:
+  build-and-push:
+    runs-on: 'ubuntu-latest'
+    steps:
+      # Fetch the sources that form the build context.
+      - uses: actions/checkout@v3
+        name: 'Checkout repository'
+      # Provide the Buildx builder used by the build-push action.
+      - uses: docker/setup-buildx-action@v2
+        name: 'Set up Docker Buildx'
+      # Authenticate before pushing.
+      - uses: docker/login-action@v2
+        name: 'Log in to Docker Hub'
+        with:
+          password: ${{ secrets.DOCKER_PASSWORD }}
+          username: ${{ secrets.DOCKER_USERNAME }}
+      # Build from the repository root and push the resulting image.
+      - uses: docker/build-push-action@v4
+        name: 'Build and Push Docker Image'
+        with:
+          tags: 'kohlin/nlp-project:latest'
+          push: true
+          context: '.'

Dockerfile ADDED Viewed

	@@ -0,0 +1,10 @@

+# Container image that installs the Python dependencies and runs app.py.
+FROM python:3.9
+# All following paths are relative to /app inside the image.
+WORKDIR /app
+# Copy only the dependency list first so the install layer below can be
+# reused from cache when just the application code changes.
+COPY requirements.txt .
+# --no-cache-dir keeps pip's download cache out of the image.
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the build context (application code and any model files
+# present in it) into /app.
+COPY . .
+# Start the application; exec form runs python directly without a shell.
+CMD ["python", "app.py"]

README.md ADDED Viewed

File without changes

app.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import gradio as gr
+from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
+# Load fine-tuned model
+model_path = "./ner_model"
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForTokenClassification.from_pretrained(model_path)
+# Create NER pipeline
+ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
+def ner_prediction(text):
+    entities = ner_pipeline(text)
+    return {e["word"]: e["entity"] for e in entities}
+# Gradio UI
+iface = gr.Interface(fn=ner_prediction, inputs="text", outputs="label")
+iface.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+transformers
+datasets
+torch
+seqeval
+gradio
+fastapi
+uvicorn

train.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import torch
+from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer
+from datasets import load_dataset, load_metric
+# Load dataset
+dataset = load_dataset("conll2003")
+# Load tokenizer
+model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"
+tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+# Tokenize the dataset
+def tokenize_and_align_labels(examples):
+    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
+    return tokenized_inputs
+tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=True)
+# Load model
+model = AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=9)
+# Training arguments
+training_args = TrainingArguments(
+    output_dir="./ner_model",
+    evaluation_strategy="epoch",
+    save_strategy="epoch",
+    learning_rate=2e-5,
+    per_device_train_batch_size=16,
+    per_device_eval_batch_size=16,
+    num_train_epochs=3,
+    weight_decay=0.01,
+)
+# Load metric
+metric = load_metric("seqeval")
+def compute_metrics(eval_pred):
+    predictions, labels = eval_pred
+    return metric.compute(predictions=predictions.argmax(-1), references=labels)
+# Trainer
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=tokenized_datasets["train"],
+    eval_dataset=tokenized_datasets["validation"],
+    tokenizer=tokenizer,
+    compute_metrics=compute_metrics,
+)
+# Train model
+trainer.train()
+# Save model
+trainer.save_model("./ner_model")
+tokenizer.save_pretrained("./ner_model")