Spaces:
Sleeping
Sleeping
added hf configs
Browse files- Dockerfile +37 -0
- nginx.conf +28 -0
- nlp_core/ner_engine.py +43 -3
- requirements.txt +5 -1
- start.sh +29 -0
Dockerfile
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Stage 1: build the Next.js frontend ---
FROM node:20-slim AS frontend-builder

WORKDIR /app/frontend
COPY frontend/package*.json ./
RUN npm ci
COPY frontend/ ./
# NOTE(review): NEXT_PUBLIC_* values are baked into the client bundle at
# build time; a remote browser cannot reach localhost:8000 — confirm the
# frontend actually routes API calls through the nginx /api/ proxy.
ENV NEXT_PUBLIC_API_URL=http://localhost:8000
RUN npm run build

# --- Stage 2: runtime image (FastAPI + Next.js + nginx in one container) ---
FROM python:3.11-slim

# gcc is required to compile hdbscan from source
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc g++ nodejs npm nginx curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps first so code-only changes reuse this cached layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY nlp_core/ ./nlp_core/
COPY adapters/ ./adapters/

# Bring the pre-built frontend artifacts over from the builder stage.
COPY --from=frontend-builder /app/frontend/.next ./frontend/.next
COPY --from=frontend-builder /app/frontend/public ./frontend/public
COPY --from=frontend-builder /app/frontend/package*.json ./frontend/
COPY --from=frontend-builder /app/frontend/node_modules ./frontend/node_modules

COPY nginx.conf /etc/nginx/sites-available/default

# Hugging Face Spaces expects the app on port 7860 (served by nginx).
EXPOSE 7860

COPY start.sh .
RUN chmod +x start.sh

CMD ["./start.sh"]
|
nginx.conf
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Reverse proxy fronting both app servers on the single public port (7860)
# that Hugging Face Spaces exposes.
server {
    listen 7860;
    server_name _;

    # Allow large document uploads and long-running NLP requests.
    client_max_body_size 100M;
    proxy_read_timeout 300s;
    proxy_send_timeout 300s;

    # FastAPI backend.
    location /api/ {
        proxy_pass http://localhost:8000/api/;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        # Empty Connection header enables keep-alive to the upstream.
        proxy_set_header Connection "";
    }

    # Swagger UI served by FastAPI.
    location /docs {
        proxy_pass http://localhost:8000/docs;
    }

    # Everything else goes to the Next.js frontend; the Upgrade/Connection
    # headers keep websocket connections working.
    location / {
        proxy_pass http://localhost:3000;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
    }
}
|
nlp_core/ner_engine.py
CHANGED
|
@@ -58,6 +58,46 @@ class NEREngine:
|
|
| 58 |
))
|
| 59 |
return results
|
| 60 |
|
| 61 |
-
def recognize_batch(self, texts: List[str]) -> List[List[EntityResult]]:
|
| 62 |
-
"""Run NER on a batch of texts."""
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
))
|
| 59 |
return results
|
| 60 |
|
| 61 |
+
def recognize_batch(self, texts: List[str], batch_size: int = 16) -> List[List[EntityResult]]:
    """Run NER on a batch of texts using Hugging Face pipeline batching.

    Args:
        texts: Input documents; blank/empty entries produce empty results.
        batch_size: Number of texts per forward pass through the pipeline.

    Returns:
        One list of EntityResult per input text, index-aligned with ``texts``.
    """
    if not texts:
        return []

    # Blank strings can crash HF pipelines, so only forward non-empty
    # texts and remember their original positions.
    valid_texts = []
    valid_indices = []
    for i, text in enumerate(texts):
        if text and text.strip():
            valid_texts.append(text)
            valid_indices.append(i)

    # Preallocate an empty slot for every input so the output stays
    # index-aligned with `texts` (skipped texts keep []).
    out: List[List[EntityResult]] = [[] for _ in texts]

    if not valid_texts:
        return out

    pipe = self._load_pipeline()
    try:
        # Send the whole batch to the pipeline in one call.
        raw_results = pipe(valid_texts, batch_size=batch_size)

        for idx, raw in zip(valid_indices, raw_results):
            out[idx] = [
                EntityResult(
                    word=ent.get("word", ""),
                    entity_group=ent.get("entity_group", "MISC"),
                    score=float(ent.get("score", 0.0)),
                    start=int(ent.get("start", 0)),
                    end=int(ent.get("end", 0)),
                )
                for ent in self._clean_entities(raw)
            ]
    except Exception as e:  # best-effort: a batch failure degrades to per-text
        print(f"[NEREngine] Batch processing error: {e}")
        # Fallback to single-text processing if the batched call fails.
        # Fix: guard each fallback call individually — previously one
        # pathological text re-raised and lost results for the whole batch.
        for idx, text in zip(valid_indices, valid_texts):
            try:
                out[idx] = self.recognize(text)
            except Exception as single_err:
                print(f"[NEREngine] Fallback error at index {idx}: {single_err}")
                out[idx] = []

    return out
|
requirements.txt
CHANGED
|
@@ -7,9 +7,10 @@ python-multipart>=0.0.6
|
|
| 7 |
|
| 8 |
# NLP Core
|
| 9 |
transformers>=4.35.0
|
| 10 |
-
torch
|
| 11 |
sentence-transformers>=2.2.0
|
| 12 |
bertopic>=0.16.0
|
|
|
|
| 13 |
networkx>=3.0
|
| 14 |
|
| 15 |
# Utilities
|
|
@@ -17,3 +18,6 @@ pandas>=2.0.0
|
|
| 17 |
tiktoken>=0.5.0
|
| 18 |
sentencepiece>=0.1.99
|
| 19 |
protobuf>=3.20.0
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# NLP Core
|
| 9 |
transformers>=4.35.0
|
| 10 |
+
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.2.0+cpu
|
| 11 |
sentence-transformers>=2.2.0
|
| 12 |
bertopic>=0.16.0
|
| 13 |
+
hdbscan
|
| 14 |
networkx>=3.0
|
| 15 |
|
| 16 |
# Utilities
|
|
|
|
| 18 |
tiktoken>=0.5.0
|
| 19 |
sentencepiece>=0.1.99
|
| 20 |
protobuf>=3.20.0
|
| 21 |
+
scikit-learn
|
| 22 |
+
|
| 23 |
+
|
start.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Launch FastAPI (8000), Next.js (3000) and nginx (7860, public) in one
# container; exit as soon as any one of them dies.
set -e

# Fix: kill all background children when this script exits — including on
# SIGTERM from the container runtime — instead of orphaning the servers,
# and return a failure status so the platform restarts the Space.
cleanup() {
    kill $(jobs -p) 2>/dev/null || true
}
trap cleanup EXIT INT TERM

echo "=== Starting NLP Intelligence ==="

cd /app
PYTHONPATH=/app uvicorn adapters.api.main:app \
    --host 0.0.0.0 \
    --port 8000 \
    --workers 1 \
    --timeout-keep-alive 120 &

FASTAPI_PID=$!
echo "FastAPI started (PID $FASTAPI_PID)"

cd /app/frontend
npm run start -- --port 3000 &

NEXTJS_PID=$!
echo "Next.js started (PID $NEXTJS_PID)"

# Give the app servers a moment to bind before nginx starts proxying.
sleep 5
nginx -g "daemon off;" &

NGINX_PID=$!
echo "nginx started — app on port 7860"

# Block until the FIRST of the three processes exits (`wait -n` with PID
# arguments requires bash >= 5.1), then tear everything down via the trap.
wait -n $FASTAPI_PID $NEXTJS_PID $NGINX_PID
echo "A process exited — shutting down"
exit 1
|