Himanshu Gangwar committed on
Commit ·
dd5f3f5
1
Parent(s): 15c1303
feat: Update Dockerfile and scripts for Neo4j initialization and environment variable handling
Browse files
- Dockerfile +25 -10
- app.py +2 -2
- init_neo4j.py +1 -1
- startup.sh +11 -13
Dockerfile
CHANGED
|
@@ -1,16 +1,29 @@
|
|
| 1 |
# Hugging Face Space Dockerfile for MedGraphy
|
|
|
|
| 2 |
FROM python:3.10-slim
|
| 3 |
|
| 4 |
-
# Install system dependencies
|
| 5 |
RUN apt-get update && apt-get install -y \
|
| 6 |
wget \
|
| 7 |
curl \
|
| 8 |
git \
|
|
|
|
|
|
|
| 9 |
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
|
| 11 |
-
#
|
| 12 |
-
RUN
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Set working directory
|
| 16 |
WORKDIR /app
|
|
@@ -21,22 +34,24 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 21 |
|
| 22 |
COPY . .
|
| 23 |
|
| 24 |
-
# Create data directory
|
| 25 |
-
RUN mkdir -p data db
|
| 26 |
|
| 27 |
# Make startup script executable
|
| 28 |
RUN chmod +x startup.sh
|
| 29 |
|
| 30 |
-
# Expose
|
| 31 |
-
EXPOSE 7860
|
| 32 |
|
| 33 |
-
# Set environment variables
|
| 34 |
ENV NEO4J_URI=bolt://localhost:7687
|
| 35 |
ENV NEO4J_USERNAME=neo4j
|
| 36 |
-
ENV NEO4J_PASSWORD=
|
| 37 |
ENV NEO4J_DATABASE=neo4j
|
| 38 |
ENV GRADIO_SERVER_NAME=0.0.0.0
|
| 39 |
ENV GRADIO_SERVER_PORT=7860
|
|
|
|
|
|
|
| 40 |
|
| 41 |
# Run startup script
|
| 42 |
CMD ["bash", "startup.sh"]
|
|
|
|
| 1 |
# Hugging Face Space Dockerfile for MedGraphy
|
| 2 |
+
# Runs Neo4j directly (no Docker-in-Docker)
|
| 3 |
FROM python:3.10-slim
|
| 4 |
|
| 5 |
+
# Install system dependencies including Java for Neo4j
|
| 6 |
RUN apt-get update && apt-get install -y \
|
| 7 |
wget \
|
| 8 |
curl \
|
| 9 |
git \
|
| 10 |
+
openjdk-17-jre-headless \
|
| 11 |
+
procps \
|
| 12 |
&& rm -rf /var/lib/apt/lists/*
|
| 13 |
|
| 14 |
+
# Download and install Neo4j
|
| 15 |
+
RUN mkdir -p /neo4j && \
|
| 16 |
+
cd /neo4j && \
|
| 17 |
+
wget -q https://dist.neo4j.org/neo4j-community-5.15.0-unix.tar.gz && \
|
| 18 |
+
tar -xzf neo4j-community-5.15.0-unix.tar.gz && \
|
| 19 |
+
mv neo4j-community-5.15.0 neo4j-community && \
|
| 20 |
+
rm neo4j-community-5.15.0-unix.tar.gz
|
| 21 |
+
|
| 22 |
+
# Configure Neo4j
|
| 23 |
+
RUN cd /neo4j/neo4j-community && \
|
| 24 |
+
sed -i 's/#server.default_listen_address=0.0.0.0/server.default_listen_address=0.0.0.0/' conf/neo4j.conf && \
|
| 25 |
+
sed -i 's/#initial.dbms.default_database=neo4j/initial.dbms.default_database=neo4j/' conf/neo4j.conf && \
|
| 26 |
+
echo "dbms.security.auth_enabled=false" >> conf/neo4j.conf
|
| 27 |
|
| 28 |
# Set working directory
|
| 29 |
WORKDIR /app
|
|
|
|
| 34 |
|
| 35 |
COPY . .
|
| 36 |
|
| 37 |
+
# Create data directory
|
| 38 |
+
RUN mkdir -p data db /tmp/medgraph
|
| 39 |
|
| 40 |
# Make startup script executable
|
| 41 |
RUN chmod +x startup.sh
|
| 42 |
|
| 43 |
+
# Expose ports
|
| 44 |
+
EXPOSE 7860 7474 7687
|
| 45 |
|
| 46 |
+
# Set environment variables
|
| 47 |
ENV NEO4J_URI=bolt://localhost:7687
|
| 48 |
ENV NEO4J_USERNAME=neo4j
|
| 49 |
+
ENV NEO4J_PASSWORD=neo4j
|
| 50 |
ENV NEO4J_DATABASE=neo4j
|
| 51 |
ENV GRADIO_SERVER_NAME=0.0.0.0
|
| 52 |
ENV GRADIO_SERVER_PORT=7860
|
| 53 |
+
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
|
| 54 |
+
ENV NEO4J_HOME=/neo4j/neo4j-community
|
| 55 |
|
| 56 |
# Run startup script
|
| 57 |
CMD ["bash", "startup.sh"]
|
app.py
CHANGED
|
@@ -12,10 +12,10 @@ load_dotenv()
|
|
| 12 |
|
| 13 |
# Load credentials from environment or Hugging Face Spaces secrets
|
| 14 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 15 |
-
# Use local Neo4j instance running
|
| 16 |
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
|
| 17 |
NEO4J_USER = os.getenv("NEO4J_USERNAME", "neo4j")
|
| 18 |
-
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "
|
| 19 |
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE", "neo4j")
|
| 20 |
FAISS_INDEX_PATH = "db/medicine_embeddings.index"
|
| 21 |
METADATA_PATH = "db/metadata.json"
|
|
|
|
| 12 |
|
| 13 |
# Load credentials from environment or Hugging Face Spaces secrets
|
| 14 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
| 15 |
+
# Use local Neo4j instance running directly (not Docker)
|
| 16 |
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
|
| 17 |
NEO4J_USER = os.getenv("NEO4J_USERNAME", "neo4j")
|
| 18 |
+
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "neo4j")
|
| 19 |
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE", "neo4j")
|
| 20 |
FAISS_INDEX_PATH = "db/medicine_embeddings.index"
|
| 21 |
METADATA_PATH = "db/metadata.json"
|
init_neo4j.py
CHANGED
|
@@ -17,7 +17,7 @@ load_dotenv()
|
|
| 17 |
# Configuration
|
| 18 |
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
|
| 19 |
NEO4J_USER = os.getenv("NEO4J_USERNAME", "neo4j")
|
| 20 |
-
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "
|
| 21 |
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
|
| 22 |
VECTOR_INDEX_NAME = "medicine_embeddings"
|
| 23 |
CSV_PATH = "data/Medicine_Details.csv"
|
|
|
|
| 17 |
# Configuration
|
| 18 |
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
|
| 19 |
NEO4J_USER = os.getenv("NEO4J_USERNAME", "neo4j")
|
| 20 |
+
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "neo4j")
|
| 21 |
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
|
| 22 |
VECTOR_INDEX_NAME = "medicine_embeddings"
|
| 23 |
CSV_PATH = "data/Medicine_Details.csv"
|
startup.sh
CHANGED
|
@@ -3,21 +3,22 @@ set -e
|
|
| 3 |
|
| 4 |
echo "π Starting MedGraphy application..."
|
| 5 |
|
| 6 |
-
# Create
|
| 7 |
mkdir -p /tmp/medgraph
|
| 8 |
|
| 9 |
-
# Check if
|
| 10 |
if [ -f "/tmp/medgraph/neo4j_initialized.flag" ]; then
|
| 11 |
-
echo "✅ Found existing Neo4j
|
| 12 |
SKIP_INIT=true
|
| 13 |
else
|
| 14 |
-
echo "π First time setup
|
| 15 |
SKIP_INIT=false
|
| 16 |
fi
|
| 17 |
|
| 18 |
-
# Start Neo4j
|
| 19 |
-
echo "
|
| 20 |
-
|
|
|
|
| 21 |
|
| 22 |
# Wait for Neo4j to be ready
|
| 23 |
echo "⏳ Waiting for Neo4j to be ready..."
|
|
@@ -36,8 +37,8 @@ done
|
|
| 36 |
if [ $attempt -eq $max_attempts ]; then
|
| 37 |
echo "❌ Neo4j failed to start within expected time"
|
| 38 |
echo "π Neo4j logs:"
|
| 39 |
-
|
| 40 |
-
|
| 41 |
fi
|
| 42 |
|
| 43 |
# Initialize data if needed
|
|
@@ -49,10 +50,7 @@ if [ "$SKIP_INIT" = false ]; then
|
|
| 49 |
echo "✅ Neo4j initialization complete!"
|
| 50 |
touch /tmp/medgraph/neo4j_initialized.flag
|
| 51 |
else
|
| 52 |
-
echo "
|
| 53 |
-
echo "π Neo4j logs:"
|
| 54 |
-
docker-compose logs neo4j
|
| 55 |
-
exit 1
|
| 56 |
fi
|
| 57 |
else
|
| 58 |
echo "✅ Using existing Neo4j data"
|
|
|
|
| 3 |
|
| 4 |
echo "π Starting MedGraphy application..."
|
| 5 |
|
| 6 |
+
# Create necessary directories
|
| 7 |
mkdir -p /tmp/medgraph
|
| 8 |
|
| 9 |
+
# Check if already initialized
|
| 10 |
if [ -f "/tmp/medgraph/neo4j_initialized.flag" ]; then
|
| 11 |
+
echo "✅ Found existing Neo4j data marker"
|
| 12 |
SKIP_INIT=true
|
| 13 |
else
|
| 14 |
+
echo "π First time setup - will initialize Neo4j with data..."
|
| 15 |
SKIP_INIT=false
|
| 16 |
fi
|
| 17 |
|
| 18 |
+
# Start Neo4j directly (not via Docker)
|
| 19 |
+
echo "π Starting Neo4j..."
|
| 20 |
+
export NEO4J_HOME=/neo4j/neo4j-community
|
| 21 |
+
$NEO4J_HOME/bin/neo4j start
|
| 22 |
|
| 23 |
# Wait for Neo4j to be ready
|
| 24 |
echo "⏳ Waiting for Neo4j to be ready..."
|
|
|
|
| 37 |
if [ $attempt -eq $max_attempts ]; then
|
| 38 |
echo "❌ Neo4j failed to start within expected time"
|
| 39 |
echo "π Neo4j logs:"
|
| 40 |
+
cat $NEO4J_HOME/logs/neo4j.log 2>/dev/null || echo "No logs available"
|
| 41 |
+
# Continue anyway - app will work with FAISS only
|
| 42 |
fi
|
| 43 |
|
| 44 |
# Initialize data if needed
|
|
|
|
| 50 |
echo "✅ Neo4j initialization complete!"
|
| 51 |
touch /tmp/medgraph/neo4j_initialized.flag
|
| 52 |
else
|
| 53 |
+
echo "⚠️ Neo4j initialization failed, continuing with FAISS only..."
|
|
|
|
|
|
|
|
|
|
| 54 |
fi
|
| 55 |
else
|
| 56 |
echo "✅ Using existing Neo4j data"
|