train-mbed

Paused

App Files Community

amos1088 committed on Aug 5, 2025

Commit

c9bee67

1 Parent(s): de267c3

tt

Browse files

Files changed (3) hide show

Dockerfile +18 -15
app.py +41 -57
requirements.txt +1 -2

Dockerfile CHANGED Viewed

@@ -1,36 +1,39 @@
-FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
 ENV DEBIAN_FRONTEND=noninteractive
 ENV OMP_NUM_THREADS=4
 ENV DISABLE_TRITON=1
 ENV ACCELERATE_USE_DEEPSPEED=0
-# Install OS deps
 RUN apt-get update && apt-get install -y \
-    git wget curl python3 python3-pip python3-dev build-essential \
-    libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev \
     ffmpeg libsm6 libxext6 libgl1-mesa-glx \
     && rm -rf /var/lib/apt/lists/*
-# Set Python as default
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
 RUN pip install --upgrade pip
-# ---- 1. Install Torch first (needed for flash-attn) ----
-RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu121
-# ---- 2. Install requirements (without flash-attn) ----
 WORKDIR /app
 COPY requirements.txt /app/requirements.txt
-RUN pip install --no-cache-dir -r requirements.txt
-# ---- 3. Install flash-attn separately ----
-RUN pip install --no-build-isolation flash-attn
-# Copy your code
 COPY . /app
 EXPOSE 7860
-# Launch Gradio app
 CMD ["python", "app.py"]

+# Use official PyTorch with CUDA 12.1 (works with flash-attn)
+FROM pytorch/pytorch:2.3.0-cuda12.1-cudnn8-devel
 ENV DEBIAN_FRONTEND=noninteractive
 ENV OMP_NUM_THREADS=4
 ENV DISABLE_TRITON=1
 ENV ACCELERATE_USE_DEEPSPEED=0
+ENV TRANSFORMERS_VERBOSITY=info
+ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+ENV FLASH_ATTENTION_FORCE=1
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
+    git wget curl build-essential python3-dev \
     ffmpeg libsm6 libxext6 libgl1-mesa-glx \
     && rm -rf /var/lib/apt/lists/*
+# Upgrade pip first
 RUN pip install --upgrade pip
+# Copy requirements (without flash-attn)
 WORKDIR /app
 COPY requirements.txt /app/requirements.txt
+RUN grep -v "flash-attn" requirements.txt > requirements-clean.txt
+# Install all Python deps except flash-attn
+RUN pip install --no-cache-dir -r requirements-clean.txt
+# Install flash-attn last to ensure Torch is ready
+RUN pip install --no-build-isolation flash-attn==2.8.2
+# Copy application
 COPY . /app
+# Expose Gradio
 EXPOSE 7860
+# Default command to launch your app
 CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -275,10 +275,7 @@ def get_data_status():
     """Get data download status"""
     return f"{data_download_status['message']}"
 def run_inference(query, document_title, document_content, checkpoint="latest"):
-    import torch
     global current_model, current_tokenizer
     # Load the model if not already loaded
@@ -295,49 +292,32 @@ def run_inference(query, document_title, document_content, checkpoint="latest"):
         else:
             load_model_and_tokenizer(checkpoint)
-    # Prepare prompt exactly like training
-    prompt = f"""you would get a query and document's title and content and return Relevant/Irrelevant.
-Query:
-{query}
-Document:
-title: {document_title}
-content: {document_content}
-"""
-    # Helper function to score log-probability
-    def score_response(model, tokenizer, prompt, response):
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-        labels = tokenizer(response, return_tensors="pt").to(model.device)
-        # Concatenate prompt and response (excluding last token of response for shifting)
-        input_ids = torch.cat([inputs.input_ids, labels.input_ids[:, :-1]], dim=1)
-        attention_mask = torch.ones_like(input_ids)
-        with torch.no_grad():
-            outputs = model(input_ids=input_ids, attention_mask=attention_mask, use_cache=False)
-            log_probs = torch.log_softmax(outputs.logits, dim=-1)
-        # Compute average log-prob for the response tokens
-        target_ids = labels.input_ids
-        seq_logprob = 0
-        count = 0
-        for i in range(target_ids.shape[1]):
-            token_id = target_ids[0, i].item()
-            if token_id == tokenizer.pad_token_id:
-                continue
-            seq_logprob += log_probs[0, inputs.input_ids.shape[1] + i - 1, token_id].item()
-            count += 1
-        return seq_logprob / max(count, 1)
-    # Compute log-prob for both options
-    score_relevant = score_response(current_model, current_tokenizer, prompt, "Relevant")
-    score_irrelevant = score_response(current_model, current_tokenizer, prompt, "Irrelevant")
-    # Return the higher-probability label
-    return "Relevant" if score_relevant > score_irrelevant else "Irrelevant"
 def list_checkpoints():
@@ -432,19 +412,19 @@ with gr.Blocks(title="Phi-3 DPO Training on BEIR") as demo:
         import time
-        def batch_inference(csv_file, checkpoint="latest", batch_size=16):
             import pandas as pd
             if csv_file is None:
                 raise ValueError("No CSV file uploaded.")
-            # Gradio File can be str (path) or dict
             csv_path = csv_file if isinstance(csv_file, str) else getattr(csv_file, "name", None)
             if csv_path is None:
                 raise ValueError("Invalid file input from Gradio.")
             df = pd.read_csv(csv_path)
             if "prompt" not in df.columns:
                 raise ValueError("CSV must have a 'prompt' column")
@@ -468,7 +448,7 @@ with gr.Blocks(title="Phi-3 DPO Training on BEIR") as demo:
             correct = 0
             total = len(prompts)
-            # Create temp output CSV
             output_path = "/tmp/batch_inference_results.csv"
             for i in range(0, total, batch_size):
@@ -492,40 +472,44 @@ with gr.Blocks(title="Phi-3 DPO Training on BEIR") as demo:
                     )
                 batch_decoded = current_tokenizer.batch_decode(outputs, skip_special_tokens=True)
                 for prompt, decoded in zip(batch_prompts, batch_decoded):
-                    response = decoded[len(prompt):].strip()
-                    if "relevant" in response.lower():
-                        pred = "Relevant" if "irrelevant" not in response.lower() else "Irrelevant"
                     else:
-                        pred = response
                     predictions.append(pred)
-                # Optional: compute running accuracy
                 if "chosen" in df.columns:
                     for j, pred in enumerate(predictions[-len(batch_prompts):]):
                         idx = i + j
                         if str(df["chosen"].iloc[idx]).strip().lower() == pred.lower():
                             correct += 1
-                # Update progress every batch
-                progress = (i + batch_size) / total * 100
                 df_partial = df.copy()
                 df_partial.loc[:len(predictions) - 1, "prediction"] = predictions
                 df_partial.to_csv(output_path, index=False)
                 stats = f"Processed {min(i + batch_size, total)}/{total} rows ({progress:.1f}%)"
                 if "chosen" in df.columns:
-                    stats += f"\nCurrent Accuracy: {correct / max(1, len(predictions)) * 100:.2f}%"
-                # Yield progress to Gradio
                 yield output_path, stats
             # Final stats
             final_stats = f"✅ Processed {total} rows"
             if "chosen" in df.columns:
                 final_stats += f"\nFinal Accuracy: {correct / total * 100:.2f}%"
-            yield output_path, final_stats
         csv_infer_btn = gr.Button("Run Batch Inference")
         csv_infer_btn.click(

     """Get data download status"""
     return f"{data_download_status['message']}"
 def run_inference(query, document_title, document_content, checkpoint="latest"):
     global current_model, current_tokenizer
     # Load the model if not already loaded
         else:
             load_model_and_tokenizer(checkpoint)
+    # Prepare prompt like training
+    prompt = format_prompt_for_inference(query, document_title, document_content)
+    # Tokenize
+    inputs = current_tokenizer(
+        prompt, return_tensors="pt", truncation=True, max_length=512
+    )
+    inputs = {k: v.to(current_model.device) for k, v in inputs.items()}
+    # Generate single label
+    with torch.no_grad():
+        outputs = current_model.generate(
+            **inputs,
+            max_new_tokens=5,
+            temperature=0.0,
+            do_sample=False,
+            pad_token_id=current_tokenizer.eos_token_id,
+            use_cache=False
+        )
+    response = current_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = response[len(prompt):].strip().lower()
+    if response.startswith("irrelevant"):
+        return "Irrelevant"
+    return "Relevant"
 def list_checkpoints():
         import time
+        def batch_inference(csv_file, checkpoint="latest", batch_size=64):
             import pandas as pd
             if csv_file is None:
                 raise ValueError("No CSV file uploaded.")
+            # Gradio File can be path (str) or tempfile object
             csv_path = csv_file if isinstance(csv_file, str) else getattr(csv_file, "name", None)
             if csv_path is None:
                 raise ValueError("Invalid file input from Gradio.")
             df = pd.read_csv(csv_path)
             if "prompt" not in df.columns:
                 raise ValueError("CSV must have a 'prompt' column")
             correct = 0
             total = len(prompts)
+            # Temp output path
             output_path = "/tmp/batch_inference_results.csv"
             for i in range(0, total, batch_size):
                     )
                 batch_decoded = current_tokenizer.batch_decode(outputs, skip_special_tokens=True)
                 for prompt, decoded in zip(batch_prompts, batch_decoded):
+                    response = decoded[len(prompt):].strip().lower()
+                    if response.startswith("irrelevant"):
+                        pred = "Irrelevant"
+                    elif response.startswith("relevant"):
+                        pred = "Relevant"
                     else:
+                        pred = decoded.strip()
                     predictions.append(pred)
+                # Accuracy calculation
                 if "chosen" in df.columns:
                     for j, pred in enumerate(predictions[-len(batch_prompts):]):
                         idx = i + j
                         if str(df["chosen"].iloc[idx]).strip().lower() == pred.lower():
                             correct += 1
+                # Save partial results for streaming
                 df_partial = df.copy()
                 df_partial.loc[:len(predictions) - 1, "prediction"] = predictions
                 df_partial.to_csv(output_path, index=False)
+                # Progress & accuracy stats
+                progress = min(i + batch_size, total) / total * 100
                 stats = f"Processed {min(i + batch_size, total)}/{total} rows ({progress:.1f}%)"
                 if "chosen" in df.columns:
+                    stats += f"\nCurrent Accuracy: {correct / len(predictions) * 100:.2f}%"
+                # Stream update to Gradio
                 yield output_path, stats
             # Final stats
             final_stats = f"✅ Processed {total} rows"
             if "chosen" in df.columns:
                 final_stats += f"\nFinal Accuracy: {correct / total * 100:.2f}%"
+            yield output_path, final_stats
         csv_infer_btn = gr.Button("Run Batch Inference")
         csv_infer_btn.click(

requirements.txt CHANGED Viewed

@@ -6,8 +6,7 @@ accelerate>=0.25.0
 bitsandbytes>=0.41.0
 datasets
 pandas
-torch>=2.0.0
 scipy
 beir
 scikit-learn
-tqdm

 bitsandbytes>=0.41.0
 datasets
 pandas
 scipy
 beir
 scikit-learn
+tqdm