Upload folder using huggingface_hub
- README.md +109 -0
- __init__.py +18 -0
- quantum_head.py +335 -0
- train.py +246 -0
README.md
ADDED
@@ -0,0 +1,109 @@
# Q-GPT: Quantum-Enhanced GPT

A quantum neural network head that adds confidence estimation to GPT models.

## Features

- 🔮 **Variational Quantum Circuit** - Uses PennyLane to simulate a variational quantum circuit
- 📊 **Confidence Estimation** - Estimates how confident the model is in its response
- 🚫 **Refusal Detection** - Identifies when the model should refuse to answer
- ⚡ **Classical Fallback** - Works without PennyLane using a classical approximation

## Installation

```bash
pip install pennylane torch transformers
```

## Usage

### Quick Start

```python
from quantum_head import load_qgpt

# Load Q-GPT
model, tokenizer = load_qgpt(
    "squ11z1/gpt-oss-9b-reasoning",
    torch_dtype="auto",
    device="auto",
)

# Generate with confidence
inputs = tokenizer("What is 2 + 2?", return_tensors="pt").to(model.base_model.device)
outputs = model.generate_with_confidence(inputs.input_ids, max_new_tokens=50)

print(f"Response: {tokenizer.decode(outputs['sequences'][0])}")
print(f"Confidence: {outputs['confidence_label']}")  # e.g., "high"
print(f"Should refuse: {outputs['should_refuse']}")
```

### Just the Quantum Head

```python
from quantum_head import QuantumHead
import torch

# Create quantum head
head = QuantumHead(hidden_size=2880)  # Match your model's hidden size

# Forward pass with hidden states
hidden_states = torch.randn(1, 2880)  # From your model
output = head(hidden_states)

print(f"Confidence: {output['confidence'].item():.2f}")
print(f"Uncertainty: {output['uncertainty'].item():.2f}")
```

### Training

```bash
# Create synthetic training data
python train.py --model squ11z1/gpt-oss-9b-reasoning --create-data --data train.jsonl

# Train quantum head
python train.py --model squ11z1/gpt-oss-9b-reasoning --data train.jsonl --epochs 3
```
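
The training file is JSON Lines; each record carries the fields that `ConfidenceDataset` in `train.py` reads: `text`, `confidence` (a target in [0, 1]), and `is_correct`. The rows below are illustrative, not taken from any released data:

```json
{"text": "What is the capital of France? The capital of France is Paris.", "confidence": 0.9, "is_correct": true}
{"text": "What is consciousness? There is no single agreed answer...", "confidence": 0.45, "is_correct": false}
```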

## Architecture

```
Hidden States → [Classical Compression] → [Quantum Circuit] → [Post-Processing] → Confidence
      ↓                   ↓                      ↓                    ↓
    [B, H]          [B, n_qubits]          [B, n_qubits]           [B, 2]
                                                                      ↓
                                                          confidence + uncertainty
```
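
The final `[B, 2]` vector is turned into the two scores inside `QuantumHead` (see `quantum_head.py`). A minimal paraphrase of that post-processing, with a random tensor standing in for the head output:

```python
import torch
import torch.nn.functional as F

output = torch.randn(1, 2)                # [B, 2] head output: raw confidence, raw uncertainty
confidence = torch.sigmoid(output[:, 0])  # squashed into [0, 1]
uncertainty = F.softplus(output[:, 1])    # kept non-negative
should_refuse = confidence < 0.3          # refusal threshold used by QuantumHead
print(confidence.item(), uncertainty.item(), bool(should_refuse))
```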

### Quantum Circuit

```
|0⟩ ─ RY(x₀) ─ RZ(x₀) ─ Rot(θ) ─ ●─────── Rot(θ) ─ ... ─ ⟨Z⟩
                                 │
|0⟩ ─ RY(x₁) ─ RZ(x₁) ─ Rot(θ) ─ ⊕ ─ ●─── Rot(θ) ─ ... ─ ⟨Z⟩
                                     │
|0⟩ ─ RY(x₂) ─ RZ(x₂) ─ Rot(θ) ───── ⊕ ─ ●─ Rot(θ) ─ ... ─ ⟨Z⟩
                                          │
|0⟩ ─ RY(x₃) ─ RZ(x₃) ─ Rot(θ) ───────── ⊕ ─ Rot(θ) ─ ... ─ ⟨Z⟩
```
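
For reference, a minimal standalone PennyLane sketch of the circuit drawn above: angle encoding with `RY`/`RZ`, per-qubit `Rot` gates, and a CNOT ladder with wrap-around, mirroring `quantum_head.py`. The random inputs and weights are placeholders:

```python
import numpy as np
import pennylane as qml

n_qubits, n_layers = 4, 3
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev)
def circuit(inputs, weights):
    # Angle-encode the compressed features
    for i in range(n_qubits):
        qml.RY(inputs[i], wires=i)
        qml.RZ(inputs[i], wires=i)
    # Variational layers: Rot gates followed by an entangling CNOT ladder
    for layer in range(n_layers):
        for i in range(n_qubits):
            qml.Rot(*weights[layer, i], wires=i)
        for i in range(n_qubits - 1):
            qml.CNOT(wires=[i, i + 1])
        qml.CNOT(wires=[n_qubits - 1, 0])  # wrap-around entanglement
    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

print(circuit(np.random.uniform(-1, 1, n_qubits),
              np.random.uniform(-0.1, 0.1, (n_layers, n_qubits, 3))))
```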

## Files

- `quantum_head.py` - Main implementation (QuantumHead, QGPT, load_qgpt)
- `train.py` - Training script for the quantum head
- `__init__.py` - Package exports
- `quantum_head.pt` - Pre-trained weights (after training)

## Citation

```bibtex
@misc{qgpt2026,
  title={Q-GPT: Quantum-Enhanced Confidence Estimation for Language Models},
  author={squ11z1},
  year={2026},
  url={https://huggingface.co/squ11z1/Q-GPT}
}
```

## License

Apache 2.0

__init__.py
ADDED
@@ -0,0 +1,18 @@
"""Q-GPT: Quantum-Enhanced GPT with Confidence Estimation"""

from .quantum_head import (
    QuantumHead,
    QuantumCircuit,
    QGPT,
    load_qgpt,
)

__version__ = "1.0.0"
__author__ = "squ11z1"

__all__ = [
    "QuantumHead",
    "QuantumCircuit",
    "QGPT",
    "load_qgpt",
]
quantum_head.py
ADDED
@@ -0,0 +1,335 @@
"""
Q-GPT: Quantum-Enhanced GPT with Confidence Estimation
A quantum neural network head that estimates response confidence.

Author: squ11z1
"""

import torch
import torch.nn as nn
import numpy as np

try:
    import pennylane as qml
    PENNYLANE_AVAILABLE = True
except ImportError:
    PENNYLANE_AVAILABLE = False
    print("Warning: PennyLane not installed. Using classical fallback.")


class QuantumCircuit:
    """Variational Quantum Circuit for confidence estimation."""

    def __init__(self, n_qubits: int = 4, n_layers: int = 3):
        self.n_qubits = n_qubits
        self.n_layers = n_layers

        if PENNYLANE_AVAILABLE:
            self.dev = qml.device("default.qubit", wires=n_qubits)
            self.circuit = qml.QNode(self._quantum_circuit, self.dev, interface="torch")

    def _quantum_circuit(self, inputs, weights):
        """
        Variational quantum circuit.

        Args:
            inputs: Input features [n_qubits]
            weights: Trainable parameters [n_layers, n_qubits, 3]
        """
        # Encode classical data into quantum states
        for i in range(self.n_qubits):
            qml.RY(inputs[i], wires=i)
            qml.RZ(inputs[i], wires=i)

        # Variational layers
        for layer in range(self.n_layers):
            # Rotation gates
            for i in range(self.n_qubits):
                qml.Rot(weights[layer, i, 0],
                        weights[layer, i, 1],
                        weights[layer, i, 2], wires=i)

            # Entanglement (CNOT ladder)
            for i in range(self.n_qubits - 1):
                qml.CNOT(wires=[i, i + 1])

            # Circular entanglement
            if self.n_qubits > 2:
                qml.CNOT(wires=[self.n_qubits - 1, 0])

        # Measure expectation values
        return [qml.expval(qml.PauliZ(i)) for i in range(self.n_qubits)]

    def forward(self, inputs, weights):
        """Execute quantum circuit."""
        if PENNYLANE_AVAILABLE:
            return self.circuit(inputs, weights)
        else:
            # Classical fallback: element-wise tanh so the output keeps shape [n_qubits]
            return torch.tanh(inputs * weights.mean(dim=(0, 2)))


class QuantumHead(nn.Module):
    """
    Quantum-enhanced confidence estimation head for GPT.

    Takes hidden states from the last layer and outputs:
    - confidence: Estimated confidence in the response [0, 1]
    - uncertainty: Quantum-derived uncertainty measure
    """

    def __init__(
        self,
        hidden_size: int = 2880,  # GPT-OSS hidden size
        n_qubits: int = 4,
        n_layers: int = 3,
        intermediate_size: int = 64,
    ):
        super().__init__()

        self.hidden_size = hidden_size
        self.n_qubits = n_qubits
        self.n_layers = n_layers

        # Classical preprocessing: compress hidden states
        self.pre_quantum = nn.Sequential(
            nn.Linear(hidden_size, intermediate_size),
            nn.LayerNorm(intermediate_size),
            nn.GELU(),
            nn.Linear(intermediate_size, n_qubits),
            nn.Tanh(),  # Normalize to [-1, 1] for quantum encoding
        )

        # Quantum circuit
        self.quantum = QuantumCircuit(n_qubits, n_layers)

        # Quantum weights (trainable)
        self.quantum_weights = nn.Parameter(
            torch.randn(n_layers, n_qubits, 3) * 0.1
        )

        # Post-quantum processing
        self.post_quantum = nn.Sequential(
            nn.Linear(n_qubits, intermediate_size),
            nn.GELU(),
            nn.Linear(intermediate_size, 2),  # [confidence, uncertainty]
        )

        # Output heads
        self.confidence_activation = nn.Sigmoid()
        self.uncertainty_activation = nn.Softplus()

    def forward(self, hidden_states: torch.Tensor) -> dict:
        """
        Compute confidence and uncertainty from hidden states.

        Args:
            hidden_states: Last layer hidden states [batch, seq_len, hidden_size]
                or pooled representation [batch, hidden_size]

        Returns:
            dict with 'confidence' and 'uncertainty' tensors
        """
        # Pool if sequence dimension exists
        if hidden_states.dim() == 3:
            # Use last token representation
            hidden_states = hidden_states[:, -1, :]

        batch_size = hidden_states.size(0)

        # Preprocess (cast to the head's dtype, since the base model may run in bf16/fp16)
        hidden_states = hidden_states.to(self.quantum_weights.dtype)
        quantum_input = self.pre_quantum(hidden_states)  # [batch, n_qubits]

        # Process through quantum circuit (per sample)
        quantum_outputs = []
        for i in range(batch_size):
            qout = self.quantum.forward(
                quantum_input[i],
                self.quantum_weights
            )
            if isinstance(qout, (list, tuple)):
                qout = torch.stack(qout)
            quantum_outputs.append(qout)

        quantum_output = torch.stack(quantum_outputs)  # [batch, n_qubits]

        # Post-process
        output = self.post_quantum(quantum_output)

        confidence = self.confidence_activation(output[:, 0])
        uncertainty = self.uncertainty_activation(output[:, 1])

        return {
            "confidence": confidence,
            "uncertainty": uncertainty,
            "should_refuse": confidence < 0.3,  # Low confidence = should refuse
        }

    def get_interpretable_confidence(self, confidence: torch.Tensor) -> str:
        """Convert confidence score to human-readable label."""
        conf = confidence.item() if confidence.dim() == 0 else confidence.mean().item()

        if conf >= 0.9:
            return "very high"
        elif conf >= 0.7:
            return "high"
        elif conf >= 0.5:
            return "moderate"
        elif conf >= 0.3:
            return "low"
        else:
            return "very low (consider refusing)"


class QGPT(nn.Module):
    """
    Q-GPT: GPT with Quantum Confidence Head

    Wraps any HuggingFace GPT model and adds quantum confidence estimation.
    """

    def __init__(self, base_model, quantum_head: QuantumHead = None):
        super().__init__()
        self.base_model = base_model

        # Get hidden size from model config
        if hasattr(base_model.config, 'hidden_size'):
            hidden_size = base_model.config.hidden_size
        elif hasattr(base_model.config, 'd_model'):
            hidden_size = base_model.config.d_model
        else:
            hidden_size = 2880  # GPT-OSS default

        self.quantum_head = quantum_head or QuantumHead(hidden_size=hidden_size)

    def forward(self, input_ids, attention_mask=None, **kwargs):
        """Forward pass with confidence estimation."""
        # Get base model outputs with hidden states
        outputs = self.base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,
            **kwargs
        )

        # Get last layer hidden states
        hidden_states = outputs.hidden_states[-1]

        # Compute quantum confidence
        confidence_output = self.quantum_head(hidden_states)

        # Add to outputs
        outputs.confidence = confidence_output["confidence"]
        outputs.uncertainty = confidence_output["uncertainty"]
        outputs.should_refuse = confidence_output["should_refuse"]

        return outputs

    def generate_with_confidence(
        self,
        input_ids,
        attention_mask=None,
        max_new_tokens=256,
        **kwargs
    ):
        """Generate text and return confidence score."""
        # Generate
        outputs = self.base_model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            output_hidden_states=True,
            return_dict_in_generate=True,
            **kwargs
        )

        # Get hidden states from last generation step
        if hasattr(outputs, 'hidden_states') and outputs.hidden_states:
            last_hidden = outputs.hidden_states[-1][-1]  # Last step, last layer
        else:
            # Fallback: run forward pass on generated sequence
            with torch.no_grad():
                model_outputs = self.base_model(
                    outputs.sequences,
                    output_hidden_states=True
                )
            last_hidden = model_outputs.hidden_states[-1]

        # Compute confidence
        confidence_output = self.quantum_head(last_hidden)

        return {
            "sequences": outputs.sequences,
            "confidence": confidence_output["confidence"],
            "uncertainty": confidence_output["uncertainty"],
            "should_refuse": confidence_output["should_refuse"],
            "confidence_label": self.quantum_head.get_interpretable_confidence(
                confidence_output["confidence"]
            ),
        }


def load_qgpt(
    model_name: str = "squ11z1/gpt-oss-9b-reasoning",
    quantum_head_path: str = None,
    device: str = "auto",
    torch_dtype=None,
    **kwargs
):
    """
    Load Q-GPT model with quantum head.

    Args:
        model_name: HuggingFace model name or path
        quantum_head_path: Path to trained quantum head weights
        device: Device to load model on
        torch_dtype: Model dtype (e.g., torch.bfloat16)

    Returns:
        QGPT model and tokenizer
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer

    if torch_dtype is None:
        torch_dtype = torch.bfloat16

    # Load base model
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch_dtype,
        device_map=device,
        trust_remote_code=True,
        **kwargs
    )

    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        **kwargs
    )

    # Create Q-GPT
    model = QGPT(base_model)

    # Load quantum head weights if provided
    if quantum_head_path:
        state_dict = torch.load(quantum_head_path, map_location="cpu")
        model.quantum_head.load_state_dict(state_dict)
        print(f"Loaded quantum head from {quantum_head_path}")

    return model, tokenizer


if __name__ == "__main__":
    # Quick test
    print("Testing QuantumHead...")

    head = QuantumHead(hidden_size=2880)
    dummy_input = torch.randn(2, 2880)  # Batch of 2

    output = head(dummy_input)
    print(f"Confidence: {output['confidence']}")
    print(f"Uncertainty: {output['uncertainty']}")
    print(f"Should refuse: {output['should_refuse']}")

    print("\n✓ QuantumHead test passed!")
train.py
ADDED
@@ -0,0 +1,246 @@
"""
Q-GPT Training Script
Train the quantum head on GPT outputs.
"""

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import json
import os

from quantum_head import QuantumHead, load_qgpt


class ConfidenceDataset(Dataset):
    """Dataset for training quantum confidence head."""

    def __init__(self, data_path: str, tokenizer, max_length: int = 512):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.data = []

        # Load data
        with open(data_path, 'r') as f:
            for line in f:
                item = json.loads(line)
                self.data.append(item)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]

        # Tokenize
        encoding = self.tokenizer(
            item["text"],
            truncation=True,
            max_length=self.max_length,
            padding="max_length",
            return_tensors="pt"
        )

        return {
            "input_ids": encoding["input_ids"].squeeze(),
            "attention_mask": encoding["attention_mask"].squeeze(),
            "confidence_label": torch.tensor(item.get("confidence", 0.5)),
            "is_correct": torch.tensor(float(item.get("is_correct", True))),
        }


def train_quantum_head(
    model_name: str = "squ11z1/gpt-oss-9b-reasoning",
    train_data_path: str = None,
    output_dir: str = "./q_gpt_trained",
    epochs: int = 3,
    batch_size: int = 4,
    learning_rate: float = 1e-4,
    device: str = "cuda",
):
    """
    Train the quantum head on confidence estimation.

    Args:
        model_name: Base model name
        train_data_path: Path to training data (jsonl with text, confidence, is_correct)
        output_dir: Where to save trained weights
        epochs: Number of training epochs
        batch_size: Batch size
        learning_rate: Learning rate for quantum head
        device: Device to train on
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer

    os.makedirs(output_dir, exist_ok=True)

    print(f"Loading model: {model_name}")

    # Load base model (frozen)
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    base_model.eval()
    for param in base_model.parameters():
        param.requires_grad = False

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Create quantum head
    hidden_size = base_model.config.hidden_size
    quantum_head = QuantumHead(hidden_size=hidden_size).to(device)

    # Optimizer (only quantum head parameters)
    optimizer = torch.optim.AdamW(quantum_head.parameters(), lr=learning_rate)

    # Loss functions
    confidence_loss_fn = nn.BCELoss()
    correctness_loss_fn = nn.BCELoss()

    # Training loop
    if train_data_path and os.path.exists(train_data_path):
        dataset = ConfidenceDataset(train_data_path, tokenizer)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        for epoch in range(epochs):
            quantum_head.train()
            total_loss = 0

            for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                confidence_labels = batch["confidence_label"].to(device)
                correctness_labels = batch["is_correct"].to(device)

                # Get hidden states from base model
                with torch.no_grad():
                    outputs = base_model(
                        input_ids=input_ids,
                        attention_mask=attention_mask,
                        output_hidden_states=True
                    )
                    hidden_states = outputs.hidden_states[-1]

                # Forward through quantum head
                qout = quantum_head(hidden_states.to(device))

                # Compute loss
                conf_loss = confidence_loss_fn(qout["confidence"], confidence_labels)

                # High confidence should correlate with correctness
                correct_loss = correctness_loss_fn(qout["confidence"], correctness_labels)

                loss = 0.5 * conf_loss + 0.5 * correct_loss

                # Backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                total_loss += loss.item()

            avg_loss = total_loss / len(dataloader)
            print(f"Epoch {epoch+1} - Loss: {avg_loss:.4f}")
    else:
        print("No training data provided. Saving untrained quantum head.")

    # Save
    save_path = os.path.join(output_dir, "quantum_head.pt")
    torch.save(quantum_head.state_dict(), save_path)
    print(f"Saved quantum head to {save_path}")

    return quantum_head


def create_synthetic_training_data(
    model_name: str,
    output_path: str,
    num_samples: int = 1000,
):
    """Create synthetic training data from model predictions."""
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import random

    print("Creating synthetic training data...")

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    # Sample prompts
    prompts = [
        "What is 2 + 2?",
        "Explain quantum mechanics.",
        "Who was the first president of USA?",
        "Solve: x^2 - 4 = 0",
        "What is the capital of France?",
        "Explain machine learning.",
        "What is consciousness?",
        "Calculate 15% of 200.",
    ]

    data = []

    for i in tqdm(range(num_samples)):
        prompt = random.choice(prompts)

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=50,
                do_sample=True,
                temperature=0.7,
            )

        text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Simple heuristic for confidence (based on prompt type)
        is_factual = any(kw in prompt.lower() for kw in ["what is", "who", "calculate", "solve"])
        confidence = random.uniform(0.7, 0.95) if is_factual else random.uniform(0.4, 0.7)

        data.append({
            "text": text,
            "confidence": confidence,
            "is_correct": confidence > 0.5,
        })

    with open(output_path, 'w') as f:
        for item in data:
            f.write(json.dumps(item) + '\n')

    print(f"Created {len(data)} samples at {output_path}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="squ11z1/gpt-oss-9b-reasoning")
    parser.add_argument("--data", default=None)
    parser.add_argument("--output", default="./q_gpt_trained")
    parser.add_argument("--epochs", type=int, default=3)
    parser.add_argument("--create-data", action="store_true")

    args = parser.parse_args()

    if args.create_data:
        create_synthetic_training_data(args.model, args.data or "train_data.jsonl")
    else:
        train_quantum_head(
            model_name=args.model,
            train_data_path=args.data,
            output_dir=args.output,
            epochs=args.epochs,
        )