"""Gradio demo: side-by-side comparison of base Llama-3-8B (HF Inference API)
against a physics-domain CPT-adapted Llama-3-8B served locally as a 4-bit GGUF.

Configuration comes from environment variables:
  - HF_TOKEN: Hugging Face API token for the base-model InferenceClient.
  - en2de / de2en: prompt templates containing a "{text}" placeholder.
"""

import os

import gradio as gr
from huggingface_hub import InferenceClient, hf_hub_download
from llama_cpp import Llama

# ==========================================
# 1. SECURE CONFIGURATION
# ==========================================
HF_TOKEN = os.getenv("HF_TOKEN")
# Prompt templates are kept out of the source tree (they are part of the
# research setup); each must contain a literal "{text}" placeholder.
EN2DE_TEMPLATE = os.getenv("en2de")
DE2EN_TEMPLATE = os.getenv("de2en")

CPT_REPO = "st192011/Llama-3-8B-Physics-CFT-GGUF"
MODEL_FILE = "llama-3-8b.Q4_K_M.gguf"
BASE_MODEL_ID = "meta-llama/Meta-Llama-3-8B"

# ==========================================
# 2. INITIALIZE MODELS
# ==========================================
# NOTE: model download / load happens at import time; the Space is not
# usable until both complete.
print("📥 Downloading Physics-CFT GGUF...")
model_path = hf_hub_download(repo_id=CPT_REPO, filename=MODEL_FILE)

print("🔄 Loading Local Causal CPT Model (CPU)...")
llm_cpt = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    verbose=False,
)

print("🌐 Connecting to Base Model API...")
client_base = InferenceClient(token=HF_TOKEN)


# ==========================================
# 3. TRANSLATION LOGIC
# ==========================================
def compare_models(text, direction):
    """Translate *text* with both models and return their outputs.

    Args:
        text: Source text to translate (physics domain).
        direction: Either "English to German" or "German to English".

    Returns:
        Tuple of (base_model_output, cpt_model_output) as strings. On
        missing configuration or empty input, both slots carry the same
        user-facing message instead of a translation.
    """
    if not EN2DE_TEMPLATE or not DE2EN_TEMPLATE:
        return "System Error: Missing Configuration", "System Error: Missing Configuration"
    if not text or not text.strip():
        return "Input required.", "Input required."

    is_en2de = direction == "English to German"
    template = EN2DE_TEMPLATE if is_en2de else DE2EN_TEMPLATE
    prompt = template.replace("{text}", text.strip())
    # Stop sequences keep the model from continuing past a single
    # translation (pipe-delimited pairs / language tags in the template).
    stop_seqs = ["|", "\n", " En:", " De:"]

    # --- A. CPT ADAPTED MODEL (Local GGUF) ---
    # Greedy decoding: temperature=0.0
    cpt_res = llm_cpt(
        prompt,
        max_tokens=256,
        stop=stop_seqs,
        echo=False,
        temperature=0.0,
        top_p=1.0,  # Ensure pure greedy
        repeat_penalty=1.1,
    )
    cft_output = cpt_res["choices"][0]["text"].strip()

    # --- B. BASE MODEL (HF API) ---
    try:
        base_res = client_base.text_generation(
            prompt,
            model=BASE_MODEL_ID,
            max_new_tokens=256,
            stop_sequences=stop_seqs,
            temperature=0.01,  # API limit for greedy
        )
        base_output = base_res.strip()
    except Exception as e:
        # Boundary catch: the remote API can fail for many reasons
        # (rate limit, cold model, auth). Log it instead of silently
        # discarding the exception, then degrade gracefully in the UI.
        print(f"⚠️ Base model API call failed: {e}")
        base_output = "Base Model API Unavailable."

    return base_output, cft_output


# ==========================================
# 4. UI & REVISED RESEARCH REPORT
# ==========================================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚛️ Physics-ACPT: Domain-Adaptive Translation")

    with gr.Tabs():
        with gr.TabItem("Comparison Demo"):
            gr.Markdown("Direct comparison: **Base Llama-3-8B** vs. **Physics-ACPT** (Unsupervised Adaptation).")
            with gr.Row():
                with gr.Column():
                    input_text = gr.Textbox(label="Source Physics Text", lines=4)
                    direction = gr.Radio(
                        ["English to German", "German to English"],
                        label="Direction",
                        value="English to German",
                    )
                    btn = gr.Button("Translate & Compare", variant="primary")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 📉 Base Llama-3-8B")
                    out_base = gr.Textbox(label="General Purpose Output", lines=6, interactive=False)
                with gr.Column():
                    gr.Markdown("### 🚀 Physics-ACPT (ACPT Mode)")
                    out_cft = gr.Textbox(label="Domain-Refined Output", lines=6, interactive=False)

            btn.click(fn=compare_models, inputs=[input_text, direction], outputs=[out_base, out_cft])

        with gr.TabItem("Technical Report"):
            gr.Markdown("""
## Methodology: Zero-Shot Domain Adaptation via Anchored CPT

### **Training Objective**
This model was developed to achieve specialized domain translation in Physics using solo **Continued Pre-Training (CPT)** on independent monolingual manifolds (5,000 ArXiv EN abstracts / 5,000 Wiki DE articles). No parallel domain-specific corpora were utilized.

### **Quantization vs. Adapter Precision**
It is important to note a performance distinction between the **full-precision LoRA adapter** and this **quantized GGUF deployment**:
- **Local Adapter:** In 16-bit precision, the model demonstrated aggressive terminology selection, such as correctly mapping *'Reverse Shock'* to the specific astrophysical term ***'Rückstoßwelle'***.
- **GGUF Deployment:** The 4-bit quantization (Q4_K_M) required for efficient CPU deployment introduces a slight probabilistic "blurring." In the demo above, the model may select a "safer" technical term (e.g., ***'rückläufige'***) rather than the most aggressive jargon.

### **Persistent Domain Traces**
Despite quantization, the Physics-ACPT model maintains significant "Domain Traces" that outperform the base Llama-3-8B model:

1. **Rejection of Hallucinations:**
   - *Input:* "Ground state degeneracy"
   - *Base Model:* Produces **"Degenerenz"** (A linguistic hallucination; a non-existent German word).
   - *Physics-ACPT:* Selects **"Degenerierung"** (A valid, research-oriented German technical term).

2. **Technical Adjective Selection:**
   - *Input:* "Reverse shock wave"
   - *Base Model:* Uses **"rückwärtige"** (A casual, general-purpose word for 'at the back').
   - *Physics-ACPT:* Uses **"rückläufige"** (A specific scientific term for 'retrograde/receding').

### **Conclusion**
These results validate the **Semantic Triangulation** hypothesis. By aligning the "Functional Bridge" (Anchor) with "Domain Knowledge" (Monolingual CPT) via causal language modeling, the model shifts its internal probability away from colloquial "guesses" and toward authentic scientific vocabulary, even under the constraints of 4-bit quantization.
""")

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)