File size: 6,224 Bytes
3766f93
 
23c3ea2
3766f93
 
5cedcd5
23c3ea2
5cedcd5
23c3ea2
 
 
5cedcd5
23c3ea2
0d68322
23c3ea2
3766f93
23c3ea2
 
 
 
0d68322
3766f93
23c3ea2
 
3766f93
 
0d68322
112c6cd
3766f93
 
23c3ea2
 
 
 
 
 
 
 
 
 
48f7388
23c3ea2
3766f93
23c3ea2
 
 
 
 
48f7388
23c3ea2
0d68322
23c3ea2
112c6cd
 
23c3ea2
 
0d68322
 
 
112c6cd
23c3ea2
 
 
 
 
 
 
 
 
0d68322
23c3ea2
 
 
 
 
 
48f7388
5cedcd5
0d68322
5cedcd5
f8cb864
c4b8fdc
3766f93
23c3ea2
 
c4b8fdc
23c3ea2
 
 
 
 
 
 
 
 
 
 
 
c4b8fdc
23c3ea2
8506268
23c3ea2
3766f93
23c3ea2
 
 
 
 
c4b8fdc
0d68322
 
 
 
 
23c3ea2
0d68322
c4b8fdc
23c3ea2
0d68322
 
 
c4b8fdc
23c3ea2
0d68322
 
 
c4b8fdc
0d68322
 
c4b8fdc
23c3ea2
3766f93
48f7388
f8cb864
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import os
import gradio as gr
from huggingface_hub import InferenceClient, hf_hub_download
from llama_cpp import Llama

# ==========================================
# 1. SECURE CONFIGURATION
# ==========================================
# Secrets and prompt templates are read from the environment (HF Space
# secrets) so they never appear in the source. The templates are consumed
# by compare_models(), which substitutes the literal "{text}" placeholder.
HF_TOKEN = os.getenv("HF_TOKEN")          # token for the HF Inference API client
EN2DE_TEMPLATE = os.getenv("en2de")       # prompt template, English -> German
DE2EN_TEMPLATE = os.getenv("de2en")       # prompt template, German -> English

CPT_REPO = "st192011/Llama-3-8B-Physics-CFT-GGUF"   # repo holding the CPT GGUF
MODEL_FILE = "llama-3-8b.Q4_K_M.gguf"               # 4-bit quantized weights file
BASE_MODEL_ID = "meta-llama/Meta-Llama-3-8B"        # baseline model served via API

# ==========================================
# 2. INITIALIZE MODELS
# ==========================================
# Module-level side effects: downloads the GGUF (cached by hf_hub) and loads
# it into RAM. Runs once at import/startup, before the UI is served.
print("📥 Downloading Physics-CFT GGUF...")
model_path = hf_hub_download(repo_id=CPT_REPO, filename=MODEL_FILE)

print("🔄 Loading Local Causal CPT Model (CPU)...")
llm_cpt = Llama(
    model_path=model_path,
    n_ctx=2048,       # context window; prompts here are short abstracts
    n_threads=4,      # CPU-only inference thread count
    verbose=False
)

print("🌐 Connecting to Base Model API...")
# Remote client for the un-adapted baseline; actual model is chosen per call.
client_base = InferenceClient(token=HF_TOKEN)

# ==========================================
# 3. TRANSLATION LOGIC
# ==========================================
def compare_models(text, direction):
    """Translate ``text`` with both models and return their outputs.

    Args:
        text: Source text to translate (physics domain).
        direction: Either ``"English to German"`` or ``"German to English"``;
            selects which env-provided prompt template is used.

    Returns:
        A ``(base_output, cft_output)`` tuple of strings: the remote base
        Llama-3-8B result first, then the local CPT-adapted GGUF result.
        On configuration/input problems both slots carry an error message.
    """
    # Templates come from environment secrets and may be unset in a fork.
    if not EN2DE_TEMPLATE or not DE2EN_TEMPLATE:
        err = "System Error: Missing Configuration"
        return err, err

    if not text or not text.strip():
        return "Input required.", "Input required."

    is_en2de = direction == "English to German"
    template = EN2DE_TEMPLATE if is_en2de else DE2EN_TEMPLATE
    prompt = template.replace("{text}", text.strip())

    # Stop generation at delimiters / newline / the next language tag so the
    # model emits only a single translation line.
    stop_seqs = ["|", "\n", " En:", " De:"]

    # --- A. CPT ADAPTED MODEL (Local GGUF) ---
    # Greedy decoding: temperature=0.0 with top_p=1.0 for deterministic output.
    cpt_res = llm_cpt(
        prompt,
        max_tokens=256,
        stop=stop_seqs,
        echo=False,           # return only the completion, not the prompt
        temperature=0.0,
        top_p=1.0,            # ensure pure greedy
        repeat_penalty=1.1,
    )
    cft_output = cpt_res["choices"][0]["text"].strip()

    # --- B. BASE MODEL (HF API) ---
    try:
        base_res = client_base.text_generation(
            prompt,
            model=BASE_MODEL_ID,
            max_new_tokens=256,
            stop_sequences=stop_seqs,
            temperature=0.01,  # API rejects exactly 0.0; near-zero ~ greedy
        )
        base_output = base_res.strip()
    except Exception as e:
        # Surface the failure reason instead of discarding it silently, so
        # the demo user (and logs) can tell rate-limits from auth errors.
        print(f"⚠️ Base model API call failed: {e!r}")
        base_output = f"Base Model API Unavailable. ({type(e).__name__})"

    return base_output, cft_output

# ==========================================
# 4. UI & REVISED RESEARCH REPORT
# ==========================================
# ==========================================
# 4. UI & REVISED RESEARCH REPORT
# ==========================================
# Two-tab Gradio app: a live comparison demo wired to compare_models(), and a
# static technical report. The block only declares UI; inference happens in
# the btn.click callback.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚛️ Physics-ACPT: Domain-Adaptive Translation")
    
    with gr.Tabs():
        with gr.TabItem("Comparison Demo"):
            gr.Markdown("Direct comparison: **Base Llama-3-8B** vs. **Physics-ACPT** (Unsupervised Adaptation).")
            
            # Input row: source text, translation direction, submit button.
            with gr.Row():
                with gr.Column():
                    input_text = gr.Textbox(label="Source Physics Text", lines=4)
                    direction = gr.Radio(["English to German", "German to English"], label="Direction", value="English to German")
                    btn = gr.Button("Translate & Compare", variant="primary")
            
            # Output row: side-by-side, base model left / CPT model right
            # (matches the (base_output, cft_output) order returned by
            # compare_models).
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### 📉 Base Llama-3-8B")
                    out_base = gr.Textbox(label="General Purpose Output", lines=6, interactive=False)
                with gr.Column():
                    gr.Markdown("### 🚀 Physics-ACPT (ACPT Mode)")
                    out_cft = gr.Textbox(label="Domain-Refined Output", lines=6, interactive=False)
            
            btn.click(fn=compare_models, inputs=[input_text, direction], outputs=[out_base, out_cft])

        with gr.TabItem("Technical Report"):
            gr.Markdown("""
            ## Methodology: Zero-Shot Domain Adaptation via Anchored CPT
            
            ### **Training Objective**
            This model was developed to achieve specialized domain translation in Physics using solo **Continued Pre-Training (CPT)** on independent monolingual manifolds (5,000 ArXiv EN abstracts / 5,000 Wiki DE articles). No parallel domain-specific corpora were utilized.

            ### **Quantization vs. Adapter Precision**
            It is important to note a performance distinction between the **full-precision LoRA adapter** and this **quantized GGUF deployment**:
            - **Local Adapter:** In 16-bit precision, the model demonstrated aggressive terminology selection, such as correctly mapping *'Reverse Shock'* to the specific astrophysical term ***'Rückstoßwelle'***.
            - **GGUF Deployment:** The 4-bit quantization (Q4_K_M) required for efficient CPU deployment introduces a slight probabilistic "blurring." In the demo above, the model may select a "safer" technical term (e.g., ***'rückläufige'***) rather than the most aggressive jargon. 
            
            ### **Persistent Domain Traces**
            Despite quantization, the Physics-ACPT model maintains significant "Domain Traces" that outperform the base Llama-3-8B model:
            
            1. **Rejection of Hallucinations:** 
               - *Input:* "Ground state degeneracy"
               - *Base Model:* Produces **"Degenerenz"** (A linguistic hallucination; a non-existent German word).
               - *Physics-ACPT:* Selects **"Degenerierung"** (A valid, research-oriented German technical term).
            
            2. **Technical Adjective Selection:**
               - *Input:* "Reverse shock wave"
               - *Base Model:* Uses **"rückwärtige"** (A casual, general-purpose word for 'at the back').
               - *Physics-ACPT:* Uses **"rückläufige"** (A specific scientific term for 'retrograde/receding').

            ### **Conclusion**
            These results validate the **Semantic Triangulation** hypothesis. By aligning the "Functional Bridge" (Anchor) with "Domain Knowledge" (Monolingual CPT) via causal language modeling, the model shifts its internal probability away from colloquial "guesses" and toward authentic scientific vocabulary, even under the constraints of 4-bit quantization.
            """)

# Entry point: bind to 0.0.0.0 so the server is reachable from outside the
# container (standard for HF Spaces); port 7860 is the Spaces default.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)