File size: 5,807 Bytes
67367c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import logging
from threading import Thread
from typing import Generator, Dict, Any, List
import re
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer


# Hugging Face Hub identifier of the checkpoint served by this module.
MODEL_NAME = "Remostart/Plutus_Advanced_model"

# Root logging is configured at INFO; this module logs under "plutus.model".
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("plutus.model")

# Opt in to TF32 for cuDNN ops and CUDA matmuls, but only when a CUDA device
# is actually available.
if torch.cuda.is_available():
    torch.backends.cudnn.allow_tf32 = True
    torch.backends.cuda.matmul.allow_tf32 = True




class SharedLLM:
    """Class-level cache holding a single tokenizer/model pair.

    The tokenizer, model and device are stored on the class itself, so every
    caller of :meth:`load` receives the same objects after the first call.
    """

    # Populated by the first successful ``load()`` call.
    _tokenizer = None
    _model = None
    # Chosen once, when the class body is executed.
    _device = "cuda" if torch.cuda.is_available() else "cpu"

    @classmethod
    def load(cls):
        """Return ``(tokenizer, model, device)``, loading them on first use.

        When no model is cached yet, the tokenizer and model named by
        ``MODEL_NAME`` are fetched with ``from_pretrained``, the model is
        moved to the selected device and switched to eval mode. Later calls
        skip all of that and return the cached objects.
        """
        if cls._model is None:
            logger.info(f"[LOAD] Loading tokenizer: {MODEL_NAME}")
            cls._tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

            logger.info(f"[LOAD] Loading model on {cls._device}")
            # Half precision on CUDA; on CPU the dtype is left to the library.
            weights_dtype = torch.float16 if cls._device == "cuda" else None
            cls._model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME,
                torch_dtype=weights_dtype,
                low_cpu_mem_usage=True,
            )

            cls._model.to(cls._device)
            cls._model.eval()

            logger.info("[READY] Shared LLM loaded once.")

        return cls._tokenizer, cls._model, cls._device




# Sentence-ending punctuation followed by whitespace at the very end of the text.
_SENTENCE_END_RE = re.compile(r"([.!?])\s+$")
# A line consisting only of a list marker ("1.", "-" or "*") plus trailing whitespace.
_LIST_ITEM_RE = re.compile(r"^\s*(\d+\.|\-|\*)\s+$")
# Markdown code-fence delimiter.
_CODE_FENCE = "```"


def should_flush(buffer: str) -> bool:
    """Decide whether the accumulated streamed text is ready to be emitted.

    Args:
        buffer: Text accumulated since the last flush, including any
            leading/trailing whitespace.

    Returns:
        ``False`` while the buffer holds fewer than 25 non-padding characters
        or while its last line is only a list marker (e.g. ``"1. "``).
        Otherwise ``True`` when the buffer contains a blank line, ends with
        sentence punctuation followed by whitespace, or is longer than 180
        characters.
    """
    if len(buffer.strip()) < 25:
        return False

    # The marker check must look at the last line of the *raw* buffer: the
    # pattern requires trailing whitespace, so it can never match stripped
    # text, and a whole buffer that is just a marker is already rejected by
    # the length guard above. Without this, "...\n1. " is flushed as a
    # sentence end ("." + space) and the marker is split from its item text.
    last_line = buffer.rsplit("\n", 1)[-1]
    if _LIST_ITEM_RE.match(last_line):
        return False

    if "\n\n" in buffer:
        return True

    if _SENTENCE_END_RE.search(buffer):
        return True

    return len(buffer) > 180




class PlutusModel:
    """Builds tutoring prompts and streams the shared model's answer in chunks."""

    def __init__(self):
        # Reuse the tokenizer/model/device returned by the shared loader.
        self.tokenizer, self.model, self.device = SharedLLM.load()

    def create_prompt(self, personality: str, level: str, topic: str, extra_context: str = None) -> str:
        """Assemble the tutoring prompt text.

        Args:
            personality: Inserted on the "Personality" line.
            level: Inserted on the "Level" line.
            topic: Inserted on the "Topic" line.
            extra_context: Optional text; when truthy it is appended under an
                "Additional Context" heading.

        Returns:
            The full prompt, always ending with ``"Assistant:"``.
        """
        prompt = (
            "You are PlutusTutor — the best expert in Cardano's Plutus ecosystem.\n\n"
            f"User Info:\n"
            f"- Personality: {personality}\n"
            f"- Level: {level}\n"
            f"- Topic: {topic}\n\n"
            "Your task:\n"
            "- Teach with extreme clarity.\n"
            "- Use structured explanations.\n"
            "- Include examples where helpful.\n"
            "- Avoid filler.\n"
            "- Adapt tone to personality.\n\n"
        )

        if extra_context:
            prompt += f"Additional Context:\n{extra_context}\n\n"

        return prompt + "Begin teaching now.\n\nAssistant:"

    def generate(
        self,
        prompt: str,
        max_new_tokens: int = 600,
        temperature: float = 0.6,
        top_p: float = 0.9
    ) -> Generator[str, None, None]:
        """Stream the model's continuation of ``prompt`` as stripped text chunks.

        Generation runs on a daemon thread that feeds a
        ``TextIteratorStreamer``; this generator consumes the streamer and
        yields a chunk whenever ``should_flush`` approves the buffered text
        and no Markdown code fence is currently open. Any non-blank remainder
        is yielded once the stream ends.

        Args:
            prompt: Text passed to the tokenizer.
            max_new_tokens: Forwarded to ``model.generate``.
            temperature: Sampling temperature forwarded to ``model.generate``.
            top_p: Nucleus-sampling value forwarded to ``model.generate``.

        Yields:
            Whitespace-stripped chunks of generated text.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)

        def _run():
            try:
                with torch.inference_mode():
                    self.model.generate(
                        **inputs,
                        streamer=streamer,
                        max_new_tokens=max_new_tokens,
                        do_sample=True,
                        temperature=temperature,
                        top_p=top_p,
                        eos_token_id=self.tokenizer.eos_token_id,
                        pad_token_id=self.tokenizer.pad_token_id,
                    )
            except Exception:
                # If generation dies, the streamer never receives its end
                # signal and the consumer loop below would block forever.
                # Log the failure and close the stream explicitly.
                logger.exception("[GENERATE] Generation thread failed")
                streamer.end()

        Thread(target=_run, daemon=True).start()

        buffer = ""

        for token in streamer:
            buffer += token

            # Flushes only happen outside a code block, so every buffer starts
            # in prose: an odd number of fences means a block is still open.
            # (Toggling a flag on each token while the fence is still in the
            # buffer flips the state back and forth and splits code blocks.)
            in_code_block = buffer.count(_CODE_FENCE) % 2 == 1

            if not in_code_block and should_flush(buffer):
                yield buffer.strip()
                buffer = ""

        if buffer.strip():
            yield buffer.strip()



class SummaryModel:
    """Streams a summary of teaching content using the shared model."""

    def __init__(self):
        # Reuse the tokenizer/model/device returned by the shared loader.
        self.tokenizer, self.model, self.device = SharedLLM.load()

    def summarize_text(
        self,
        full_teaching: str,
        topic: str,
        level: str,
        recommended: List[Dict[str, Any]],
        max_new_tokens: int = 400
    ) -> Generator[str, None, None]:
        """Stream a summary of ``full_teaching``, then list recommended resources.

        Generation runs on a daemon thread that feeds a
        ``TextIteratorStreamer``; this generator consumes the streamer and
        yields a chunk whenever ``should_flush`` approves the buffered text
        and no Markdown code fence is currently open. After the summary, a
        "Recommended Resources" heading and one bullet per item are yielded
        when ``recommended`` is non-empty.

        Args:
            full_teaching: Content placed under "CONTENT:" in the prompt.
            topic: Inserted on the "TOPIC" line of the prompt.
            level: Inserted on the "LEVEL" line of the prompt.
            recommended: Items rendered as bullets from their ``"type"`` and
                ``"url"`` entries; may be empty or ``None``.
            max_new_tokens: Forwarded to ``model.generate``.

        Yields:
            Whitespace-stripped summary chunks, followed by the optional
            resource heading and bullet lines.
        """
        prompt = (
            "You are a world-class summarization assistant.\n\n"
            f"TOPIC: {topic}\n"
            f"LEVEL: {level}\n\n"
            "CONTENT:\n"
            f"{full_teaching}\n\n"
            "Produce a clear, structured summary.\n\n"
            "Assistant:"
        )

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)

        def _run():
            try:
                with torch.inference_mode():
                    self.model.generate(
                        **inputs,
                        streamer=streamer,
                        max_new_tokens=max_new_tokens,
                        do_sample=True,
                        temperature=0.6,
                        top_p=0.9,
                        eos_token_id=self.tokenizer.eos_token_id,
                    )
            except Exception:
                # If generation dies, the streamer never receives its end
                # signal and the consumer loop below would block forever.
                # Log the failure and close the stream explicitly.
                logger.exception("[SUMMARY] Generation thread failed")
                streamer.end()

        Thread(target=_run, daemon=True).start()

        buffer = ""

        for token in streamer:
            buffer += token

            # Flushes only happen outside a code block, so every buffer starts
            # in prose: an odd number of fences means a block is still open.
            # (Toggling a flag on each token while the fence is still in the
            # buffer flips the state back and forth and splits code blocks.)
            in_code_block = buffer.count(_CODE_FENCE) % 2 == 1

            if not in_code_block and should_flush(buffer):
                yield buffer.strip()
                buffer = ""

        if buffer.strip():
            yield buffer.strip()

        if recommended:
            yield "\n\n### Recommended Resources\n"

            for item in recommended:
                # Look the type up as tolerantly as the url, so one malformed
                # entry cannot raise after the summary has already streamed.
                kind = item.get("type")
                label = kind.upper() if isinstance(kind, str) else "RESOURCE"
                yield f"- **{label}**: {item.get('url')}"