File size: 4,156 Bytes
63a8d84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72872bb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from __future__ import annotations

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

import ast
import atexit
import os
import re
import sys

# Matches a Markdown code fence: three backticks, an optional "python" tag
# (case-insensitive), then lazily captures the fenced body up to the next three
# backticks, leaving the whitespace around the body out of the capture group.
# Used by extract_python_code() to pull code out of a model reply.
FENCE_RE = re.compile(r"```(?:python)?\s*([\s\S]*?)\s*```", flags=re.IGNORECASE)
# Matches a doubled closing parenthesis "))" followed only by whitespace at the
# end of a line (MULTILINE). generate_code() substitutes ")" for each match as a
# last-resort repair of code that still fails to parse.
TRAILING_PARENS_RE = re.compile(r"\)\)\s*$", flags=re.MULTILINE)

# Install (Python env):
# - pip install langchain langchain-community
# - pip install gpt4all


def _force_utf8_stdio() -> None:
    """Best-effort switch of ``sys.stdout`` and ``sys.stderr`` to UTF-8.

    The script prints non-ASCII (Vietnamese) text, which can fail on consoles
    whose default encoding cannot represent it. Each stream is handled
    independently so that a failure on one (for example a replaced stdout
    that rejects reconfiguration) does not prevent the other from being
    reconfigured.

    Streams that are missing or expose no callable ``reconfigure`` are skipped.
    Errors are never propagated.
    """
    for stream_name in ("stdout", "stderr"):
        stream = getattr(sys, stream_name, None)
        reconfigure = getattr(stream, "reconfigure", None)
        if not callable(reconfigure):
            continue
        try:
            reconfigure(encoding="utf-8")
        except Exception:
            # Deliberately best-effort: an unusual stream object must not stop
            # the script; keep whatever encoding that stream already has.
            continue

# =====================
# Config
# =====================
# File name of the GGUF model; load_llm() looks for it in this script's directory.
MODEL_FILE = "Cube-Python_v2.gguf"
# NOTE(review): N_CTX is not referenced anywhere in the visible code; presumably
# the intended context window size — confirm whether it should reach the model.
N_CTX = 4096
# Sampling temperature; passed to GPT4All as `temp` in load_llm().
TEMPERATURE = 0.1
# NOTE(review): N_GPU_LAYERS is also not referenced in the visible code.
N_GPU_LAYERS = -1  # llama.cpp: -1 = try push all to GPU, set 0 to force CPU

# Number of times generate_code() re-prompts the model after a syntax error.
MAX_FIX_ATTEMPTS = 2

def load_llm():
    """Create the GPT4All LLM from the model file next to this script.

    The model path is ``MODEL_FILE`` resolved against the directory that
    contains this source file.

    Returns:
        A ``langchain_community.llms.GPT4All`` instance built with
        ``temp=TEMPERATURE`` and ``verbose=False``.

    Raises:
        FileNotFoundError: The model path does not point to an existing
            regular file (a directory with that name is rejected too).
        RuntimeError: ``GPT4All`` could not be imported from
            ``langchain_community``; the underlying error is attached as
            ``__cause__``.
    """
    base_path = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(base_path, MODEL_FILE)

    # isfile (not exists): a directory named like the model is not loadable.
    if not os.path.isfile(model_path):
        raise FileNotFoundError(f"Không tìm thấy file model tại: {model_path}")

    try:
        # Imported lazily so a missing optional dependency yields the
        # installation hint below instead of failing at module import.
        from langchain_community.llms import GPT4All
    except Exception as e:
        # Broad on purpose: the import may fail for reasons other than
        # ImportError; chain the cause so the original traceback is preserved.
        raise RuntimeError(
            "Chưa cài GPT4All cho LangChain. Cài bằng:\n"
            "  pip install gpt4all langchain-community\n"
            f"Chi tiết: {e}"
        ) from e

    return GPT4All(model=model_path, temp=TEMPERATURE, verbose=False)

def close_llm_safely(llm):
    """Call ``llm.client.close()`` if it exists, ignoring every error.

    Nothing happens when ``llm`` has no ``client`` attribute, when the client
    has no callable ``close``, or when closing raises. Always returns ``None``.
    """
    try:
        closer = getattr(getattr(llm, "client", None), "close", None)
        if not callable(closer):
            return
        closer()
    except Exception:
        # Cleanup is best-effort; a failing close must not surface to callers.
        return

def extract_python_code(text: str) -> str:
    """Extract Python source from a model reply.

    Args:
        text: Raw model output; may be empty or ``None``.

    Returns:
        - ``""`` when *text* is falsy.
        - The stripped body of the first fence matched by ``FENCE_RE`` when
          the reply contains a complete fence.
        - When the reply starts with an opening fence that is never closed
          (for example because the output was cut off), the text after that
          opening fence line, stripped.
        - Otherwise the whole reply, stripped.
    """
    if not text:
        return ""

    m = FENCE_RE.search(text)
    if m:
        return m.group(1).strip()

    stripped = text.strip()
    if stripped.startswith("```"):
        # Unterminated fence: text starting with backticks is never valid
        # Python, so dropping the opening fence line cannot damage good code.
        _fence_line, newline, body = stripped.partition("\n")
        body = body.strip()
        if newline and body:
            return body

    return stripped

def _syntax_error_message(code: str) -> str | None:
    try:
        ast.parse(code)
        return None
    except SyntaxError:
        # Re-parse to get rich info (cheap vs model inference, and avoids duplicate logic).
        try:
            ast.parse(code)
            return None
        except SyntaxError as e:
            line = (e.text or "").strip()
            where = f"line {e.lineno}, col {e.offset}" if e.lineno and e.offset else "unknown location"
            return f"{e.msg} ({where}). Offending line: {line}"


def is_valid_python(code: str) -> bool:
    """Return ``True`` when ``_syntax_error_message`` reports no problem for *code*."""
    problem = _syntax_error_message(code)
    return problem is None


def generate_code(chain, question: str) -> str:
    """Ask the model for Python code and retry while it has syntax errors.

    Args:
        chain: Object with an ``invoke(dict)`` method that returns the model
            reply as text; it is called with ``{"question": ...}``.
        question: The task description for the first request.

    Returns:
        The first extracted code that parses. If none of the
        ``1 + MAX_FIX_ATTEMPTS`` replies parse, a copy of the last reply with
        each end-of-line ``"))"`` reduced to ``")"`` is returned when that
        copy parses; otherwise the last reply's code is returned unchanged,
        even though it is still invalid.
    """
    raw = chain.invoke({"question": question})
    code = extract_python_code(raw)

    for _ in range(MAX_FIX_ATTEMPTS):
        err = _syntax_error_message(code)
        if err is None:
            return code

        # Feed the parser error and the previous raw reply back to the model.
        raw = chain.invoke(
            {
                "question": (
                    "Output trước bị sai cú pháp Python.\n"
                    f"Lỗi: {err}\n\n"
                    f"Output trước:\n{raw}\n\n"
                    "Hãy trả lại code Python ĐÚNG cú pháp, chỉ code, không markdown."
                )
            }
        )
        code = extract_python_code(raw)

    # The reply from the final retry has not been checked yet. Return it as-is
    # when it parses, so the parenthesis repair below never rewrites code that
    # is already valid (e.g. a "))" line inside a triple-quoted string).
    if is_valid_python(code):
        return code

    # Last resort: drop one ")" from every line ending in "))".
    code2 = TRAILING_PARENS_RE.sub(")", code)
    return code2 if is_valid_python(code2) else code

# Instruction prompt (written in Vietnamese) wrapped in [INST] ... [/INST]
# markers. It asks for high-quality Python code only, with no markdown and no
# explanation; {question} is substituted per request.
template = """[INST] Bạn là một trợ lý AI chuyên nghiệp về lập trình Python.
Hãy viết code Python chất lượng cao để giải quyết yêu cầu sau.
Chỉ trả lời bằng code Python thuần (KHÔNG markdown, KHÔNG giải thích).
Yêu cầu: {question} [/INST]"""

prompt = PromptTemplate(input_variables=["question"], template=template)

# NOTE: everything below runs at module level, i.e. on import as well as when
# the file is executed as a script.
_force_utf8_stdio()
llm = load_llm()
# Cleanup is registered for interpreter exit in addition to the `finally`
# below; close_llm_safely() swallows all errors, so the second call cannot raise.
atexit.register(close_llm_safely, llm)
# Pipeline: the filled prompt is piped into the model, then through StrOutputParser.
chain = prompt | llm | StrOutputParser()

# Sample task sent to the model.
question = '''
Write a Python program that extracts all email addresses from a given text.
Input:
A text: "Contact us at support@nlp.com or info@textprocessing.ai for more details."
Desired Output:
['support@nlp.com', 'info@textprocessing.ai']'''

try:
    # Print the generated (and, where possible, syntax-checked) code.
    print(generate_code(chain, question))
finally:
    # Release the model client even if generation fails.
    close_llm_safely(llm)