"""
Inference code for Code Analyzer Model
This file enables the "Use this model" button on Hugging Face.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
def load_model_and_tokenizer(model_name: str):
    """Load a causal-LM checkpoint and its tokenizer from the Hugging Face Hub.

    Chooses bfloat16 when a CUDA device is available and float32 otherwise;
    device placement is delegated to ``device_map="auto"``.

    Returns:
        A ``(model, tokenizer)`` pair ready for generation.
    """
    # Pick the compute dtype once, based on available hardware.
    compute_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=compute_dtype,
        device_map="auto",
        trust_remote_code=True,
    )
    return model, tokenizer
def build_input(task: str, code: str) -> str:
    """Build the model input in the same format used during training.

    Args:
        task: Assignment text; skipped when blank/whitespace-only.
        code: Student's solution; wrapped in a ```python fence and skipped
            when blank/whitespace-only.

    Returns:
        The present sections joined by a blank line; empty string if both
        inputs are blank.
    """
    parts: list[str] = []
    if task.strip():
        # Section headers are in Russian to match the training data exactly.
        parts.append(f"Задача:\n{task.strip()}")
    if code.strip():
        parts.append(f"Решение (код):\n```python\n{code.strip()}\n```")
    return "\n\n".join(parts)
def generate_response(
    model,
    tokenizer,
    task: str,
    code: str,
    max_new_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.8,
    top_k: int = 20,
    repetition_penalty: float = 1.05,
):
    """Generate the model's analysis for a task and a student's code.

    Builds the prompt in the training format (via ``build_input``), appends
    the "Ответ:" cue, samples a completion, and returns only the text that
    follows the final cue.
    """
    # Assemble the prompt exactly as the model saw it during training.
    prompt = build_input(task, code) + "\n\nОтвет:\n"
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    sampling_kwargs = dict(
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        # Silences the missing-pad-token warning for models without one.
        pad_token_id=tokenizer.eos_token_id,
    )
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_kwargs)

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

    # The decoded text includes the prompt; keep only what follows the cue.
    marker = "Ответ:"
    if marker in decoded:
        decoded = decoded.split(marker)[-1].strip()
    return decoded
if __name__ == "__main__":
    # Demo entry point: load the published checkpoint and analyze one
    # sample student submission end to end.
    import json

    model_name = "Vilyam888/Code_analyze.1.0"
    print("Loading model...")
    model, tokenizer = load_model_and_tokenizer(model_name)

    # Sample assignment and the student's submitted solution.
    task = "Напишите функцию, которая принимает список чисел и возвращает сумму всех элементов."
    code = """def sum_list(numbers):
    total = 0
    for num in numbers:
        total += num
    return total"""

    print(f"\nЗадача: {task}")
    print(f"\nКод студента:\n{code}\n")
    print("Generating analysis...")

    response = generate_response(model, tokenizer, task, code)

    # Pretty-print when the model emitted valid JSON; fall back to raw text.
    try:
        parsed = json.loads(response)
        print(f"\nРезультат анализа (JSON):")
        print(json.dumps(parsed, ensure_ascii=False, indent=2))
    except json.JSONDecodeError:
        print(f"\nРезультат анализа:\n{response}")