File size: 3,245 Bytes
6e65f57
 
 
 
 
 
 
 
 
 
1488258
 
83bfbd3
 
 
1488258
 
 
 
 
 
 
 
 
 
 
 
 
83bfbd3
 
 
 
 
 
 
 
 
 
 
1488258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83bfbd3
 
1488258
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---
license: apache-2.0
language:
- en
base_model:
- Qwen/Qwen3-0.6B
tags:
- event-data
- political-science
- computational-social-science
---



# Example usage with vLLM

## Load the model and tokenizer

```
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

# Start the vLLM engine for the fine-tuned extractor checkpoint.
model = LLM(
    model="ahalt/event-attribute-extractor",
    enable_prefix_caching=True,   # every request shares the same system prompt
    max_model_len=8000,
    gpu_memory_utilization=0.80,
)

# The chat template is taken from the base model listed in this card's
# `base_model` field.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")


# Decoding settings shared by every request in the examples below.
sampling_params = SamplingParams(
    temperature=0.5,       # Greedy decoding breaks Qwen
    top_p=0.8,             # Qwen3 non-thinking recommendation
    top_k=20,              # Qwen3 recommendation
    presence_penalty=1.5,  # Recommended for quantized models
    min_p=0.0,
    # Optionally, set a JSON schema for constrained decoding:
    # guided_decoding=guided_decoding_params,
    max_tokens=1024,
)
```


## Prompt setup

```
system_content_short = """Extract political events as JSON.

OUTPUT FORMAT:
[
  {
    "event_type": "EVENT_TYPE",
    "anchor_quote": "quote from text",
    "actor": "who performed action OR N/A",
    "recipient": "who was targeted OR N/A", 
    "date": "when occurred OR N/A",
    "location": "where occurred OR N/A"
  }
]

Return valid JSON only. Empty array [] if no events."""


def make_prompt(doc, event_type, tokenizer):
    """Render the chat-formatted prompt for one document / event-type pair.

    Args:
        doc: Text of the document to extract events from.
        event_type: Event type label inserted into the user message.
        tokenizer: Object exposing ``apply_chat_template`` (the
            ``AutoTokenizer`` instance in this card's example).

    Returns:
        The value returned by ``tokenizer.apply_chat_template``, which is
        called with ``tokenize=False`` and ``enable_thinking=False``.
    """
    # System turn carries the fixed extraction instructions.
    system_turn = {"role": "system", "content": system_content_short}
    # User turn carries the document followed by the requested event type.
    user_turn = {
        "role": "user",
        "content": f"## Document: {doc}\n\n## Event Type: {event_type}",
    }
    return tokenizer.apply_chat_template(
        [system_turn, user_turn],
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
```


## Example usage

```
# Sample news text to run the extractor on. The literal is closed here so the
# statements that follow are parsed as code rather than as part of the string.
text = """KYIV, Ukraine (AP) — Ukraine’s anti-corruption agencies said they had uncovered a major graft scheme involving inflated military procurement contracts, just two days after Ukraine’s parliament voted to restore the agencies’ independence.

In a joint statement published Saturday on social media, the National Anti-Corruption Bureau (NABU) and the Specialized Anti-Corruption Prosecutor’s Office (SAPO) said the suspects had taken bribes in a scheme that used state funds to buy drones and other military equipment at inflated prices.

“The essence of the scheme was to conclude state contracts with supplier companies at deliberately inflated prices,” the statement said, adding that offenders had received kickbacks of up to 30% of the contracts’ value."""

# Event type to extract from the document.
event_type = "Investigate, charge, or prosecute"

prompt = make_prompt(text, event_type, tokenizer)
output = model.generate(prompt, sampling_params=sampling_params)
# Take the text of the first completion for the first (only) prompt.
response = output[0].outputs[0].text.strip()

# Example output (contents of `response`):
[{"event_type": "Investigate, charge, or prosecute",
   "anchor_quote": "Ukraine\u2019s anti-corruption agencies said they had uncovered a major graft scheme involving inflated military procurement contracts",
   "actor": "National Anti-Corruption Bureau (NABU); Specialized Anti-Corruption Prosecutor\u2019s Office (SAPO)",
   "recipient": "suspects involved in the scheme",
   "date": "Saturday",
   "location": "Ukraine"}]
```