|
|
--- |
|
|
license: apache-2.0 |
|
|
language: |
|
|
- en |
|
|
base_model: |
|
|
- Qwen/Qwen3-0.6B |
|
|
tags: |
|
|
- event-data |
|
|
- political-science |
|
|
- computational-social-science |
|
|
--- |
|
|
|
|
|
|
|
|
|
|
|
# Example usage with vLLM |
|
|
|
|
|
## Load the model and tokenizer |
|
|
|
|
|
``` |
|
|
from vllm import LLM, SamplingParams |
|
|
from transformers import AutoTokenizer |
|
|
|
|
|
model = LLM(model="ahalt/event-attribute-extractor", |
|
|
enable_prefix_caching=True, |
|
|
max_model_len=8000, |
|
|
gpu_memory_utilization=0.80) |
|
|
|
|
|
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B") |
|
|
|
|
|
|
|
|
sampling_params = SamplingParams( |
|
|
temperature=0.5, # Greedy decoding breaks Qwen |
|
|
top_p=0.8, # Qwen3 non-thinking recommendation |
|
|
top_k=20, # Qwen3 recommendation |
|
|
presence_penalty=1.5, # Recommended for quantized models |
|
|
min_p=0.0, |
|
|
#guided_decoding=guided_decoding_params, # Optionally, set a JSON schema for constrained decoding
|
|
max_tokens=1024, |
|
|
) |
|
|
``` |
|
|
|
|
|
|
|
|
## Prompt setup |
|
|
|
|
|
``` |
|
|
system_content_short = """Extract political events as JSON. |
|
|
|
|
|
OUTPUT FORMAT: |
|
|
[ |
|
|
{ |
|
|
"event_type": "EVENT_TYPE", |
|
|
"anchor_quote": "quote from text", |
|
|
"actor": "who performed action OR N/A", |
|
|
"recipient": "who was targeted OR N/A", |
|
|
"date": "when occurred OR N/A", |
|
|
"location": "where occurred OR N/A" |
|
|
} |
|
|
] |
|
|
|
|
|
Return valid JSON only. Empty array [] if no events.""" |
|
|
|
|
|
|
|
|
def make_prompt(doc, event_type, tokenizer):
    """Render the chat-formatted prompt for one document and one event type.

    Args:
        doc: Text of the document to extract events from.
        event_type: Name of the event type the model should look for.
        tokenizer: Object exposing ``apply_chat_template`` (the examples in
            this README pass a Hugging Face tokenizer).

    Returns:
        The result of ``tokenizer.apply_chat_template`` for a two-message
        conversation (system instructions + user request), requested with
        ``tokenize=False``.
    """
    system_message = {"role": "system", "content": system_content_short}
    user_message = {
        "role": "user",
        "content": f"## Document: {doc}\n\n## Event Type: {event_type}",
    }
    # enable_thinking=False is forwarded to the chat template; the sampling
    # settings in this README are likewise the non-thinking ones.
    return tokenizer.apply_chat_template(
        [system_message, user_message],
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
|
|
``` |
|
|
|
|
|
|
|
|
## Example usage |
|
|
|
|
|
``` |
|
|
# Sample news article to run the extractor on.
text = """KYIV, Ukraine (AP) — Ukraine’s anti-corruption agencies said they had uncovered a major graft scheme involving inflated military procurement contracts, just two days after Ukraine’s parliament voted to restore the agencies’ independence.

In a joint statement published Saturday on social media, the National Anti-Corruption Bureau (NABU) and the Specialized Anti-Corruption Prosecutor’s Office (SAPO) said the suspects had taken bribes in a scheme that used state funds to buy drones and other military equipment at inflated prices.

“The essence of the scheme was to conclude state contracts with supplier companies at deliberately inflated prices,” the statement said, adding that offenders had received kickbacks of up to 30% of the contracts’ value."""

# Event type to extract from the article.
event_type = "Investigate, charge, or prosecute"

# Build the chat prompt, generate, and keep the text of the first completion
# for the single prompt, with surrounding whitespace trimmed.
prompt = make_prompt(text, event_type, tokenizer)
output = model.generate(prompt, sampling_params=sampling_params)
response = output[0].outputs[0].text.strip()
|
|
|
|
|
[{"event_type": "Investigate, charge, or prosecute",
"anchor_quote": "Ukraine\u2019s anti-corruption agencies said they had uncovered a major graft scheme involving inflated military procurement contracts",
"actor": "National Anti-Corruption Bureau (NABU); Specialized Anti-Corruption Prosecutor\u2019s Office (SAPO)",
"recipient": "suspects involved in the scheme",
"date": "Saturday",
"location": "Ukraine"}]
|
|
``` |