"""
Instruction Tuning of LLM for Trait-conditioned Style Impact Calibration
"""
import unsloth
import yaml # type: ignore
import pandas as pd # type: ignore
import os
from PIL import Image # type: ignore
import gradio as gr
import torch # type: ignore
from langchain_community.chat_models import ChatOllama # type: ignore
from langchain_core.messages import SystemMessage, HumanMessage # type: ignore
from langchain_ollama import OllamaEmbeddings # type: ignore
from langchain_core.output_parsers import StrOutputParser # type: ignore
from pydantic import BaseModel # format LLM output as JSON # type: ignore
from unsloth import FastVisionModel, FastModel, FastLanguageModel # type: ignore
from transformers import TextStreamer # type: ignore
from unsloth.chat_templates import get_chat_template # type: ignore
from unsloth.chat_templates import standardize_sharegpt # type: ignore
from transformers import TextIteratorStreamer
from utils import convert_to_base64, load_config, process_trait_info # type: ignore
from tqdm import tqdm # type: ignore
from termcolor import colored # type: ignore
import threading
import random
import numpy as np
# generation_lock = threading.Lock()
# from transformers import StoppingCriteria, StoppingCriteriaList
# class StopGenerationCriteria(StoppingCriteria):
# def __init__(self, stop_event):
# self.stop_event = stop_event
# def __call__(self, input_ids, scores, **kwargs):
# return self.stop_event.is_set()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
TRAIT_VALUES = {
"Gender": [
"Male", "Female", "Non-binary/third gender", "Leave Blank",
],
"Age": [
"18–24", "25–34", "35–44", "45–54", "55–64", "65 or older", "Leave Blank",
],
"Current Profession": [
"Healthcare/Medical", "Government/Public Service",
"Business/Finance",
"Technology/Engineering", "Education", "Arts/Entertainment",
"Retail/Hospitality/Food Service",
"Skilled Trades/Labor (e.g., construction, electrician, landscaper, house cleaner)",
"Student",
"Unemployed/Looking for work", "Retired",
"Other",
"Leave Blank",
],
"Race/Ethnicity" : [
"Asian", "Black/African American", "Hispanic/Latino",
"Native American/Alaska Native", "Native Hawaiian/Other Pacific Islander",
"White/Caucasian", "Other", "Leave Blank",
],
"Religious/Cultural Group": [
"Christianity", "Islam", "Hinduism", "Judaism", "Buddhism", "None of the above", "Leave Blank",
],
"Political Affiliation": [
"Conservative", "Apolitical/Not involved in politics", "Independent",
"Libertarian", "Moderate", "Liberal", "Leave Blank",
],
"Highest Education": [
"Less than high school", "High school diploma or equivalent", "Some college, no degree",
"Associate’s degree", "Bachelor’s degree",
"Master’s degree", "Doctoral or professional degree",
"Leave Blank",
],
"Annual Household Income": [
"Less than $25,000", "$25,000–$49,999", "$50,000–$74,999",
"$75,000–$99,999", "$100,000–$149,999", "$150,000 or more",
"Leave Blank",
],
"Family Status": [
"Single, living alone", "Single, living with family", "Single Parent with children",
"Married/Partnered, no children", "Married/Partnered, with children",
"Multi-generation family (e.g., with parents, grandparents, or extended family)",
"Leave Blank",
],
}
HEALTH_TOPICS = {
"Chronic Obstructive Pulmonary Disease (COPD)": "COPD1.1",
"Heart Disease": "HD1",
"HIV": "HIV1.1",
"Mental Health": "MH1.1",
"Nutrition": "N2.1",
"Substance Abuse": "SA4.1",
"Sexual Practice": "SP7.1",
"Vaccination": "V7.1",
"Cystic Fibrosis": "CF1.1",
}
health_topics = ""
for topic in HEALTH_TOPICS:
health_topics += topic + '\n'
##########################################################
### To increase style variability to avoid repetitiveness
##########################################################
# * Style variants
style_variants = [
"Write with a slightly informal and reflective tone.",
"Write in a straightforward conversational tone.",
"Write with mild emotional coloring, but still natural.",
"Write in a calm, matter-of-fact tone.",
"Write in a slightly narrative, flowing tone.",
"Write in a concise but personable tone.",
"Write in a informal, pragmatic tone, focusing on clarity and utility.",
]
# --- Add small lexical noise / synonym variation ---
lexical_flavors = [
"Feel free to vary sentence structures slightly.",
"Use a mix of simple and slightly complex sentences.",
"Use a light mix of paraphrasing expressions.",
"Feel free to choose different synonyms for common emotional words.",
"Introduce subtle variation in connectors like 'however', 'still', or 'overall'.",
]
openers = [
"This message",
"From this message",
"Through the message",
"After seeing this message",
"Looking at this poster",
"Based on what this poster conveys",
"Hmmm I think that this message",
"Reflecting on the message here",
"Considering what this poster is trying to say",
"Seeing this message makes me think",
"Thinking about what this poster is communicating",
"After reading what's on here",
"Based on what’s written here",
"After I look at this whole thing",
]
openers_generic = [
"Hmmm when thinking about",
"When I think about",
"My impression about",
"On top of my head",
"My general thoughts about",
"The way I see it,",
"From my point of view on",
"My initial take on",
"In my own words,",
"As I see things,",
"Just speaking for myself,",
"At a glance,",
]
openers_poster_summary = [
"This poster",
"This poster seems to",
"My interpretation of the poster is",
"From what this poster shows, it seems to",
"Looking at the poster as a whole, it appears to",
"Based on the imagery and tone, the poster seems to",
"Visually, the poster comes across as trying to",
"To me, this poster is trying to",
"When I look at this poster, it feels like it aims to",
"The poster gives me the impression that it intends to",
]
openers_explain = [
"The reason why I think that is because",
"To explain why I",
"Well, to explain my thoughts",
"To put it simply, I feel this way because",
"My reasoning behind that is",
"What leads me to that view is",
"A big part of why I think that is",
"To give some context for my view,",
"Here’s why I lean that way:",
"I see it that way mainly because",
"Let me explain why I think so",
"Thinking through it, I realize it's because",
"To unpack my thinking a bit,",
"I guess it’s because",
"The thing that really shapes my view is",
"It’s pretty much because",
"A lot of it comes down to",
"I feel that way mostly because",
"My thinking comes from the idea that",
]
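# A minimal illustrative helper (not called by the demo, which samples the
# hints inline in `vlm_response`): bundle one random choice from each pool so
# the combined style instruction can be built or logged in one place. Seed
# `random` beforehand if reproducible sampling is needed.
def sample_style_hints(rng=random):
    """Return one randomly chosen hint from each variation pool."""
    return {
        "style": rng.choice(style_variants),
        "lexical": rng.choice(lexical_flavors),
        "opener": rng.choice(openers),
        "opener_generic": rng.choice(openers_generic),
        "opener_poster": rng.choice(openers_poster_summary),
        "opener_explain": rng.choice(openers_explain),
    }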
"""
Generate LLM response given a single user prompt and input image
"""
def vlm_response(user_input, history, health_topic,
gender, age, profession, race, religion,
political, education, income, family_status,
# extraversion, agreeableness, conscientiousness, neuroticism, openness,
):
# # 1. Initialize Stop Event for this session
# stop_event = threading.Event()
# # Create the stopping criteria to pass to the model
# stopping_criteria = StoppingCriteriaList([StopGenerationCriteria(stop_event)])
# 1. Clear any lingering state
torch.cuda.empty_cache() # Clear GPU memory
# 2. Initialize Streamers LOCALLY (Fresh for every request)
# Note: We need to re-initialize these for every single generation call
# or just once per function call if we share them.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# streamer_aux = TextIteratorStreamer(tokenizer_aux, skip_prompt=True, skip_special_tokens=True)
""" [NOTE] we have not use `history` for this generation """
# get uploaded image
image = Image.open(user_input['files'][0]) if user_input['files'] else None
image_uploaded = True
if image is None:
image = Image.new('RGB', (24,24))
image_uploaded = False
# image_b64 = convert_to_base64(image)
print(health_topic)
# print("Image uploaded:", image_uploaded)
#################################################
# 1. Construct traits from user inputs
#################################################
demo_dict = {
"Gender": gender,
"Age": age,
"Current Profession": profession,
"Race/Ethnicity": race,
"Religious/Cultural Group": religion,
"Political Affiliation": political,
"Highest Education": education,
"Annual Household Income": income,
"Family Status": family_status,
}
# big5_dict = {
# "Extraversion": extraversion,
# "Agreeableness": agreeableness,
# "Conscientiousness": conscientiousness,
# "Neuroticism": neuroticism,
# "Open-Mindedness": openness,
# }
demo_info = ""
for trait, value in demo_dict.items():
if value != "Leave Blank": # only add non-blank values
demo_info += f"{trait}: {value}\n"
else:
demo_info += f"{trait}: [Not specified]\n"
persona_score = ""
persona_score += "Big-Five Trait Scores:\n"
# for trait, value in big5_dict.items():
# persona_score += f"{trait}: {value}\n"
# no locus of control trait score
locus = None
    ######################################################################################
    # 1*. modify trait info based on trait selection settings
    #    demo_full: whether to include the full demographic traits or only selected ones
    #    include_big5, include_facet, include_locus: whether to include Big Five / facet / locus-of-control traits
    #    format: <trait>: <value> if available; else <trait>: [Not specified]
    ######################################################################################
demo_info, persona_score, locus = process_trait_info(
demo_info, persona_score, locus,
demo_full=False, include_big5=True,
include_facet=False, include_locus=False,
train_mode=False,
)
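    # Illustrative example (assumed behavior; the actual formatting is done by
    # `process_trait_info` in utils): with Gender=Female and Age left blank,
    # `demo_info` contains lines such as:
    #   Gender: Female
    #   Age: [Not specified]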
# print(demo_info)
# print(persona_score)
###############################################
### Add style variability ###
###############################################
style_hint = random.choice(style_variants) # increase style variant
lexical_hint = random.choice(lexical_flavors) # increase lexical variant
opening_phrase = random.choice(openers) # increase opening variant
opening_generic = random.choice(openers_generic) # increase opening variant
opening_poster = random.choice(openers_poster_summary) # poster summary variation
opening_explain = random.choice(openers_explain) # thought explanation
print('Style:', style_hint)
print('Lexical:', lexical_hint)
print('Opening:', opening_phrase)
print('Generic opening:', opening_generic)
    # Wrap the GENERATION logic in try/finally to handle cleanup
    thread = None  # so `finally` can safely check the worker thread
    try:
if image_uploaded:
"""###############################################################
Case 1: a health poster is uploaded
=> VLM-enabled response prediction to that specific poster
###############################################################"""
################################################
# * IMAGE UNDERSTANDING
################################################
yield "Analyzing image content..." # UI Feedback
            PROMPT = (
                f"Describe the content and main message in the given health campaign poster and how it relates to {health_topic}. "
                "Note that the message could be indirect or subtle (e.g., irony, or a fear-driven appeal without explicit text). Only provide the answer (in 2-4 sentences). "
                f"Start the response with {opening_poster}"
            )
messages = [
{"role": "user", "content": [
{"type": "image"},
{"type": "text", "text": PROMPT}
]}
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
inputs = tokenizer(
image.convert("RGB"),
input_text,
add_special_tokens = False,
return_tensors = "pt",
).to(device)
# Model inference
gen_tokens = model.generate(
**inputs,
max_new_tokens = 512,
use_cache = True,
# do_sample=cfgs["stochastic"],
# temperature=cfgs["temperature"],
# min_p=0.9,
# min_p=0.3,
top_k=15,
temperature=0.8,
do_sample=True, # cfgs["stochastic"]
)
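            # keep only the newly generated tokens: slice off the echoed prompt ids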
outs = tokenizer.batch_decode(gen_tokens[:, inputs.input_ids.shape[1]:])[0]
image_desc = outs.replace(tokenizer.eos_token, "")
image_desc = image_desc.replace("<end_of_turn>", "")
################################################
# 2. Construct SYSTEM and USER PROMPT
################################################
SYSTEM_PROMPT = cfg_prompts["SYSTEM_SIM"]
SIM_PROMPT = ""
# prompt for role-playing information
SIM_PROMPT += f"You are: Demographics:\n{demo_info}\n"
# SIM_PROMPT += f"Your personality test shows you have (min score = 0; max score = 5):\nBig-Five Trait Scores:\n{persona_score}\n\n"
# SIM_PROMPT += f"You also have {locus}\n"
# situation description (role-playing)
SIM_PROMPT += cfg_prompts["SIMULATION_SIM"]
################################################
# 3. Stage 1: VLM-enabled response prediction
# Predict Trait-aware Likert Scale Responses
################################################
assert cfgs["infer_engine"] == "unsloth", "Only unsloth inference is supported"
assert cfgs["vision"] == True, "Must have vision input"
# load a sample row to extract Likert scale questions
df = pd.read_csv(os.path.expandvars(cfgs["data_path"]))
# extract sample with given health_topic for correct question set
sample = df[df['Poster_id'] == HEALTH_TOPICS[health_topic]].iloc[0]
del df # free memory
""" Iterate through each question"""
# answers_json = {}
answers_numeric = ""
# for question in [
# "This message makes me more concerned about the health risks in the poster - Scale: 1 (not at all) - 9 (extremely)",
# "The message motivates me to engage in healthier lifestyle and habit - Scale: 1 (not at all) - 9 (extremely)",
# "In your opinion, how harmful is ignoring the health risks in the poster? - Scale: 1 (not at all) - 9 (extremely",
# "How open are you to engaging in the activity in the poster? - Scale: 1 (not at all) - 9 (extremely)",
# ]:
            for i in range(1, 16):  # iterate Likert questions Q1..Q15
# a. parse specific Likert score question
col = f"Q{i}"
if pd.isna(sample[col]):
continue
question = sample[col].replace("\n", " ")
# instruction prompt to answer in proper format
if "type in" in question.lower():
continue # skip free-text questions for demo
elif "make you feel" in question.lower():
continue # skip emotional questions: imprecise
elif "how open" in question.lower():
continue # skip intentional question: low-accuracy
                # b. initialize USER PROMPT with SIMULATION PROMPT
                #    with full demographic+personality data
USER_PROMPT = SIM_PROMPT
USER_PROMPT += f"Question: {question}\n\n"
# instruction prompt to answer in proper format
USER_PROMPT += cfg_prompts['INSTRUCTION_MCQ']
                # c. Construct LLM message: response prediction
messages = [
{"role": "user", "content": [
{"type": "image"},
{"type": "text", "text": SYSTEM_PROMPT + USER_PROMPT}
]}
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
inputs = tokenizer(
image.convert("RGB"),
input_text,
add_special_tokens = False,
return_tensors = "pt",
).to(device)
# d. Model inference
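                # small token budget: each Likert answer is just a single scale value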
gen_tokens = model.generate(
**inputs,
max_new_tokens = 16,
use_cache = True,
do_sample=cfgs["stochastic"],
temperature=cfgs["temperature"],
min_p=0.9,
)
outs = tokenizer.batch_decode(gen_tokens[:, inputs.input_ids.shape[1]:])[0]
answer = outs.replace(tokenizer.eos_token, "")
answer = answer.replace("<end_of_turn>", "")
# answers_json[col] = answer
answers_numeric += f"{question}. Your answer: {answer}\n"
# print(answers_json)
print(answers_numeric)
################################################
# 4. Stage 2: LLM Summarization of all answers
# => final response generation based on
# all Likert answers to the poster
# => one-shot prompting
################################################
SYSTEM_PROMPT = "You are a helpful assistant."
# USER_PROMPT = f"Please convert these questions and answers into a concise and coherent \
# summary of your overall reactions, feelings, and perspectives about the poster: {answers_numeric} \
# Please provide the final response only."
# USER_PROMPT = f"Summarize the main points from questions and answers below into a concise and coherent overall reaction to the poster:\
# {answers_numeric}. Provide the final response only.\n"
            USER_PROMPT = (
                "Summarize the following survey responses into a short, natural paragraph that captures your overall sentiment, motivation, and thinking. "
                "Write as if paraphrasing what a person might say in conversation. Adjust your style based on your demographic/personality traits. "
                "Do NOT repeat numeric scores. "
                "Preserve polarity: low scores → low concern/motivation/openness; high scores → high concern/motivation/openness. "
                "If answers are mixed (e.g., believes something is harmful but isn't personally moved), reflect that nuance explicitly. "
                "Keep to 1-5 sentences.\n\n"
                "**STRICTLY FOLLOW THESE RULES:**\n"
                "- Infer direction from each item's Scale description (e.g., 1-9: higher = more; 0-6: higher = more).\n"
                "- Use calibrated wording: 1-2 = very low, 3-4 = low, 5 = moderate, 6-7 = high, 8-9 = very high; for 0-6: 0-1 = not/slight, 2-3 = somewhat, 4-5 = high, 6 = very.\n"
                "- VERY IMPORTANT: provide ONLY the final summarized response, without anything else!\n"
                f"- The response MUST have a consistent health topic: {health_topic}. Ground each sentence in the impact of the campaign message.\n"
                "- Never invert sentiment. Prefer hedged phrases (e.g., “not particularly,” “only somewhat,” “very open,” “not open at all”).\n"
                "- Mimic the talking style of the emulated demographic as realistically as possible.\n\n"
"**Example input 1:**\n"
"The message makes me more concerned about the health risks of poor eating habits - Scale: 1-9. Your answer: 9\n"
"The message motivates me to make healthy eating choices - Scale: 1-9. Your answer: 9\n"
"In your opinion, how harmful is neglecting proper nutrition and weight management to your overall health? - Scale: 0–6. Your answer: 5\n"
"How open are you to adopting healthier eating habits and lifestyle changes? - Scale: 1-9. Your answer: 9\n"
"**Example output 1:**\n"
"This message really heightened my awareness of how unhealthy eating can be. The content in the message strongly motivates me to make better choices, and I feel very ready to follow through.\n\n"
"**Example input 2:**\n"
"The message makes me more concerned about the health risks of COPD and smoking - Scale: 1-9. Your answer: 1\n"
"The message motivates me to not smoke. - Scale: 1-9. Your answer: 1\n"
"In your opinion, how harmful is smoking to your general health? - Scale: 0-6. Your answer: 6\n"
"How open are you to smoking in the future? - Scale: 1-9. Your answer: 1\n"
"**Example output 2:**\n"
"From this message, I recognize smoking is very harmful, but the content in the message didn't increase my concern or motivate me much. It does somewhat make me understand that smoking is harmful, however. Anyway, I'm not open to smoking in the future.\n\n"
"**Example input 3:**\n"
"The message makes me more concerned about the effects of lack of exercise - Scale: 1-9. Your answer: 4\n"
"The message motivates me to be more active - Scale: 1-9. Your answer: 3\n"
"How open are you to exercising regularly? - Scale: 1-9. Your answer: 4\n"
"**Example output 3:**\n"
"Through the message, I get that exercise matters and the message raised my awareness a bit, but the poster content itself didn't really motivate me. The content in the message has some small impact in motivating me to change my routine.\n\n"
# "**Example input 4:**\n"
# "The message makes me more concerned about the health risks of substance abuse - Scale: 1 (not at all) - 9 (extremely). Your answer: 6\n"
# "The message motivates me to not use substances. - Scale: 1 (not at all) - 9 (extremely). Your answer: 6\n"
# "In your opinion, how harmful is substance use to your general health? - Scale: 0 (not at all)-6 (extremely harmful). Your answer: 5\n"
# "How open are you to trying a substance in the future? - Scale: 1 (not at all)-9 (extremely). Your answer: 1\n"
# "**Example output 4:**\n"
# "This message somewhat makes me more concerned about the health risks of substance abuse motivates me not to use them. However, the message itself doesn't completely convince me that substance abuse is harmful. However, I'm not open to trying substance at all!!\n"
f"Start the response with '{opening_phrase}' (Style hint: {style_hint}; Lexical hint: {lexical_hint})\n"
f"Input: {answers_numeric}. "
)
            # Construct LLM message
messages = [
{"role": "user", "content": [
# {"type": "image"},
{"type": "text", "text": SYSTEM_PROMPT + USER_PROMPT}
]}
]
# input_text = tokenizer_aux.apply_chat_template(messages, add_generation_prompt = True)
# inputs = tokenizer_aux(
# # image.convert("RGB"),
# input_text,
# add_special_tokens = False,
# return_tensors = "pt",
# ).to(device)
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
inputs = tokenizer(
# image.convert("RGB"),
input_text,
add_special_tokens = False,
return_tensors = "pt",
).to(device)
############################
### Text LLM Streaming ###
############################
# generation with streamer
generate_kwargs = dict(
**inputs,
streamer=streamer, # streamer_aux,
max_new_tokens=512,
use_cache=True,
# min_p=0.3,
top_k=15,
temperature=0.8,
do_sample=True, # cfgs["stochastic"]
)
# separate thread to run generation
thread = threading.Thread(
target=model.generate, # model_aux.generate,
kwargs=generate_kwargs
)
thread.start()
# stream out generation
outputs = [
f"Emulated traits:\n {demo_info}\n" + '='*20 + "\n\n",
image_desc + "\n\n"
]
for new_token in streamer: # streamer_aux:
outputs.append(new_token)
final_output = ''.join(outputs)
yield final_output
# Ensure thread finishes
thread.join()
# text representation of final response
response = "".join(outputs[2:]) # ignore trait summary & image description
print(colored('Traits', 'green'), demo_info)
print(colored('Emulated response:', 'green'), response)
print('='*100)
################################################
# 5. Stage 3: provide explanation (demo purpose)
# => condition on {trait} AND {reponse}
################################################
SYSTEM_PROMPT = cfg_prompts["SYSTEM_SIM"]
SIM_PROMPT = ""
# prompt for role-playing information
SIM_PROMPT += f"You are: Demographics:\n{demo_info}\n"
# SIM_PROMPT += f"Your personality test shows you have (min score = 0; max score = 5):\nBig-Five Trait Scores:\n{persona_score}\n\n"
# SIM_PROMPT += f"You also have {locus}\n"
# situation description (role-playing)
SIM_PROMPT += cfg_prompts["SIMULATION_SIM"]
SIM_PROMPT += (
f"After seeing the uploaded impage, your response were {response}. "
"Briefly explain WHY you responded that way, based on your demographic background. "
f"Keep the explanation concise and direct. Start the response with '{opening_explain}' "
f"(Style hint: {style_hint}, concise; Lexical hint: {lexical_hint}). "
"Afterward, give a few *generic and succinct* suggestions to improve the poster's persuasiveness."
)
USER_PROMPT = SIM_PROMPT
            # Construct LLM message
messages = [
{"role": "user", "content": [
{"type": "image"},
{"type": "text", "text": SYSTEM_PROMPT + USER_PROMPT}
]}
]
# input_text = tokenizer_aux.apply_chat_template(messages, add_generation_prompt = True)
# inputs = tokenizer_aux(
# image.convert("RGB"),
# input_text,
# add_special_tokens = False,
# return_tensors = "pt",
# ).to(device)
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
inputs = tokenizer(
image.convert("RGB"),
input_text,
add_special_tokens = False,
return_tensors = "pt",
).to(device)
############################
### Text LLM Streaming ###
############################
# generation with streamer
generate_kwargs = dict(
**inputs,
streamer=streamer, # streamer_aux,
max_new_tokens=512,
use_cache=True,
min_p=0.85,
temperature=0.1,
do_sample=True, # cfgs["stochastic"]
)
# separate thread to run generation
thread = threading.Thread(
target=model.generate, # model_aux.generate,
kwargs=generate_kwargs
)
thread.start()
# stream out generation
# outputs = [image_desc + "\n\n"]
outputs += ["\n"]
for new_token in streamer: # streamer_aux:
outputs.append(new_token)
final_output = ''.join(outputs)
yield final_output
thread.join()
            return  # end of the poster-conditioned branch (generator return)
else:
"""###############################################################
Case 2: no health poster is uploaded
=> General Response to the health topic
=> not conditioned on any particular health poster
###############################################################"""
################################################
# 2. Construct SYSTEM and USER PROMPT
################################################
SYSTEM_PROMPT = (
"You are a person with unique demographic and personality traits. "
"Based on your background, you naturally have thoughts, feelings, and reactions to what you see."
)
SIM_PROMPT = ""
# prompt for role-playing information
SIM_PROMPT += f"You are: {demo_info}\n"
# SIM_PROMPT += f"Your personality test shows you have (min score = 0; max score = 5): {persona_score}\n"
# SIM_PROMPT += f"You also have {locus}\n"
# situation description (role-playing)
SIM_PROMPT += f"You are being asked a general question to share your *general* opinions and beliefs about a given health topic.\n"
################################################
# 3. LLM-enabled response prediction
# Predict Trait-aware Likert Scale Responses
################################################
assert cfgs["infer_engine"] == "unsloth", "Only unsloth inference is supported"
USER_PROMPT = SIM_PROMPT
USER_PROMPT += (
f"What are your *general* thoughts and opinions about the {health_topic} health topic? "
f" What's your attitude and feeling when talking about {health_topic} in general and why?"
f" How familiar are you with {health_topic}? How much do you care or know about it?"
f" Do you think {health_topic} is an important topic to talk about?"
f" What is its impacts and importance {health_topic} in society and your life? Why?"
f" Do you have any strong opinions about it?"
f" Are you interested in learning more about it?"
)
# instruction prompt to answer in proper format
USER_PROMPT += (
"Your personality, locus of control, and demographic traits influence your response. Adjust your style based on your demographic personality traits.\n"
"**STRICTLY FOLLOW THESE RULES:**\n"
"- Human-like, casual, everyday conversational response. Only answer the questions\n"
f"- The response MUST have a consistent health topic: {health_topic}.\n"
# "- Answer briefly in **5-7 sentences**.\n"
"- Only provide the answer. DO NOT REPEAT THE PROMPT!\n"
"- Condition your response on your *demographic/personality traits provided earlier, IGNORING the [Not specified] ones*.\n"
"- MUST provide *reasonable* and *informative* answers aligned with your background."
f"- Start the response with '{opening_generic}' ; {style_hint} {lexical_hint}\n"
# f"- Start the answer some variations of \'About my personal thoughts on *{health_topic}*, I \' \n"
# f"- Start the answer with something like: When thinking about {health_topic}, I ..."
)
            # c. Construct LLM message
# print("USER PROMPT:", USER_PROMPT)
messages = [
{"role": "user", "content": SYSTEM_PROMPT + USER_PROMPT}
]
assert "gemma" in cfgs["model"], "Currently only gemma model is supported for no-image input"
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)
inputs = tokenizer(
input_text,
add_special_tokens = False,
return_tensors = "pt",
).to(device)
############################
### Text LLM Streaming ###
############################
# generation with streamer
generate_kwargs = dict(
**inputs,
streamer=streamer,
max_new_tokens=512,
use_cache=True,
# min_p=0.3,
top_k=15,
temperature=0.8,
do_sample=True, # cfgs["stochastic"]
)
# separate thread to run generation
thread = threading.Thread(
target=model.generate,
kwargs=generate_kwargs
)
thread.start()
# stream out generation
outputs = [f"Emulated traits:\n {demo_info}\n" + '='*20 + "\n\n"]
for new_token in streamer:
outputs.append(new_token)
final_output = ''.join(outputs)
yield final_output
thread.join()
except GeneratorExit:
print("User disconnected. Waiting for generation to complete...")
finally:
# Ensure cleanup happens even on normal finish or errors
if thread is not None and thread.is_alive():
thread.join()
torch.cuda.empty_cache()
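# Illustrative usage outside Gradio (hypothetical values; `model`, `tokenizer`,
# `cfgs`, and `cfg_prompts` must already be initialized as in `__main__`):
# for partial in vlm_response(
#     {"text": "", "files": []}, history=[], health_topic="Nutrition",
#     gender="Female", age="25–34", profession="Student", race="Leave Blank",
#     religion="Leave Blank", political="Leave Blank", education="Leave Blank",
#     income="Leave Blank", family_status="Leave Blank",
# ):
#     print(partial)  # streamed partial outputs, ending with the full response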
"""###########################################################################
Evaluate a given model (specified in model_cfgs)
on posters with given test_style
Args:
+ cfgs : specify model type (e.g. gemma or llama),
data source, and export paths
+ prompts : set of prompts
Outputs:
=> save model in cfgs["export_path"] (CSV file)
+ if cfgs["export_path"] not exists, initialize it with cfgs["data_path"]
=> original survey data with ground-truth responses
+ add column "<model>:<version>": store AI-simulated responses
+ support concurrent evaluation on different jobs
##########################################################################"""
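# Illustrative sketch of the export behavior described above (assumed helper,
# not executed by this demo; `version` and `responses` are hypothetical names):
# import shutil
# if not os.path.exists(cfgs["export_path"]):
#     shutil.copy(cfgs["data_path"], cfgs["export_path"])  # seed with survey data
# df = pd.read_csv(cfgs["export_path"])
# df[f"{cfgs['model']}:{version}"] = responses  # AI-simulated responses column
# df.to_csv(cfgs["export_path"], index=False)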
if __name__ == '__main__':
"""==========================================
1. load model settings & prompts format
=========================================="""
######################################
# Load model configs & prompts
######################################
model_cfg = "./configs/task1_demo_sph.yaml"
prompt_cfg = "./configs/prompts.yaml"
cfgs = load_config(model_cfg)
cfg_prompts = load_config(prompt_cfg)
"""==========================================
2. Evaluate model defined in configs
=========================================="""
print(colored('MODEL USE:', 'green'), cfgs["model"])
# print(prompts['SYSTEM'])
# print(prompts['INSTRUCTION'])
"""===============================
3. Initialize model
=> `model`, `tokenizer`
are initialized here
==============================="""
assert cfgs["infer_engine"] == "unsloth", "Only unsloth inference is supported"
assert cfgs["vision"] == True, "Must have vision input"
if cfgs["vision"]:
#################################################
### (1) MAIN MODEL
### => response emulation, fine-tuned model
#################################################
# WITH VISUAL STIMULI
model, tokenizer = FastVisionModel.from_pretrained(
model_name=cfgs["model"],
load_in_4bit=True,
)
FastVisionModel.for_inference(model)
if "gemma" in cfgs["model"]:
# gemma-specific tokenizer chat template
tokenizer = get_chat_template(
tokenizer,
chat_template = "gemma-3",
)
#################################################
        ### (2) AUXILIARY MODEL
### => summarization model
### => larger (12b) for better summarization
#################################################
# model_aux, tokenizer_aux = FastVisionModel.from_pretrained(
# model_name=cfgs["model_summarize"],
# load_in_4bit=True,
# )
# FastVisionModel.for_inference(model)
# if "gemma" in cfgs["model"]:
# # gemma-specific tokenizer chat template
# tokenizer_aux = get_chat_template(
# tokenizer_aux,
# chat_template = "gemma-3",
# )
# # initialize streamer tokens
# streamer = TextIteratorStreamer(
# tokenizer, skip_prompt=True, skip_special_tokens=True
# )
# streamer_aux = TextIteratorStreamer(
# tokenizer_aux, skip_prompt=True, skip_special_tokens=True
# )
"""=============================================
4. User-input Dropdown Traits
============================================="""
#################################
### Gradio Interface ###
#################################
with gr.Blocks(theme="gradio/dark") as interface:
# --- Title Page with Logo ---
LOGO_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "assets/umd_logo.png"))
gr.Image(value=LOGO_PATH, show_label=False, interactive=False, height=100)
gr.Markdown(
"""
<div style="text-align: center;">
<h1 style="margin-bottom: 0.5em;">
UMD AI-Empowered Response Prediction in Public Health Messaging
</h1>
</div>
<hr style="margin-top: 0.8em; margin-bottom: 0.8em;"> <!-- thinner spacing around line -->
<div style="text-align: center;">
<h2 style="margin-top: 0.3em; margin-bottom: 0.6em;">
User Guide
</h2>
</div>
<ul style="text-align: left; max-width: 800px; margin: auto;">
<li>This program emulates <b>demographic- and personality-conditioned responses</b> to public health posters using our trait-aligned Vision-Language Model (VLM).</li>
<li>To begin, (1) specify the target demographic traits, then (2) upload a public health poster to predict responses.</li>
<li>If a health poster is uploaded, the model first summarizes its understanding of the image.</li>
<li><b>Please note:</b>
<ul>
<li>Each interaction only uses the uploaded image and selected traits (no conversation history).</li>
<li>You don’t need to type any text prompt; just upload the Health Poster and click <b>Submit</b>.</li>
<li>If no poster or image is uploaded, the program automatically generates the emulated person’s <b>general opinion</b> on the selected Health Topic.</li>
                <li>Please do not interrupt the generation process, as interruptions can lead to unexpected results. If that happens, simply refresh the web app.</li>
                <li><b>Limitation:</b> The model may generate less realistic emulations for some demographics under-represented in the survey dataset (e.g., Asian seniors). We are conducting a more comprehensive survey to address this limitation.</li>
</ul>
</li>
</ul>
<hr style="margin-top: 0.8em; margin-bottom: 1.2em;">
""",
elem_id="intro-section"
)
# Scroll to intro section on load
gr.HTML("""
<script>
window.onload = function() {
window.scrollTo({ top: 0, behavior: 'smooth' });
}
</script>
""")
##########################
### Demographic Traits ###
##########################
gr.Markdown("## 1. Please specify the target demographic traits to be emulated here:")
# Dropdowns (single-select, no custom values)
with gr.Row():
gender = gr.Dropdown(
label="Gender",
choices=TRAIT_VALUES["Gender"],
allow_custom_value=False,
value="Female",
)
age = gr.Dropdown(
label="Age",
choices=TRAIT_VALUES["Age"],
allow_custom_value=False,
value="25–34",
)
profession = gr.Dropdown(
label="Current Profession",
choices=TRAIT_VALUES["Current Profession"], # keep given order
allow_custom_value=False,
value="Student",
)
with gr.Row():
race = gr.Dropdown(
label="Race/Ethnicity",
choices=TRAIT_VALUES["Race/Ethnicity"],
allow_custom_value=False,
value="White/Caucasian",
)
religion = gr.Dropdown(
label="Religious/Cultural Group",
choices=TRAIT_VALUES["Religious/Cultural Group"],
allow_custom_value=False,
value="Leave Blank",
)
political = gr.Dropdown(
label="Political Affiliation",
choices=TRAIT_VALUES["Political Affiliation"],
allow_custom_value=False,
value="Leave Blank",
)
with gr.Row():
education = gr.Dropdown(
label="Highest Education",
choices=TRAIT_VALUES["Highest Education"],
allow_custom_value=False,
value="Leave Blank",
)
income = gr.Dropdown(
label="Annual Household Income",
choices=TRAIT_VALUES["Annual Household Income"],
allow_custom_value=False,
value="$75,000–$99,999",
)
family_status = gr.Dropdown(
label="Family Status",
choices=TRAIT_VALUES["Family Status"],
allow_custom_value=False,
value="Leave Blank"
)
# ##########################
# ### Big Five Traits ###
# ##########################
# gr.Markdown("## 1.b) Please adjust the Big Five Personality Traits to be emulated:")
# with gr.Accordion("Big Five Personality Traits (1 = very low, 5 = very high)", open=True):
# gr.Markdown(
# "Adjust the sliders to represent the target personality profile. "
# "Leave them as-is if not applicable."
# )
# with gr.Row():
# with gr.Column(scale=1):
# openness = gr.Slider(
# label="Open-Mindedness",
# minimum=1, maximum=5, step=0.2, value=2.5,
# interactive=True
# )
# with gr.Column(scale=1):
# conscientiousness = gr.Slider(
# label="Conscientiousness",
# minimum=1, maximum=5, step=0.2, value=2.5,
# interactive=True
# )
# with gr.Column(scale=1):
# extraversion = gr.Slider(
# label="Extraversion",
# minimum=1, maximum=5, step=0.2, value=2.5,
# interactive=True
# )
# with gr.Row():
# with gr.Column(scale=1):
# neuroticism = gr.Slider(
# label="Neuroticism",
# minimum=1, maximum=5, step=0.2, value=2.5,
# interactive=True
# )
# with gr.Column(scale=1):
# agreeableness = gr.Slider(
# label="Agreeableness",
# minimum=1, maximum=5, step=0.2, value=2.5,
# interactive=True
# )
# gr.Column(scale=1) # right spacer
##########################
### Health Topic ###
##########################
gr.Markdown("## 2. Please specify the main Health Topic of the poster here:")
# ---- dropdown at ~50% page width and centered ----
with gr.Row():
with gr.Column(scale=1):
health_topic = gr.Dropdown(
label="Health Topic",
                    choices=list(HEALTH_TOPICS),  # show the topic names (dict keys)
allow_custom_value=False,
)
gr.Column(scale=1) # right spacer
##########################
### Chat interface ###
##########################
gr.Markdown("## 3. Upload Public Health Poster here (if no poster is uploaded, the model emulates General Response to the topic):")
gr.Markdown("""
#### ▶️ Use Case 1: Poster-Based Response
+ Upload **only one** poster image — the first file is the one processed.
+ The model has **no memory**, so re-upload the image for each new request.
+ Must choose a **Health Topic** that matches the poster content for best results.
+ No text prompt is needed: upload the poster and click **Submit**.
#### ▶️ Use Case 2: General Response (No Poster)
+ Simply select a Health Topic and click **Send**.
"""
)
gr.Markdown("""
### 📘 Important Notes
- ⚠️ **Do not interrupt the generation process.** Stopping midway can cause backend issues. Please allow the response to complete.
- 🏷️ Before uploading a poster, select its **corresponding health topic**.
- 🎯 For the best experience, ensure the **topic accurately matches the poster content**.
- 🧩 If you choose not to upload a poster, the model will produce a **general, trait-conditioned response** for the selected topic.
""")
chat = gr.ChatInterface(
fn=vlm_response,
multimodal=True, # text + image
title=f"Vision-Language Model: Trait-Conditioned Response Emulation",
type="messages",
additional_inputs=[
health_topic, gender, age, profession, race, religion,
political, education, income, family_status,
# extraversion, agreeableness, conscientiousness, neuroticism, openness,
],
chatbot=gr.Chatbot(height=500), # height=330
autofocus=False,
)
"""=============================================
5. Chat Interface Launch
============================================="""
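    # NOTE: `model`, `tokenizer`, and the streamer are shared globals, so the
    # queue allows only one concurrent worker; requests are served one at a
    # time to avoid interleaved `generate` calls on a single GPU.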
interface.queue(
max_size=20,
default_concurrency_limit=1,
).launch(
share=True,
max_threads=1,
# show_error=True,
# prevent_thread_lock=False,
# debug=True,
) |