|
|
import streamlit as st |
|
|
import leafmap.foliumap as leafmap |
|
|
import base64 |
|
|
|
|
|
import random |
|
|
# Page-wide settings: "wide" layout lets the content use the full browser width.
st.set_page_config(layout="wide")

# Application logo; the `link` makes the logo point at the personal homepage.
st.logo("./static/logo1.png", link="https://nicedata.eu.org/")
|
|
|
|
|
|
|
|
# Sidebar: a "Contact" title followed by an info box holding a Markdown
# bullet list of contact details.
with st.sidebar:
    st.title("Contact")
    st.info(
        """
- Email: [sh.wang4067@gmail.com](mailto:sh.wang4067@gmail.com)
- Tel: +86 181-1615-2720
- Homepage: [nicedata.eu.org](https://nicedata.eu.org)
- Github: [wdzhwsh4076](https://github.com/wdzhwsh4076)
- Address: Boda Campus, Xinjiang University, Urumqi City, China
"""
    )
|
|
# Page header: two equal-width columns. The first column (p1) holds the name
# and a short biography, the second column (p2) holds the portrait.
p1, p2 = st.columns(2)

# An empty subheader is emitted before the portrait — presumably a vertical
# spacer so the image lines up with the text; confirm before removing.
p2.subheader("")
p2.image("./static/self.jpeg", width=200)

p1.title("Shaohuang Wang")
p1.markdown(
    """
I'm Shaohuang Wang, a Computer Science Master's student at Xinjiang University, focusing on Machine Learning and Data Structures.

My research interests include Large Language Models and Recommender Systems. I'm also a developer at NLPIR Lab, working on NLP and data processing technologies.

[Email](mailto:sh.wang4067@gmail.com) / [CV](https://nicedata.eu.org) / [Bio](https://nicedata.eu.org) / [Google Scholar](https://scholar.google.com) / [Twitter](https://twitter.com) / [Github](https://github.com/wdzhwsh4076)

"""
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Education section -------------------------------------------------------
# NOTE(review): this file emits the same anchor name ("customizable-border")
# before every section header, so an in-page link can only reach the first
# one. Left unchanged here in case something links to it.
st.markdown('<a name="customizable-border"></a>', unsafe_allow_html=True)

# The header emoji is a ZWJ sequence (man + skin tone + U+200D + graduation
# cap). The joiner is written as an explicit escape so the sequence still
# renders as a single "student" glyph even if a tool strips invisible
# characters from the source file.
st.header("👨🏻\u200d🎓 Education", divider="rainbow")

# Degree history, rendered as Markdown.
st.markdown("""
### Xinjiang University, China
- **M.Eng. in Computer Science** (2022.09 - Present)
- GPA: 3.5/4.0
- Core Course: Machine Learning, Computer Networks, Data Structures
- Thesis: Research on scientific and technological information recommendation via LLM
- Research Interest: LLM, RAG, SFT, Fine-tuning, Recommend System

### Shanghai University of Engineering Science, China
- **B.Eng. in Vehicle Engineering** (2016.09 - 2020.06)
- GPA: 3.1/4.0
- Awards: National Scholarship (Top 1%), First-Class Scholarship (Top 3%)
"""
)
|
|
|
|
|
|
|
|
# --- Publication section header ----------------------------------------------
# NOTE(review): this anchor name is reused before every section header in the
# file, so it does not uniquely identify this section.
st.markdown('<a name="customizable-border"></a>', unsafe_allow_html=True)

# Emoji is a ZWJ sequence (person + skin tone + U+200D + laptop); the joiner is
# spelled as an escape so it cannot be silently dropped from the source.
st.header("🧑🏻\u200d💻 Publication", divider="rainbow")
|
|
def get_file_url(path):
    """Return the contents of the file at *path* as base64 text.

    The file is read in binary mode and its bytes are base64-encoded; the
    encoded bytes are then decoded to a ``str``. Note that, despite the
    function name, only the base64 payload is returned — no ``data:`` URL
    prefix is added.

    Args:
        path: Location of the file to read.

    Returns:
        The base64 encoding of the file's bytes, as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises, which the
    # previous explicit open()/close() pair did not guarantee.
    with open(path, "rb") as file_:
        contents = file_.read()
    return base64.b64encode(contents).decode("utf-8")
|
|
# --- Publication 1 -----------------------------------------------------------
# Two equal columns: description and action buttons in the first column (p1),
# the paper figure and its caption in the second column (p2).
p1, p2 = st.columns(2)

with p1:
    st.subheader("Bypassing LLM Safeguards: The In-Context Tense Attack Approach.")
    st.info("Wang, S et al. International Conference on Computer Engineering and Networks, 2406.12243 (2024). (Accepted)")
    st.markdown("""
We explore the power of combining tense attacks with in-context examples in manipulating the security of LLMs and propose In-Context Tense Attack (ITA) for jailbreaking purposes.
"""
    )

    # Row of three action buttons underneath the description.
    # NOTE(review): both download buttons serve ./static/lora.png under the
    # file name "flower.png" although they are labelled "Arxiv" and "PDF", and
    # "Cite" links to the Streamlit gallery — this looks like placeholder
    # wiring; confirm the intended targets.
    f1, f2, f3 = st.columns(3)
    with f1:
        with open("./static/lora.png", "rb") as file1:
            st.download_button(
                key='4',
                label="Arxiv",
                data=file1,
                file_name="flower.png",
                mime="image/png",
                type="primary",
                use_container_width=True,
            )
    with f2:
        with open("./static/lora.png", "rb") as file2:
            st.download_button(
                key='1',
                label="PDF",
                data=file2,
                file_name="flower.png",
                mime="image/png",
                type="secondary",
                use_container_width=True,
            )
    with f3:
        st.link_button("Cite", "https://streamlit.io/gallery", use_container_width=True,)

with p2:
    st.image("./static/paper1.png", width=450)
    st.caption("""
Tense Attack is a technique targeting Large Language Models (LLMs) that
exploits the models’ potential vulnerability when processing requests phrased in
the past tense.
""")
|
|
|
|
|
# Horizontal rule separating the two featured publications.
st.divider()

# --- Publication 2 -----------------------------------------------------------
# Mirror image of the first entry: the figure goes in the first column (p1),
# the description and action buttons in the second column (p2).
p1, p2 = st.columns(2)

with p2:
    st.subheader("CherryRec: Enhancing News Recommendation Quality via LLM driven Framework.")
    st.info("Wang, S et al. CherryRec: Enhancing News Recommendation Quality via LLM driven Framework. ICASSP (2025). (Under Review)")
    st.markdown("""
Introduced CherryRec, a news recommendation framework using LLMs to filter low-value news and recommend high-quality news by understanding user preferences and integrating multi-dimensional scores.
"""
    )

    # Row of three action buttons underneath the description.
    # NOTE(review): both download buttons serve ./static/lora.png under the
    # file name "flower.png" although they are labelled "Arxiv" and "PDF", and
    # "Cite" links to the Streamlit gallery — this looks like placeholder
    # wiring; confirm the intended targets.
    f1, f2, f3 = st.columns(3)
    with f1:
        with open("./static/lora.png", "rb") as file1:
            st.download_button(
                key='2',
                label="Arxiv",
                data=file1,
                file_name="flower.png",
                mime="image/png",
                type="primary",
                use_container_width=True,
            )
    with f2:
        with open("./static/lora.png", "rb") as file2:
            st.download_button(
                key='3',
                label="PDF",
                data=file2,
                file_name="flower.png",
                mime="image/png",
                type="secondary",
                use_container_width=True,
            )
    with f3:
        st.link_button("Cite", "https://streamlit.io/gallery", use_container_width=True,)

with p1:
    st.image("./static/paper2-1.png", width=450)
    # The caption previously contained a word split by a line-break hyphen
    # ("rec-" / "ommendations"), which Markdown would render with a stray
    # space; the word is now kept whole.
    st.caption("""
Knowledge-aware News Rapid Selector (KnRS) quickly identifies relevant
news candidates by assessing user interaction history and content attributes.
Content-aware News LLM Evaluator (CnLE) refines selections using a fine-tuned
LLM, deeply understanding user preferences to enhance personalized news
recommendations.
""")
|
|
# Collapsible list of the remaining publications and patents, written as a
# numbered Markdown list inside an expander.
other_publications = st.expander("A list of other publications ")
other_publications.markdown(
    """
1. **Wang, S** et al. "Bypassing LLM Safeguards: The In-Context Tense Attack Approach." International Conference on Computer Engineering and Networks, 2406.12243 (2024). (Accepted)
2. **Wang, S** et al. "CherryRec: Enhancing News Recommendation Quality via LLM driven Framework." ICASSP (2025). (Under Review)
3. Liang, Y & **Wang, S** et al. "LLaMA-MoT: A Cost-Effective Framework for Visual-Linguistic Instruction Tuning Based on Multi-Head Adapters and Chain-of-Thought." ESWA (2024). (Under Review)
4. **Wang, S** et al. "An agile construction method of instruction fine-tuning dataset based on semi-structured data." Patent (2024). (Submitted)
5. **Wang, S** et al. "Finite element analysis of modular automotive body based on Ansys." Guangxi Journal of Light Industry (2020). (Accepted)
6. **Wang, S** et al. "Buffer connecting device for vehicle." Patent (2020). (Accepted)

*Still in possession of 8 patents, along with various other publications.*
"""
)
|
|
|
|
|
|
|
|
|
|
|
# --- Research Experience section ---------------------------------------------
# NOTE(review): this anchor name is reused before every section header in the
# file, so it does not uniquely identify this section.
st.markdown('<a name="customizable-border"></a>', unsafe_allow_html=True)

# Emoji is a ZWJ sequence (person + skin tone + U+200D + school); the joiner is
# spelled as an escape so it cannot be silently dropped from the source.
st.header("🧑🏻\u200d🏫 Research Experience", divider="rainbow")

# Project descriptions, rendered as Markdown.
st.markdown("""
### Domain Information Tracking and Processing Project
- **Developer@NLPIR Lab** (2022.09 - Present)
- Responsible for the development of the algorithm tool layer, including data collection, review and correction, dynamic selection, keyword extraction, and briefing generation algorithms.
- Utilized Elasticsearch and MySQL databases for data storage and processing, optimizing data query and analysis processes.
- Achieved rapid system deployment and front-end and back-end separation design through Docker, simplifying operations and maintenance and enhancing system maintainability.
- **Technology Stack**: Python, Elasticsearch, MySQL, Docker, Vue.JS, FastAPI

### Doc2QA Framework for Large Language Model SFT Datasets
- **Developer@NLPIR Lab** (2023.04 - Present)
- Designed and released a comprehensive dataset for QA instruction fine-tuning using semi-structured data, providing a valuable resource for future research.
- Developed a novel framework, "Doc2QA" based on Large Language Models (LLMs) to generate question-answer pairs from semi-structured data such as HTML, DOC, and PDF.
- **Technology Stack**: Python, Llama-factory, Vllm, FastAPI, Docker, JavaScript
"""
)
|
|
# --- Skills section ----------------------------------------------------------
# NOTE(review): this anchor name is reused before every section header in the
# file, so it does not uniquely identify this section.
st.markdown('<a name="customizable-border"></a>', unsafe_allow_html=True)

# Emoji is a ZWJ sequence (person biking + skin tone + U+200D + male sign +
# U+FE0F variation selector); the invisible code points are spelled as escapes
# so they cannot be silently dropped from the source.
st.header("🚵🏻\u200d♂\ufe0f Skills", divider="rainbow")

# Skill summary, rendered as a Markdown bullet list.
st.markdown(
    """
- **Proficient in Coding**: Python, FastAPI, Elasticsearch, Docker, Vue.Js, Nginx
- **Model Training in AI/ML**: PyTorch, TensorFlow, Llama-index, Vllm, Llama-factory
- **Simulation and Design**: AutoCAD, CATIA, SolidWorks, ANSYS, 3DMax, Adobe Photoshop/Illustrator
- **Languages**: Chinese (native), English (IELTS: 6.5, L: 6.5 R: 7.5 W: 6.0 S: 6.0), Japanese (JLPT-N2)
"""
)

# Technology-stack illustration shown below the list.
st.image("./static/stack.png")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|