Using UniHSI, we use LLMs to plan and control multi-agent interactions and synthesize interesting videos. Note: at the current stage, the "multi-agent interaction" is realized only at the command level.
@inproceedings{xiao2024unified,
  author    = {Zeqi Xiao and Tai Wang and Jingbo Wang and Jinkun Cao and Wenwei Zhang and Bo Dai and Dahua Lin and Jiangmiao Pang},
  title     = {Unified Human-Scene Interaction via Prompted Chain-of-Contacts},
  booktitle = {The Twelfth International Conference on Learning Representations},
  year      = {2024},
  url       = {https://openreview.net/forum?id=1vCnDyQkjg},
}