% Conference paper (Interspeech, August 2013); the att_* fields are custom
% metadata carried over verbatim from the source export and are ignored by
% standard bibliography styles.
% Changes from the exported record: entry type switched from techreport to
% inproceedings with the venue moved from institution to booktitle; the
% affiliation text was removed from the author list (it was being parsed as
% part of a person's name); names are in "Last, First" form; month uses the
% standard macro; the title is stored in title case without whole-title braces.
% NOTE(review): att_copyright_notice mentions IEEE and 2012, whereas
% att_copyright names the International Speech Communication Association and
% year is 2013 -- confirm the correct wording with the rights holder.
@inproceedings{TD:101057,
	author={Dimitriadis, Dimitrios and Zavesky, Eric and Burlick, Matthew},
	title={An Augmented Multi-Tiered Classifier for Instantaneous Multi-Modal Voice Activity Detection},
	booktitle={Annual Conference of International Speech Communication Association ({Interspeech})},
	month=aug,
	year=2013,
	att_abstract={{Motivated by the multi-modal way that humans detect voice activity, in the computer science and signal processing communities voice activity detection (VAD) has seen great improvement by combining multiple modalities, in particular employing both acoustic and visual (AV) information.  When dealing with multiple modalities, the question soon arises, "how do we fuse the modalities?".  Prior work treats this problem in one of two ways, via feature fusion or via decision fusion.  In this work we present a new AV-VAD modality fusion method which combines both approaches; we allow individual independent classifiers to provide feedback and then use this feedback, in combination with the raw fused features, as inputs to a second classifier.  We call this approach "augmented multi-tier classification" since we concatenate the output of a set of base classifiers with the original fused features (we augment the output).  Our experiments show an improvement over other common fusion schemes, especially when faced with large amounts of acoustic noise.}},
	att_authors={dd734j, ez2685},
	att_categories={C_IIS.9},
	att_copyright={{International Speech Communication Association}},
	att_copyright_notice={{This version of the work is reprinted here with permission of IEEE for your personal use. Not for redistribution. The definitive version was published in 2012. {{, 2013-08-26}}{{, http://www.interspeech2013.org}}
}},
	att_donotupload={},
	att_private={false},
	att_projects={},
	att_tags={Multi-Modal Fusion,  Computer Vision,  Voice Activity Detection},
	att_techdoc={true},
	att_techdoc_key={TD:101057},
	att_url={http://web1.research.att.com:81/techdocs_downloads/TD:101057_DS1_2012-11-29T22:16:05.702Z.pdf},
}