% Conference paper by Chopra, Haffner and Dimitriadis (August 2011).
% Typed as inproceedings because the venue recorded for this entry is a
% conference (see att_copyright_notice), not a report-issuing institution.
% The att_* fields are custom export metadata; standard styles ignore them,
% so their values are kept as exported.
@inproceedings{TD:100455,
	att_abstract={{We propose a simple, yet novel, multi-layer model for the problem of phonetic classification. Our model combines the frame level transformation of the acoustic signal with the segment level transformation via a temporal pooling architecture to compute class conditional probabilities of phones. Without the use of any phonetic knowledge, our model achieved the state-of-the-art performance on the TIMIT phone classification task. The flexibility of our model allows us to mix a variety of pooling architectures, leading to further significant performance improvements.}},
	att_authors={sc984q, ph2326, dd734j},
	att_categories={C_CCF.5, C_CCF.2, C_IIS.11},
	att_copyright={{International Speech Communication Association}},
	att_copyright_notice={{The definitive version was published in 12th Annual Conference of the International Speech Communication Association. {{, 2011-08-27}}
}},
	att_donotupload={},
	att_private={false},
	att_projects={},
	att_tags={deep networks, neural networks, Time Delay Neural Network (TDNN), ensemble methods, phoneme classification},
	att_techdoc={true},
	att_techdoc_key={TD:100455},
	att_url={http://web1.research.att.com:81/techdocs_downloads/TD:100455_DS1_2011-04-01T00:53:43.949Z.pdf},
	author={Chopra, Sumit and Haffner, Patrick and Dimitriadis, Dimitrios},
	booktitle={12th Annual Conference of the International Speech Communication Association},
	month=aug,
	title={Combining Frame and Segment Level Processing via Temporal Pooling for Phonetic Classification},
	year=2011,
}