% Stoffel (2012): manuscript on audio hashing for spam-call detection based on
% the sign pattern of redundant discrete wavelet transform coefficients.
% The quotation marks in the abstract use LaTeX quotes (``...''); a backslash
% followed by a double quote is the umlaut accent in LaTeX, not a quote mark.
% NOTE(review): the standard styles require a "note" field for entries of type
% unpublished and do not print "institution" for that type -- confirm the
% intended entry type (e.g. techreport) or add a note from the source record.
@unpublished{Stoffel2012,
  author      = {Stoffel, Alexander},
  title       = {Audio Hashing for Spam Detection Using the Redundant Discrete Wavelet Transform},
  institution = {Fakult{\"a}t 07 / Institut f{\"u}r Nachrichtentechnik},
  year        = {2012},
  abstract    = {For audio signals, we use the sign of the coefficients of the redundant discrete wavelet transform to generate primary hash vectors assigning bit 1 to positive or zero coefficients and bit 0 in the negative case. Discarding the highest frequency band and using a 6 step transform we get for each sample a 6 bit primary hash value which we may save as an integer. We then select a possible primary hash value (in our experiments we chose 0 or 63) and take the time indices where this primary hash value occurs as the secondary hash vector which is attributed to the whole audio signal. Comparing two audio signals, the number of elements in the intersection of the corresponding time indices are called ``number of matches'', a high number may indicate a similarity between the files. This secondary hash vector turns out to be robust against addition of noise, GSM-, G.726-, MP3 coding and packet loss. It may therefore be useful to detect spam telephone calls without analyzing the semantic content by the similarity of the corresponding signals. An algorithm is given to detect similar but shifted signals. Results of experiments are reported using a test corpus of 5 000 audio files of regular calls and 200 audio files of different versions of 20 original spam recordings augmented by a set of 45 files of different versions of 9 ``special spam'' signals.},
  language    = {en},
}