diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1a2c44f --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.pyc +.DS_Store + +.tox/ diff --git a/audioread/__init__.py b/audioread/__init__.py index 740bc55..93ef684 100644 --- a/audioread/__init__.py +++ b/audioread/__init__.py @@ -70,14 +70,14 @@ def _mad_available(): return True -def audio_open(path): +def audio_open(path, block_samples=4096): """Open an audio file using a library that is available on this system. """ # Standard-library WAV and AIFF readers. from . import rawread try: - return rawread.RawAudioFile(path) + return rawread.RawAudioFile(path, block_samples=block_samples) except DecodeError: pass @@ -85,7 +85,7 @@ def audio_open(path): if _ca_available(): from . import macca try: - return macca.ExtAudioFile(path) + return macca.ExtAudioFile(path, block_samples=block_samples) except DecodeError: pass @@ -93,7 +93,7 @@ def audio_open(path): if _gst_available(): from . import gstdec try: - return gstdec.GstAudioFile(path) + return gstdec.GstAudioFile(path, block_samples=block_samples) except DecodeError: pass @@ -101,14 +101,14 @@ def audio_open(path): if _mad_available(): from . import maddec try: - return maddec.MadAudioFile(path) + return maddec.MadAudioFile(path, block_samples=block_samples) except DecodeError: pass # FFmpeg. from . import ffdec try: - return ffdec.FFmpegAudioFile(path) + return ffdec.FFmpegAudioFile(path, block_samples=block_samples) except DecodeError: pass diff --git a/audioread/ffdec.py b/audioread/ffdec.py index 8306267..c207af7 100644 --- a/audioread/ffdec.py +++ b/audioread/ffdec.py @@ -100,7 +100,7 @@ def popen_multiple(commands, command_args, *args, **kwargs): class FFmpegAudioFile(object): """An audio file decoded by the ffmpeg command-line utility.""" - def __init__(self, filename, block_size=4096): + def __init__(self, filename, block_samples=4096): # On Windows, we need to disable the subprocess's crash dialog # in case it dies. Passing SEM_NOGPFAULTERRORBOX to SetErrorMode # disables this behavior. @@ -143,7 +143,7 @@ def __init__(self, filename, block_size=4096): # Start another thread to consume the standard output of the # process, which contains raw audio data. - self.stdout_reader = QueueReaderThread(self.proc.stdout, block_size) + self.stdout_reader = QueueReaderThread(self.proc.stdout, blocksize=block_samples) self.stdout_reader.start() # Read relevant information from stderr. @@ -152,7 +152,7 @@ def __init__(self, filename, block_size=4096): # Start a separate thread to read the rest of the data from # stderr. This (a) avoids filling up the OS buffer and (b) # collects the error output for diagnosis. - self.stderr_reader = QueueReaderThread(self.proc.stderr) + self.stderr_reader = QueueReaderThread(self.proc.stderr, blocksize=block_samples) self.stderr_reader.start() def read_data(self, timeout=10.0): diff --git a/audioread/macca.py b/audioread/macca.py index e1903c0..01b5897 100644 --- a/audioread/macca.py +++ b/audioread/macca.py @@ -195,7 +195,7 @@ class ExtAudioFile(object): >>> do_something(block) """ - def __init__(self, filename): + def __init__(self, filename, block_samples=4096): url = CFURL(filename) try: self._obj = self._open_url(url) @@ -204,6 +204,7 @@ def __init__(self, filename): raise del url + self.block_samples = block_samples self.closed = False self._file_fmt = None self._client_fmt = None @@ -295,9 +296,11 @@ def setup(self, bitdepth=16): newfmt.mBytesPerFrame = newfmt.mBytesPerPacket self.set_client_format(newfmt) - def read_data(self, blocksize=4096): + def read_data(self, blocksize=None): """Generates byte strings reflecting the audio data in the file. """ + blocksize = blocksize or self.block_samples * self._client_fmt.mBytesPerFrame + frames = ctypes.c_uint(blocksize // self._client_fmt.mBytesPerFrame) buf = ctypes.create_string_buffer(blocksize) @@ -323,6 +326,11 @@ def read_data(self, blocksize=4096): blob = data[:size] yield blob + def seek(self, pos): + """Seek to a frame position in the file.""" + check(_coreaudio.ExtAudioFileSeek(self._obj, pos)) + + def close(self): """Close the audio file and free associated memory.""" if not self.closed: diff --git a/audioread/maddec.py b/audioread/maddec.py index 8dd7aaa..9d3c058 100644 --- a/audioread/maddec.py +++ b/audioread/maddec.py @@ -23,7 +23,8 @@ class UnsupportedError(DecodeError): class MadAudioFile(object): """MPEG audio file decoder using the MAD library.""" - def __init__(self, filename): + def __init__(self, filename, block_samples=4096): + self.block_samples = block_samples self.fp = open(filename, 'rb') self.mf = mad.MadFile(self.fp) if not self.mf.total_time(): # Indicates a failed open. @@ -36,11 +37,12 @@ def close(self): if hasattr(self, 'mf'): del self.mf - def read_blocks(self, block_size=4096): + def read_blocks(self, block_size=None): """Generates buffers containing PCM data for the audio file. """ + block_samples = block_size or self.block_samples while True: - out = self.mf.read(block_size) + out = self.mf.read(block_samples) if not out: break yield out diff --git a/audioread/rawread.py b/audioread/rawread.py index 2ff476a..7126ef2 100644 --- a/audioread/rawread.py +++ b/audioread/rawread.py @@ -57,8 +57,9 @@ class RawAudioFile(object): """An AIFF, WAV, or Au file that can be read by the Python standard library modules ``wave``, ``aifc``, and ``sunau``. """ - def __init__(self, filename): + def __init__(self, filename, block_samples=1024): self._fh = open(filename, 'rb') + self.block_samples = block_samples try: self._file = aifc.open(self._fh) @@ -71,7 +72,7 @@ def __init__(self, filename): return try: - self._file = wave.open(self._fh) + self._file = wave.open(self._fh, 'r') except wave.Error: self._fh.seek(0) pass @@ -107,6 +108,11 @@ def close(self): self._file.close() self._fh.close() + def seek(self, pos): + """Seek to a frame position in the file.""" + # All three libraries have the same method for seeking + self._file.setpos(pos) + @property def channels(self): """Number of audio channels.""" @@ -122,8 +128,14 @@ def duration(self): """Length of the audio in seconds (a float).""" return float(self._file.getnframes()) / self.samplerate - def read_data(self, block_samples=1024): + @property + def nframes(self): + """Gets the number of frames in the source file.""" + return self._file.getnframes() + + def read_data(self, block_samples=None): """Generates blocks of PCM data found in the file.""" + block_samples = block_samples or self.block_samples old_width = self._file.getsampwidth() while True: diff --git a/derp.py b/derp.py new file mode 100644 index 0000000..6832fab --- /dev/null +++ b/derp.py @@ -0,0 +1,9 @@ +import mad +import os +tf = os.path.abspath(os.path.join('test', 'fixtures', 'wavetest.wav')) + +fp = open(tf, 'rb') +mf = mad.MadFile(fp) + +print('mf.total_time', mf.total_time()) +print(mf.read()) diff --git a/test.py b/test.py new file mode 100644 index 0000000..35fa6de --- /dev/null +++ b/test.py @@ -0,0 +1,10 @@ +import unittest +import sys + +if __name__ == '__main__': + loader = unittest.TestLoader() + tests = loader.discover('test') + testRunner = unittest.runner.TextTestRunner() + result = testRunner.run(tests) + if not result.wasSuccessful(): + sys.exit(1) \ No newline at end of file diff --git a/test/fixtures/README.md b/test/fixtures/README.md new file mode 100644 index 0000000..c248300 --- /dev/null +++ b/test/fixtures/README.md @@ -0,0 +1,23 @@ +Audio file fixtures for the tests. + +#### test.wav +Test.wav was produced by doing: + +```py +import numpy as np +from scipy.io import wavfile + +if __name__ == '__main__': + size = 512 + a = np.full((size, ), 0.) + b = np.full((size, ), 0.2) + c = np.full((size, ), 0.5) + d = np.full((size, ), 0.9) + t = np.concatenate((a, b, c, d)) + + wavfile.write('test.wav', 44100, t) +``` + +#### wavetest.wav + +Produced with `make_test_wave.py` diff --git a/test/fixtures/make_test_wave.py b/test/fixtures/make_test_wave.py new file mode 100644 index 0000000..37ea73e --- /dev/null +++ b/test/fixtures/make_test_wave.py @@ -0,0 +1,22 @@ +import numpy as np +import wave +import struct + +def getData(): + size = 512 + + a = np.full((size, ), 0., dtype=np.float16) + b = np.full((size, ), 0.2, dtype=np.float16) + c = np.full((size, ), 0.5, dtype=np.float16) + d = np.full((size, ), 0.9, dtype=np.float16) + return np.concatenate((a, b, c, d)) + + +if __name__ == '__main__': + fout = wave.open('test/fixtures/wavetest.wav', 'w') + data = getData() + fout.setnchannels(1) + fout.setframerate(44100) + fout.setsampwidth(2) + fout.writeframes(data.tobytes()) + fout.close() diff --git a/test/fixtures/mp3test.mp3 b/test/fixtures/mp3test.mp3 new file mode 100644 index 0000000..15f79d2 Binary files /dev/null and b/test/fixtures/mp3test.mp3 differ diff --git a/test/fixtures/sample.mp3 b/test/fixtures/sample.mp3 new file mode 100644 index 0000000..30edb48 Binary files /dev/null and b/test/fixtures/sample.mp3 differ diff --git a/test/fixtures/test.wav b/test/fixtures/test.wav new file mode 100644 index 0000000..a8b5271 Binary files /dev/null and b/test/fixtures/test.wav differ diff --git a/test/fixtures/wavetest.wav b/test/fixtures/wavetest.wav new file mode 100644 index 0000000..5d29340 Binary files /dev/null and b/test/fixtures/wavetest.wav differ diff --git a/test/test_audioread.py b/test/test_audioread.py new file mode 100644 index 0000000..7881942 --- /dev/null +++ b/test/test_audioread.py @@ -0,0 +1,93 @@ +import os +import unittest +import audioread + +numSamples = 512 + +testFilename = os.path.abspath(os.path.join('test', 'fixtures', 'wavetest.wav')) +rowLookup = [ + b'\x00\x00', + b'f2', + b'\x008', + b'3;', +] + +class TestAudioreadWav(unittest.TestCase): + + def test_audio_open_as_generator(self): + result = [] + with audioread.audio_open(testFilename, block_samples=numSamples) as f: + print('wav decode class', f.__class__) + gen = f.read_data() + try: + while True: + data = next(gen) + result.append(data) + except StopIteration: + pass + + self.assertEqual(len(bytes(result[0])), numSamples*2) + self.assertEqual(len(rowLookup), len(result)) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + + def test_audio_open_as_forloop(self): + result = [] + with audioread.audio_open(testFilename, block_samples=numSamples) as f: + self.assertEqual(f.nframes, 2048) + for buf in f: + result.append(buf) + + self.assertEqual(len(bytes(result[0])), numSamples*2) + self.assertEqual(len(rowLookup), len(result)) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + +mp3TestFilename = os.path.abspath(os.path.join('test', 'fixtures', 'sample.mp3')) +mp3RowLookup = [ + b'\x00\x00', + b'\x00\x00', + b'N\xff', + b'\xe8/', + b'.5', + b'\x089', + b'\x00\x00', +] + +class TestAudioreadMp3(unittest.TestCase): + + def test_audio_open_as_generator(self): + result = [] + with audioread.audio_open(mp3TestFilename, block_samples=numSamples) as f: + print('Mp3 decode class', f.__class__) + gen = f.read_data() + try: + while True: + data = next(gen) + result.append(data) + except StopIteration: + pass + + self.assertEqual(len(bytes(result[0])), numSamples*2) + self.assertEqual(len(mp3RowLookup), len(result)) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), mp3RowLookup[i]) + + + def test_audio_open_as_forloop(self): + result = [] + with audioread.audio_open(mp3TestFilename, block_samples=numSamples) as f: + # self.assertEqual(f.nframes, 4) + for buf in f: + result.append(buf) + + self.assertEqual(len(bytes(result[0])), numSamples*2) + self.assertEqual(len(mp3RowLookup), len(result)) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), mp3RowLookup[i]) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_ffdec.py b/test/test_ffdec.py new file mode 100644 index 0000000..7a0c976 --- /dev/null +++ b/test/test_ffdec.py @@ -0,0 +1,72 @@ +import os +import unittest +import audioread +from audioread import ffdec + +testFilename = os.path.abspath(os.path.join('test', 'fixtures', 'sample.mp3')) +rowLookup = [ + b'\x01\x00', + b'w\x00', + b'\xf6&', + b'\xe8/', + b'v4', + b'f5', + b'~7', + b'\x9a7', + b'C\t', + b'\xfb\xff', +] +numSamples = 512 + +class TestFFDec(unittest.TestCase): + + def test_open_as_generator(self): + result = [] + with ffdec.FFmpegAudioFile(testFilename, block_samples=numSamples) as input_file: + print('input_file duration', input_file.duration) + gen = input_file.read_data() + try: + while True: + data = next(gen) + result.append(data) + except StopIteration: + pass + + self.assertEqual( + (len(rowLookup), len(bytes(result[0]))), + (len(result), numSamples) + ) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + + def test_open_as_forloop(self): + result = [] + with ffdec.FFmpegAudioFile(testFilename, block_samples=numSamples) as input_file: + for buf in input_file: + result.append(buf) + + self.assertEqual( + (len(rowLookup), len(bytes(result[0]))), + (len(result), numSamples) + ) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + @unittest.skip('wip') + def test_seek(self): + result = [] + with ffdec.FFmpegAudioFile(testFilename, block_samples=numSamples) as input_file: + gen = input_file.read_data() + + # move forward + row = next(gen) + row = next(gen) + row = next(gen) + + # go back + input_file.seek(512) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[1]) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[2]) diff --git a/test/test_gstdec.py b/test/test_gstdec.py new file mode 100644 index 0000000..c6b0afb --- /dev/null +++ b/test/test_gstdec.py @@ -0,0 +1,70 @@ +import os +import unittest +import audioread + +gstAvailible = audioread._gst_available() +if gstAvailible: + from audioread import gstdec + + +testFilename = os.path.abspath(os.path.join('test', 'fixtures', 'mp3test.mp3')) +rowLookup = [ + b'\x01\x00', + b'w\x00', + b'\xf6&', + b'\xe8/', + b'v4', + b'f5', + b'~7', + b'\x9a7', + b'C\t', + b'\xfb\xff', +] +numSamples = 512 + +print('gstAvailible', gstAvailible) +@unittest.skipIf(not gstAvailible, 'Not supported') +class TestGstDec(unittest.TestCase): + + def test_open_as_generator(self): + result = [] + with gstdec.GstAudioFile(testFilename, block_samples=numSamples) as input_file: + gen = input_file.read_data() + try: + while True: + data = next(gen) + result.append(data) + except StopIteration: + pass + + self.assertEqual(len(bytes(result[0])), numSamples*2) + self.assertEqual(len(rowLookup), len(result)) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + + def test_open_as_forloop(self): + result = [] + with gstdec.GstAudioFile(testFilename, block_samples=numSamples) as input_file: + for buf in input_file: + result.append(buf) + + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + def test_seek(self): + result = [] + with gstdec.GstAudioFile(testFilename, block_samples=numSamples) as input_file: + gen = input_file.read_data() + + # move forward + row = next(gen) + row = next(gen) + row = next(gen) + + # go back + input_file.seek(512) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[1]) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[2]) diff --git a/test/test_macca.py b/test/test_macca.py new file mode 100644 index 0000000..8dbd8a9 --- /dev/null +++ b/test/test_macca.py @@ -0,0 +1,67 @@ +import os +import unittest +import audioread +maccaAvailable = audioread._ca_available() +print('maccaAvailable', maccaAvailable) +if maccaAvailable: + from audioread import macca + +PROJECT_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) +testFilename = os.path.abspath(os.path.join(PROJECT_DIR, 'test', 'fixtures', 'wavetest.wav')) + +rowLookup = [ + b'\x00\x00', + b'f2', + b'\x008', + b'3;', +] +numSamples = 512 + +@unittest.skipIf(not maccaAvailable, 'Not supported') +class TestMacca(unittest.TestCase): + + def test_macca_as_generator(self): + result = [] + with macca.ExtAudioFile(testFilename, block_samples=numSamples) as f: + gen = f.read_data() + try: + while True: + data = next(gen) + result.append(data) + except StopIteration: + pass + + self.assertEqual(len(bytes(result[0])), numSamples*2) + self.assertEqual(len(rowLookup), len(result)) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + + def test_macca_as_forloop(self): + result = [] + with macca.ExtAudioFile(testFilename, block_samples=numSamples) as f: + self.assertEqual(f.nframes, 2048) + for buf in f: + result.append(buf) + + self.assertEqual(len(bytes(result[0])), numSamples*2) + self.assertEqual(len(rowLookup), len(result)) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + def test_seek(self): + result = [] + with macca.ExtAudioFile(testFilename, block_samples=numSamples) as input_file: + gen = input_file.read_data() + + # move forward + row = next(gen) + row = next(gen) + row = next(gen) + + # go back + input_file.seek(512) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[1]) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[2]) diff --git a/test/test_maddec.py b/test/test_maddec.py new file mode 100644 index 0000000..5809247 --- /dev/null +++ b/test/test_maddec.py @@ -0,0 +1,68 @@ +import os +import unittest +import audioread +madAvailible = audioread._mad_available() +if madAvailible: + from audioread import maddec + +testFilename = os.path.abspath(os.path.join('test', 'fixtures', 'mp3test.mp3')) +rowLookup = [ + b'\x01\x00', + b'w\x00', + b'\xf6&', + b'\xe8/', + b'v4', + b'f5', + b'~7', + b'\x9a7', + b'C\t', + b'\xfb\xff', +] +numSamples = 512 + +@unittest.skipIf(not madAvailible, 'Not supported') +class TestMadDec(unittest.TestCase): + + def test_open_as_generator(self): + result = [] + with maddec.MadAudioFile(testFilename, block_samples=numSamples) as input_file: + gen = input_file.read_data() + try: + while True: + data = next(gen) + result.append(data) + except StopIteration: + pass + + self.assertEqual(len(bytes(result[0])), numSamples*2) + self.assertEqual(len(rowLookup), len(result)) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + + def test_open_as_forloop(self): + result = [] + with maddec.MadAudioFile(testFilename, block_samples=numSamples) as input_file: + for buf in input_file: + result.append(buf) + + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + @unittest.skip('WIP') + def test_seek(self): + result = [] + with maddec.MadAudioFile(testFilename, block_samples=numSamples) as input_file: + gen = input_file.read_data() + + # move forward + row = next(gen) + row = next(gen) + row = next(gen) + + # go back + input_file.seek(512) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[1]) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[2]) diff --git a/test/test_rawread.py b/test/test_rawread.py new file mode 100644 index 0000000..cb50ef4 --- /dev/null +++ b/test/test_rawread.py @@ -0,0 +1,60 @@ +import os +import unittest +import audioread +from audioread import rawread + + +testFilename = os.path.abspath(os.path.join('test', 'fixtures', 'wavetest.wav')) +rowLookup = [ + b'\x00\x00', + b'f2', + b'\x008', + b'3;', +] +numSamples = 512 + + +class TestRawRead(unittest.TestCase): + + def test_open_as_generator(self): + result = [] + with rawread.RawAudioFile(testFilename, block_samples=numSamples) as input_file: + gen = input_file.read_data() + try: + while True: + data = next(gen) + result.append(data) + except StopIteration: + pass + + self.assertEqual(len(bytes(result[0])), numSamples*2) + self.assertEqual(len(rowLookup), len(result)) + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + + def test_open_as_forloop(self): + result = [] + with rawread.RawAudioFile(testFilename, block_samples=numSamples) as input_file: + for buf in input_file: + result.append(buf) + + for i, row in enumerate(result): + self.assertEqual(bytes(row[0:2]), rowLookup[i]) + + def test_seek(self): + result = [] + with rawread.RawAudioFile(testFilename, block_samples=numSamples) as input_file: + gen = input_file.read_data() + + # move forward + row = next(gen) + row = next(gen) + row = next(gen) + + # go back + input_file.seek(512) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[1]) + row = next(gen) + self.assertEqual(bytes(row[0:2]), rowLookup[2]) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..49d8827 --- /dev/null +++ b/tox.ini @@ -0,0 +1,6 @@ +[tox] +envlist = py27,py36 + +[testenv] +commands = + python test.py