Package echonest :: Package remix :: Module audio
[hide private]
[frames] | [no frames]

Source Code for Module echonest.remix.audio

   1  """ 
   2  The main `Echo Nest`_ `Remix API`_ module for manipulating audio files and  
   3  their associated `Echo Nest`_ `Analyze API`_ analyses. 
   4   
   5  AudioData, and getpieces by Robert Ochshorn 
   6  on 2008-06-06.  Some refactoring and everything else by Joshua Lifton 
   7  2008-09-07.  Refactoring by Ben Lacker 2009-02-11. Other contributions 
   8  by Adam Lindsay. 
   9   
  10  :group Base Classes: AudioAnalysis, AudioRenderable, AudioData, AudioData32 
  11  :group Audio-plus-Analysis Classes: LocalAudioFile, LocalAnalysis 
  12  :group Building Blocks: AudioQuantum, AudioSegment, AudioQuantumList, ModifiedRenderable 
  13  :group Effects: AudioEffect, LevelDB, AmplitudeFactor, TimeTruncateFactor, TimeTruncateLength, Simultaneous 
  14  :group Exception Classes: FileTypeError, EchoNestRemixError 
  15   
  16  :group Audio helper functions: getpieces, mix, assemble, megamix 
  17  :group ffmpeg helper functions: ffmpeg, settings_from_ffmpeg, ffmpeg_error_check 
  18  :group Utility functions: _dataParser, _attributeParser, _segmentsParser 
  19   
  20  .. _Analyze API: http://developer.echonest.com/ 
  21  .. _Remix API: https://github.com/echonest/remix 
  22  .. _Echo Nest: http://the.echonest.com/ 
  23  """ 
  24   
  25  __version__ = "$Revision: 0 $" 
  26  # $Source$ 
  27   
  28  import hashlib 
  29  import numpy 
  30  import os 
  31  import sys 
  32  import pickle 
  33  import shutil 
  34  import StringIO 
  35  import struct 
  36  import subprocess 
  37  import tempfile 
  38  import wave 
  39   
  40  from pyechonest import track 
  41  import pyechonest.util 
  42  import pyechonest.config as config 
  43  #from echonest.support import stupidxml 
  44  import xml.etree.ElementTree as etree 
  45  import xml.dom.minidom as minidom 
  46  import weakref 
  47   
  48   
  49  MP3_BITRATE = 128 
class AudioAnalysis(object):
    """
    This class uses (but does not wrap) `pyechonest.track` to allow
    transparent caching of the audio analysis of an audio file.

    For example, the following script will display the bars of a track
    twice::

        from echonest import *
        a = audio.AudioAnalysis('YOUR_TRACK_ID_HERE')
        a.bars
        a.bars

    The first time `a.bars` is called, a network request is made of the
    `Echo Nest`_ `Analyze API`_. The second time `a.bars` is called, the
    cached value is returned immediately.

    An `AudioAnalysis` object can be created using an existing ID, as in
    the example above, or by specifying the audio file to upload in
    order to create the ID, as in::

        a = audio.AudioAnalysis('FULL_PATH_TO_AUDIO_FILE')

    .. _Analyze API: http://developer.echonest.com/pages/overview?version=2
    .. _Echo Nest: http://the.echonest.com/
    """

    def __init__(self, path_or_identifier):
        """
        Constructor. If the argument is a valid local path or a URL,
        the track ID is generated by uploading the file to the `Echo Nest`_
        `Analyze API`_\. Otherwise, the argument is assumed to be
        the track ID.

        :param path_or_identifier: A string representing either a path to a local
                                   file, or the ID of a file that has already
                                   been uploaded for analysis.

        :raises TypeError: if the argument is not a string.
        :raises EchoNestRemixError: if no track could be found for the argument.

        .. _Analyze API: http://developer.echonest.com/docs/v4/track.html
        .. _Echo Nest: http://the.echonest.com/
        """

        if type(path_or_identifier) is not str:
            # Argument is invalid.
            raise TypeError("Argument 'path_or_identifier' must be a string \
                            representing either a filename, track ID, or MD5.")

        # see if path_or_identifier is a path or an ID
        if os.path.isfile(path_or_identifier):
            # it's a filename
            self.pyechonest_track = track.track_from_filename(path_or_identifier)
        else:
            if path_or_identifier.startswith('music://') or \
               (path_or_identifier.startswith('TR') and \
                len(path_or_identifier) == 18):
                # it's an id
                self.pyechonest_track = track.track_from_id(path_or_identifier)
            elif len(path_or_identifier) == 32:
                # it's an md5
                self.pyechonest_track = track.track_from_md5(path_or_identifier)
            # NOTE(review): a string that is neither an ID nor a 32-char MD5
            # leaves self.pyechonest_track unassigned, so the check below
            # raises AttributeError instead of EchoNestRemixError -- confirm.

        if self.pyechonest_track is None:
            raise EchoNestRemixError('Could not find track %s' % path_or_identifier)

        # Back-reference to the owning audio object; assigned later by callers
        # such as LocalAudioFile (see its __init__).
        self.source = None

        # Lazily-populated caches backing the rhythm/structure properties below.
        self._bars = None
        self._beats = None
        self._tatums = None
        self._sections = None
        self._segments = None

        self.identifier = self.pyechonest_track.id
        # Patching around the fact that sometimes pyechonest doesn't give back metadata
        # As of 11/2012, metadata is not used by remix
        try:
            self.metadata = self.pyechonest_track.meta
        except AttributeError:
            self.metadata = None
            print >> sys.stderr, "Warning: no metadata returned for track."

        # Attributes that come with confidences are stored as
        # {'value': ..., 'confidence': ...} dicts.
        for attribute in ('time_signature', 'mode', 'tempo', 'key'):
            d = {}
            d['value'] = getattr(self.pyechonest_track, attribute)
            d['confidence'] = getattr(self.pyechonest_track, attribute + '_confidence')
            setattr(self, attribute, d)

        # Scalar attributes are copied straight across.
        for attribute in ('end_of_fade_in', 'start_of_fade_out', 'duration', 'loudness'):
            setattr(self, attribute, getattr(self.pyechonest_track, attribute))

    @property
    def bars(self):
        # Parsed and cached on first access; attach() links the list back
        # to this analysis.
        if self._bars is None:
            self._bars = _dataParser('bar', self.pyechonest_track.bars)
            self._bars.attach(self)
        return self._bars

    @property
    def beats(self):
        # Parsed and cached on first access.
        if self._beats is None:
            self._beats = _dataParser('beat', self.pyechonest_track.beats)
            self._beats.attach(self)
        return self._beats

    @property
    def tatums(self):
        # Parsed and cached on first access.
        if self._tatums is None:
            self._tatums = _dataParser('tatum', self.pyechonest_track.tatums)
            self._tatums.attach(self)
        return self._tatums

    @property
    def sections(self):
        # Parsed and cached on first access.
        if self._sections is None:
            self._sections = _attributeParser('section', self.pyechonest_track.sections)
            self._sections.attach(self)
        return self._sections

    @property
    def segments(self):
        # Parsed and cached on first access.
        if self._segments is None:
            self._segments = _segmentsParser(self.pyechonest_track.segments)
            self._segments.attach(self)
        return self._segments

    def __getstate__(self):
        """
        Eliminates the circular reference for pickling.
        """
        dictclone = self.__dict__.copy()
        del dictclone['source']
        return dictclone

    def __setstate__(self, state):
        """
        Recreates circular references after unpickling.
        """
        self.__dict__.update(state)
        if hasattr(AudioAnalysis, 'CACHED_VARIABLES'):
            for cached_var in AudioAnalysis.CACHED_VARIABLES:
                if type(object.__getattribute__(self, cached_var)) == AudioQuantumList:
                    object.__getattribute__(self, cached_var).attach(self)
193
class AudioRenderable(object):
    """
    An object that gives an `AudioData` in response to a call to its `render`\()
    method.
    Intended to be an abstract class that helps enforce the `AudioRenderable`
    protocol. Picked up a couple of convenience methods common to many descendants.

    Every `AudioRenderable` must provide three things:

    render()
        A method returning the `AudioData` for the object. The rhythmic duration (point
        at which any following audio is appended) is signified by the `endindex` accessor,
        measured in samples.
    source
        An accessor pointing to the `AudioData` that contains the original sample data of
        (a superset of) this audio object.
    duration
        An accessor returning the rhythmic duration (in seconds) of the audio object.
    """
    def resolve_source(self, alt):
        """
        Given an alternative, fallback `alt` source, return either `self`'s
        source or the alternative. Throw an informative error if no source
        is found.

        Utility code that ended up being replicated in several places, so
        it ended up here. Not necessary for use in the RenderableAudioObject
        protocol.
        """
        if hasattr(self, 'source'):
            source = self.source
        else:
            # Fall back to alt only when it is actually sample data.
            if isinstance(alt, AudioData):
                source = alt
            else:
                print >> sys.stderr, self.__repr__()
                raise EchoNestRemixError("%s has no implicit or explicit source \
                                          during rendering." %
                                         (self.__class__.__name__, ))
        return source

    @staticmethod
    def init_audio_data(source, num_samples):
        """
        Convenience function for rendering: return a pre-allocated, zeroed
        `AudioData`.
        """
        # Mirror the channel layout of the source.
        if source.numChannels > 1:
            newchans = source.numChannels
            newshape = (num_samples, newchans)
        else:
            newchans = 1
            newshape = (num_samples,)
        # A 32-bit accumulator gives headroom while summing 16-bit samples.
        return AudioData32(shape=newshape, sampleRate=source.sampleRate,
                           numChannels=newchans, defer=False)

    def sources(self):
        # The set of AudioData objects this renderable draws samples from.
        return set([self.source])

    def encode(self, filename):
        """
        Shortcut function that takes care of the need to obtain an `AudioData`
        object first, through `render`.
        """
        self.render().encode(filename)
260
class AudioData(AudioRenderable):
    """
    Handles audio data transparently. A smart audio container
    with accessors that include:

    sampleRate
        samples per second
    numChannels
        number of channels
    data
        a `numpy.array`_

    .. _numpy.array: http://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html
    """
    def __init__(self, filename=None, ndarray = None, shape=None, sampleRate=None, numChannels=None, defer=False, verbose=True):
        """
        Given an input `ndarray`, import the sample values and shape
        (if none is specified) of the input `numpy.array`.

        Given a `filename` (and an input ndarray), use ffmpeg to convert
        the file to wave, then load the file into the data,
        auto-detecting the sample rate, and number of channels.

        :param filename: a path to an audio file for loading its sample
                         data into the AudioData.data
        :param ndarray: a `numpy.array`_ instance with sample data
        :param shape: a tuple of array dimensions
        :param sampleRate: sample rate, in Hz
        :param numChannels: number of channels
        :param defer: when True, postpone reading sample data until `load`
        :param verbose: echo ffmpeg commands and progress to stderr

        .. _numpy.array: http://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html
        """
        self.verbose = verbose
        if (filename is not None) and (ndarray is None) :
            if sampleRate is None or numChannels is None:
                # force sampleRate and numChannels to 44100 hz, 2
                sampleRate, numChannels = 44100, 2
                # Probe the file with ffmpeg and use whatever it reports instead.
                parsestring = ffmpeg(filename, overwrite=False, verbose=self.verbose)
                ffmpeg_error_check(parsestring[1])
                sampleRate, numChannels = settings_from_ffmpeg(parsestring[1])
        self.defer = defer
        self.filename = filename
        self.sampleRate = sampleRate
        self.numChannels = numChannels
        self.convertedfile = None   # path of the temporary .wav conversion, if any
        self.endindex = 0           # one past the last meaningful sample in self.data
        # Allocate or load the 16-bit sample buffer (unless deferred).
        if shape is None and isinstance(ndarray, numpy.ndarray) and not self.defer:
            self.data = numpy.zeros(ndarray.shape, dtype=numpy.int16)
        elif shape is not None and not self.defer:
            self.data = numpy.zeros(shape, dtype=numpy.int16)
        elif not self.defer and self.filename:
            self.data = None
            self.load()
        else:
            self.data = None
        if ndarray is not None and self.data is not None:
            self.endindex = len(ndarray)
            self.data[0:self.endindex] = ndarray

    def load(self):
        """
        Read the sample data into ``self.data``, converting via ffmpeg to a
        temporary wave file first when the source is not already a
        44100 Hz stereo ``.wav``.
        """
        if isinstance(self.data, numpy.ndarray):
            # Already loaded; nothing to do.
            return
        temp_file_handle = None
        if self.filename.lower().endswith(".wav") and (self.sampleRate, self.numChannels) == (44100, 2):
            # Directly readable wave file.
            file_to_read = self.filename
        elif self.convertedfile:
            # Reuse an earlier conversion.
            file_to_read = self.convertedfile
        else:
            # Convert with ffmpeg into a fresh temporary wave file.
            temp_file_handle, self.convertedfile = tempfile.mkstemp(".wav")
            result = ffmpeg(self.filename, self.convertedfile, overwrite=True,
                            numChannels=self.numChannels, sampleRate=self.sampleRate, verbose=self.verbose)
            ffmpeg_error_check(result[1])
            file_to_read = self.convertedfile

        w = wave.open(file_to_read, 'r')
        numFrames = w.getnframes()
        raw = w.readframes(numFrames)
        sampleSize = numFrames * self.numChannels
        # "<h" == little-endian signed 16-bit, the wave sample format.
        data = numpy.frombuffer(raw, dtype="<h", count=sampleSize)
        ndarray = numpy.array(data, dtype=numpy.int16)
        if self.numChannels > 1:
            # Reshape the flat interleaved samples into (frames, channels).
            ndarray.resize((numFrames, self.numChannels))
        self.data = numpy.zeros(ndarray.shape, dtype=numpy.int16)
        self.endindex = 0
        if ndarray is not None:
            self.endindex = len(ndarray)
            self.data = ndarray
        if temp_file_handle is not None:
            os.close(temp_file_handle)
        w.close()

    def __getitem__(self, index):
        """
        Fetches a frame or slice. Returns an individual frame (if the index
        is a time offset float or an integer sample number) or a slice if
        the index is an `AudioQuantum` (or quacks like one).
        """
        if not isinstance(self.data, numpy.ndarray) and self.defer:
            self.load()
        if isinstance(index, float):
            # Seconds -> sample offset.
            index = int(index*self.sampleRate)
        elif hasattr(index, "start") and hasattr(index, "duration"):
            # AudioQuantum-like: turn it into a slice in seconds.
            index = slice(float(index.start), index.start + index.duration)

        if isinstance(index, slice):
            if ( hasattr(index.start, "start") and
                 hasattr(index.stop, "duration") and
                 hasattr(index.stop, "start") ) :
                # A slice bounded by two AudioQuantum-like objects.
                index = slice(index.start.start, index.stop.start + index.stop.duration)

        if isinstance(index, slice):
            return self.getslice(index)
        else:
            return self.getsample(index)

    def getslice(self, index):
        "Help `__getitem__` return a new AudioData for a given slice"
        if not isinstance(self.data, numpy.ndarray) and self.defer:
            self.load()
        if isinstance(index.start, float):
            # Bounds in seconds -> bounds in samples.
            index = slice(int(index.start * self.sampleRate),
                          int(index.stop * self.sampleRate), index.step)
        return AudioData(None, self.data[index], sampleRate=self.sampleRate,
                         numChannels=self.numChannels, defer=False)

    def getsample(self, index):
        """
        Help `__getitem__` return a frame (all channels for a given
        sample index)
        """
        if not isinstance(self.data, numpy.ndarray) and self.defer:
            self.load()
        if isinstance(index, int):
            return self.data[index]
        else:
            #let the numpy array interface be clever
            return AudioData(None, self.data[index], defer=False)

    def pad_with_zeros(self, num_samples):
        # Grow the buffer by num_samples zeroed frames (no-op if <= 0).
        if num_samples > 0:
            if self.numChannels == 1:
                extra_shape = (num_samples,)
            else:
                extra_shape = (num_samples, self.numChannels)
            self.data = numpy.append(self.data,
                                     numpy.zeros(extra_shape, dtype=numpy.int16), axis=0)

    def append(self, another_audio_data):
        "Appends the input to the end of this `AudioData`."
        # Grow only by however much the existing tail slack cannot absorb,
        # then mix (+=) the new samples in starting at endindex.
        extra = len(another_audio_data.data) - (len(self.data) - self.endindex)
        self.pad_with_zeros(extra)
        self.data[self.endindex : self.endindex + len(another_audio_data)] += another_audio_data.data
        self.endindex += another_audio_data.endindex

    def sum(self, another_audio_data):
        # Mix the other audio into this one, aligned at sample 0.
        extra = len(another_audio_data.data) - len(self.data)
        self.pad_with_zeros(extra)
        # NOTE(review): the "- 1" excludes the final overlapping sample
        # from the mix -- confirm that is intended.
        compare_limit = min(len(another_audio_data.data), len(self.data)) - 1
        self.data[ : compare_limit] += another_audio_data.data[ : compare_limit]

    def add_at(self, time, another_audio_data):
        """
        Adds the input `another_audio_data` to this `AudioData`
        at the `time` specified in seconds.
        """
        offset = int(time * self.sampleRate)
        extra = offset + len(another_audio_data.data) - len(self.data)
        self.pad_with_zeros(extra)
        if another_audio_data.numChannels < self.numChannels:
            # Up-mix (e.g. mono -> stereo) by duplicating each sample per channel.
            another_audio_data.data = numpy.repeat(another_audio_data.data, self.numChannels).reshape(len(another_audio_data), self.numChannels)
        self.data[offset : offset + len(another_audio_data.data)] += another_audio_data.data

    def __len__(self):
        # Length of the underlying buffer, in frames (0 if not loaded).
        if self.data is not None:
            return len(self.data)
        else:
            return 0

    def __add__(self, other):
        """Supports stuff like this: sound3 = sound1 + sound2"""
        return assemble([self, other], numChannels=self.numChannels,
                        sampleRate=self.sampleRate)

    def encode(self, filename=None, mp3=None):
        """
        Outputs an MP3 or WAVE file to `filename`.
        Format is determined by `mp3` parameter.
        """
        # With mp3 unspecified, a ".wav" filename selects WAV; anything else MP3.
        # NOTE(review): an explicit mp3=False with a non-.wav filename is still
        # coerced to MP3 here -- confirm that is intended.
        if not mp3 and filename.lower().endswith('.wav'):
            mp3 = False
        else:
            mp3 = True
        if mp3:
            foo, tempfilename = tempfile.mkstemp(".wav")
            os.close(foo)
        else:
            tempfilename = filename
        # Write a minimal RIFF/WAVE header followed by the raw samples.
        fid = open(tempfilename, 'wb')
        # Based on Scipy svn
        # http://projects.scipy.org/pipermail/scipy-svn/2007-August/001189.html
        fid.write('RIFF')
        fid.write(struct.pack('<i',0)) # write a 0 for length now, we'll go back and add it later
        fid.write('WAVE')
        # fmt chunk
        fid.write('fmt ')
        if self.data.ndim == 1:
            noc = 1
        else:
            noc = self.data.shape[1]
        bits = self.data.dtype.itemsize * 8
        sbytes = self.sampleRate * (bits / 8) * noc   # average bytes per second
        ba = noc * (bits / 8)                         # block align
        fid.write(struct.pack('<ihHiiHH', 16, 1, noc, self.sampleRate, sbytes, ba, bits))
        # data chunk
        fid.write('data')
        fid.write(struct.pack('<i', self.data.nbytes))
        self.data.tofile(fid)
        # Determine file size and place it in correct
        # position at start of the file.
        size = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<i', size - 8))
        fid.close()
        if not mp3:
            return tempfilename
        # now convert it to mp3
        if not filename.lower().endswith('.mp3'):
            filename = filename + '.mp3'
        try:
            bitRate = MP3_BITRATE
        except NameError:
            bitRate = 128   # fall back if the module constant is missing
        parsestring = ffmpeg(tempfilename, filename, bitRate=bitRate, verbose=self.verbose)
        ffmpeg_error_check(parsestring[1])
        if tempfilename != filename:
            if self.verbose:
                print >> sys.stderr, "Deleting: %s" % tempfilename
            os.remove(tempfilename)
        return filename

    def unload(self):
        # Drop the in-memory samples and remove any temporary conversion file.
        self.data = None
        if self.convertedfile:
            if self.verbose:
                print >> sys.stderr, "Deleting: %s" % self.convertedfile
            os.remove(self.convertedfile)
            self.convertedfile = None

    def render(self, start=0.0, to_audio=None, with_source=None):
        # With no target, this AudioData is already its own rendering.
        if not to_audio:
            return self
        # Only mix into the target when this object is the designated source.
        if with_source != self:
            return
        to_audio.add_at(start, self)
        return

    @property
    def duration(self):
        # Rhythmic duration in seconds, derived from endindex.
        return float(self.endindex) / self.sampleRate

    @property
    def source(self):
        # An AudioData is its own source.
        return self
525
class AudioData32(AudioData):
    """A 32-bit variant of AudioData, intended for data collection on
    audio rendering with headroom."""
    def __init__(self, filename=None, ndarray = None, shape=None, sampleRate=None, numChannels=None, defer=False, verbose=True):
        """
        Special form of AudioData to allow for headroom when collecting samples.

        Parameters are identical to `AudioData.__init__`; the internal
        buffer is int32 instead of int16, and `normalize` converts back
        to 16-bit for encoding.
        """
        self.verbose = verbose
        if (filename is not None) and (ndarray is None) :
            if sampleRate is None or numChannels is None:
                # force sampleRate and numChannels to 44100 hz, 2
                sampleRate, numChannels = 44100, 2
                # Probe the file with ffmpeg and use whatever it reports instead.
                parsestring = ffmpeg(filename, overwrite=False, verbose=self.verbose)
                ffmpeg_error_check(parsestring[1])
                sampleRate, numChannels = settings_from_ffmpeg(parsestring[1])
        self.defer = defer
        self.filename = filename
        self.sampleRate = sampleRate
        self.numChannels = numChannels
        self.convertedfile = None
        self.normalized = None   # int16 copy built by normalize() for encode()
        if shape is None and isinstance(ndarray, numpy.ndarray) and not self.defer:
            self.data = numpy.zeros(ndarray.shape, dtype=numpy.int32)
        elif shape is not None and not self.defer:
            self.data = numpy.zeros(shape, dtype=numpy.int32)
        elif not self.defer and self.filename:
            self.load()
        else:
            self.data = None
        # NOTE(review): this reset happens after the load() branch above, which
        # itself sets endindex -- confirm the reset does not clobber it.
        self.endindex = 0
        if ndarray is not None and self.data is not None:
            self.endindex = len(ndarray)
            self.data[0:self.endindex] = ndarray

    def load(self):
        """
        Read the sample data (as in `AudioData.load`) but copy it into a
        32-bit buffer for headroom.
        """
        if isinstance(self.data, numpy.ndarray):
            # Already loaded; nothing to do.
            return
        temp_file_handle = None
        if self.filename.lower().endswith(".wav") and (self.sampleRate, self.numChannels) == (44100, 2):
            file_to_read = self.filename
        elif self.convertedfile:
            file_to_read = self.convertedfile
        else:
            # Convert with ffmpeg into a fresh temporary wave file.
            temp_file_handle, self.convertedfile = tempfile.mkstemp(".wav")
            result = ffmpeg(self.filename, self.convertedfile, overwrite=True,
                            numChannels=self.numChannels, sampleRate=self.sampleRate, verbose=self.verbose)
            ffmpeg_error_check(result[1])
            file_to_read = self.convertedfile

        w = wave.open(file_to_read, 'r')
        numFrames = w.getnframes()
        raw = w.readframes(numFrames)
        sampleSize = numFrames * self.numChannels
        # "<h" == little-endian signed 16-bit, the wave sample format.
        data = numpy.frombuffer(raw, dtype="<h", count=sampleSize)
        ndarray = numpy.array(data, dtype=numpy.int16)
        if self.numChannels > 1:
            # Reshape the flat interleaved samples into (frames, channels).
            ndarray.resize((numFrames, self.numChannels))
        # Widen into a 32-bit accumulator buffer.
        self.data = numpy.zeros(ndarray.shape, dtype=numpy.int32)
        self.endindex = 0
        if ndarray is not None:
            self.endindex = len(ndarray)
            self.data[0:self.endindex] = ndarray
        if temp_file_handle is not None:
            os.close(temp_file_handle)
        w.close()

    def encode(self, filename=None, mp3=None):
        """
        Outputs an MP3 or WAVE file to `filename`.
        Format is determined by `mp3` parameter.
        """
        # Fold the 32-bit accumulator back into 16-bit samples first.
        self.normalize()
        temp_file_handle = None
        if not mp3 and filename.lower().endswith('.wav'):
            mp3 = False
        else:
            mp3 = True
        if mp3:
            temp_file_handle, tempfilename = tempfile.mkstemp(".wav")
        else:
            tempfilename = filename
        # Write a minimal RIFF/WAVE header followed by the normalized samples.
        fid = open(tempfilename, 'wb')
        # Based on Scipy svn
        # http://projects.scipy.org/pipermail/scipy-svn/2007-August/001189.html
        fid.write('RIFF')
        fid.write(struct.pack('<i',0)) # write a 0 for length now, we'll go back and add it later
        fid.write('WAVE')
        # fmt chunk
        fid.write('fmt ')
        if self.normalized.ndim == 1:
            noc = 1
        else:
            noc = self.normalized.shape[1]
        bits = self.normalized.dtype.itemsize * 8
        sbytes = self.sampleRate*(bits / 8)*noc   # average bytes per second
        ba = noc * (bits / 8)                     # block align
        fid.write(struct.pack('<ihHiiHH', 16, 1, noc, self.sampleRate, sbytes, ba, bits))
        # data chunk
        fid.write('data')
        fid.write(struct.pack('<i', self.normalized.nbytes))
        self.normalized.tofile(fid)
        # Determine file size and place it in correct
        # position at start of the file.
        size = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<i', size-8))
        fid.close()
        self.normalized = None
        if not mp3:
            return tempfilename
        # now convert it to mp3
        if not filename.lower().endswith('.mp3'):
            filename = filename + '.mp3'
        try:
            bitRate = MP3_BITRATE
        except NameError:
            bitRate = 128   # fall back if the module constant is missing
        parsestring = ffmpeg(tempfilename, filename, bitRate=bitRate, verbose=self.verbose)
        ffmpeg_error_check(parsestring[1])
        if tempfilename != filename:
            if self.verbose:
                print >> sys.stderr, "Deleting: %s" % tempfilename
            os.remove(tempfilename)
        if temp_file_handle is not None:
            os.close(temp_file_handle)
        return filename

    def normalize(self):
        """Return to 16-bit for encoding."""
        if self.numChannels == 1:
            self.normalized = numpy.zeros((self.data.shape[0],), dtype=numpy.int16)
        else:
            self.normalized = numpy.zeros((self.data.shape[0], self.data.shape[1]), dtype=numpy.int16)

        # Scale the loudest sample to 32767 if anything clipped past 16 bits.
        factor = 32767.0 / numpy.max(numpy.absolute(self.data.flatten()))
        # If the max was 32768, don't bother scaling:
        if factor < 1.000031:
            self.normalized[:len(self.data)] += self.data * factor
        else:
            self.normalized[:len(self.data)] += self.data

    def pad_with_zeros(self, num_samples):
        # Same as AudioData.pad_with_zeros, but keeps the int32 dtype.
        if num_samples > 0:
            if self.numChannels == 1:
                extra_shape = (num_samples,)
            else:
                extra_shape = (num_samples, self.numChannels)
            self.data = numpy.append(self.data,
                                     numpy.zeros(extra_shape, dtype=numpy.int32), axis=0)
676
def get_os():
    """returns is_linux, is_mac, is_windows"""
    if not hasattr(os, 'uname'):
        # No uname() available: assume Windows.
        return False, False, True
    # uname() exists on both Linux and Mac; Darwin distinguishes the Mac.
    mac = os.uname()[0] == "Darwin"
    return not mac, mac, False
685
def ffmpeg(infile, outfile=None, overwrite=True, bitRate=None, numChannels=None, sampleRate=None, verbose=True):
    """
    Executes ffmpeg through the shell to convert or read media files.

    :param infile: path to the input media file
    :param outfile: optional output path; when omitted ffmpeg only probes the
                    input, and the stderr text can be parsed with
                    `settings_from_ffmpeg`
    :param overwrite: when True, pass ``-y`` so an existing outfile is replaced
    :param bitRate: output audio bit rate in kbit/s (``-ab``)
    :param numChannels: output channel count (``-ac``)
    :param sampleRate: output sample rate in Hz (``-ar``)
    :param verbose: echo the constructed command line to stderr
    :return: the ``(stdout, stderr)`` tuple from ``Popen.communicate()``
    """
    # NOTE(review): the command is assembled by string concatenation and run
    # with shell=True, so file names containing quotes or backticks can break
    # the command or inject shell syntax; a list argv with shell=False would
    # be safer.
    command = "en-ffmpeg"
    if overwrite:
        command += " -y"
    command += " -i \"" + infile + "\""
    if bitRate is not None:
        command += " -ab " + str(bitRate) + "k"
    if numChannels is not None:
        command += " -ac " + str(numChannels)
    if sampleRate is not None:
        command += " -ar " + str(sampleRate)
    if outfile is not None:
        command += " \"%s\"" % outfile
    if verbose:
        print >> sys.stderr, command

    # close_fds differs per platform -- presumably because close_fds=True was
    # not supported with redirected pipes on Windows; verify.
    (lin, mac, win) = get_os()
    if(not win):
        p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    else:
        p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
    return_val = p.communicate()
    return return_val
712
def settings_from_ffmpeg(parsestring):
    """
    Parses the output of ffmpeg to determine sample rate and frequency of
    an audio file.  Defaults to 44100 Hz stereo when no audio stream
    information is found.
    """
    # Defaults when the probe output tells us nothing.
    freq, chans = 44100, 2
    for line in parsestring.split('\n'):
        # Only the audio-stream description line carries the settings.
        if "Stream #0" not in line or "Audio" not in line:
            continue
        for token in line.split(", "):
            if "Hz" in token:
                # e.g. "44100 Hz" -> 44100
                freq = int(token.split(" ")[0])
            elif "stereo" in token:
                chans = 2
            elif "mono" in token:
                chans = 1
    return freq, chans
# Message raised when the ``en-ffmpeg`` wrapper cannot be found on the PATH
# (see ffmpeg_error_check).
ffmpeg_install_instructions = """
en-ffmpeg not found! Please make sure ffmpeg is installed and create a link as follows:
    sudo ln -s `which ffmpeg` /usr/local/bin/en-ffmpeg
"""
def ffmpeg_error_check(parsestring):
    "Looks for known errors in the ffmpeg output"
    known_errors = (
        "Unknown format",   # ffmpeg can't figure out format of input file
        "error occur",      # an error occurred
        "Could not open",   # user doesn't have permission to access file
        "not found",        # could not find encoder for output file
    )
    lines = parsestring.split('\n')
    for num, line in enumerate(lines):
        # The shell reports a missing en-ffmpeg binary; checked first since
        # "not found" below is a substring of "command not found".
        if "command not found" in line:
            raise RuntimeError(ffmpeg_install_instructions)
        for error in known_errors:
            if error in line:
                # Include everything from the offending line onward.
                raise RuntimeError("ffmpeg conversion error:\n\t" +
                                   "\n\t".join(lines[num:]))
754
def getpieces(audioData, segs):
    """
    Collects audio samples for output.
    Returns a new `AudioData` where the new sample data is assembled
    from the input audioData according to the time offsets in each
    of the elements of the input segs (commonly an `AudioQuantumList`).

    :param audioData: an `AudioData` object
    :param segs: an iterable containing objects that may be accessed
                 as slices or indices for an `AudioData`
    """
    # Force a (re)load so the slicing below sees the full sample data.
    audioData.data = None
    audioData.load()

    # Total output length in samples, rounding each segment down.
    dur = sum(int(s.duration * audioData.sampleRate) for s in segs)
    # if I wanted to add some padding to the length, I'd do it here

    # Match the channel layout of the source.
    if len(audioData.data.shape) > 1:
        newshape = (dur, audioData.data.shape[1])
        newchans = audioData.data.shape[1]
    else:
        newshape = (dur,)
        newchans = 1

    # Accumulator for the assembled output.
    pieces = AudioData(shape=newshape, sampleRate=audioData.sampleRate,
                       numChannels=newchans, defer=False, verbose=audioData.verbose)

    # Concatenate each requested slice of the source.
    for s in segs:
        pieces.append(audioData[s])
    # audioData.unload()
    return pieces
791
def assemble(audioDataList, numChannels=1, sampleRate=44100, verbose=True):
    """
    Collects audio samples for output.
    Returns a new `AudioData` object assembled
    by concatenating all the elements of audioDataList.

    :param audioDataList: a list of `AudioData` objects
    """
    # Pre-size the output buffer to the combined length of all inputs.
    total = sum(len(x.data) for x in audioDataList)
    if numChannels == 1:
        new_shape = (total,)
    else:
        new_shape = (total, numChannels)
    result = AudioData(shape=new_shape, numChannels=numChannels,
                       sampleRate=sampleRate, defer=False, verbose=verbose)
    for piece in audioDataList:
        if not isinstance(piece, AudioData):
            raise TypeError('Encountered something other than an AudioData')
        result.append(piece)
    return result
811
def mix(dataA,dataB,mix=0.5):
    """
    Mixes two `AudioData` objects. Assumes they have the same sample rate
    and number of channels.

    Mix takes a float 0-1 and determines the relative mix of two audios.
    i.e., mix=0.9 yields greater presence of dataA in the final mix.
    """
    wet = float(mix)
    # The longer input provides the output's shape; the shorter one is
    # mixed into its head.  Ties go to dataB as the base.
    if dataA.endindex > dataB.endindex:
        base, overlay = dataA, dataB
        base_gain, overlay_gain = wet, 1 - wet
    else:
        base, overlay = dataB, dataA
        base_gain, overlay_gain = 1 - wet, wet
    newdata = AudioData(ndarray=base.data, sampleRate=base.sampleRate,
                        numChannels=base.numChannels, defer=False)
    newdata.data *= base_gain
    newdata.data[:overlay.endindex] += overlay.data[:] * overlay_gain
    return newdata
829
def megamix(dataList):
    """
    Mix together any number of `AudioData` objects. Keep the shape of
    the first one in the list. Assume they all have the same sample rate
    and number of channels.
    """
    if not isinstance(dataList, list):
        raise TypeError('input must be a list of AudioData objects')
    first = dataList[0]
    # The output inherits the first input's shape and settings.
    mixed = AudioData(shape=first.data.shape, sampleRate=first.sampleRate,
                      numChannels=first.numChannels, defer=False)
    for track_data in dataList:
        if not isinstance(track_data, AudioData):
            raise TypeError('input must be a list of AudioData objects')
        if len(track_data) > len(mixed):
            # Longer than the output: truncate to the output's length.
            segment = AudioData(ndarray=track_data[:mixed.endindex].data,
                                numChannels=mixed.numChannels,
                                sampleRate=mixed.sampleRate, defer=False)
            segment.endindex = mixed.endindex
        else:
            segment = AudioData(ndarray=track_data.data,
                                numChannels=mixed.numChannels,
                                sampleRate=mixed.sampleRate, defer=False)
            segment.endindex = track_data.endindex
        # Each input contributes an equal 1/N share of the final mix.
        mixed.data[:segment.endindex] += segment.data / float(len(dataList))
    mixed.endindex = len(mixed)
    return mixed
856
class LocalAudioFile(AudioData):
    """
    The basic do-everything class for remixing. Acts as an `AudioData`
    object, but with an added `analysis` selector which is an
    `AudioAnalysis` object. It conditionally uploads the file
    it was initialized with. If the file is already known to the
    Analyze API, then it does not bother uploading the file.
    """

    def __new__(cls, filename, verbose=True, defer=False, sampleRate=None, numChannels=None):
        # A ".analysis.en" path is a pickled LocalAudioFile from save():
        # unpickle and return it instead of constructing a new object.
        # There must be a better way to avoid collisions between analysis files and .wav files
        if '.analysis.en' in filename:
            print >> sys.stderr, "Reading analysis from local file " + filename
            f = open(filename, 'rb')
            audiofile = pickle.load(f)
            f.close()
            return audiofile
        else:
            # This just creates the object and goes straight on to initializing it
            # NOTE(review): numChannels is accepted but not forwarded here -- confirm.
            return AudioData.__new__(cls, filename=filename, verbose=verbose, defer=defer, sampleRate=sampleRate)

    def __init__(self, filename, verbose=True, defer=False, sampleRate=None, numChannels=None):
        """
        :param filename: path to a local MP3 file
        """
        # We have to skip the initialization here as the local file is already a complete object
        if '.analysis.en' in filename:
            self.is_local = True
        else:
            AudioData.__init__(self, filename=filename, verbose=verbose, defer=defer,
                               sampleRate=sampleRate, numChannels=numChannels)
            # Identify the file by content hash so an existing server-side
            # analysis can be reused instead of re-uploading.
            track_md5 = hashlib.md5(file(self.filename, 'rb').read()).hexdigest()

            if verbose:
                print >> sys.stderr, "Computed MD5 of file is " + track_md5
            try:
                if verbose:
                    print >> sys.stderr, "Probing for existing analysis"
                tempanalysis = AudioAnalysis(track_md5)
            except Exception, e:
                # Unknown to the API: upload the file itself for analysis.
                if verbose:
                    print >> sys.stderr, "Analysis not found. Uploading..."
                tempanalysis = AudioAnalysis(filename)

            self.analysis = tempanalysis
            self.analysis.source = self
            self.is_local = False

    # Save out as a pickled file.
    def save(self):
        """
        Pickle this object (analysis plus converted audio) next to the
        input file, as ``<name>.analysis.en`` and ``<name>.wav``, so a
        later construction can skip the network round trip.
        """
        # If we loaded from a local file, there's no need to save
        if self.is_local is True:
            print >> sys.stderr, "Analysis was loaded from local file, not saving"
        else:
            input_path = os.path.split(self.filename)[0]
            input_file = os.path.split(self.filename)[1]
            path_to_wave = self.convertedfile
            wav_filename = input_file + '.wav'
            new_path = os.path.abspath(input_path) + os.path.sep
            wav_path = new_path + wav_filename
            try:
                # Keep the converted wave alongside the original so the pickle
                # can refer to a stable path.
                shutil.copyfile(path_to_wave, wav_path)
            except shutil.Error:
                print >> sys.stderr, "Error when moving .wav file: the same file may already exist in this folder"
                return
            self.convertedfile = wav_path
            analysis_filename = input_file + '.analysis.en'
            analysis_path = new_path + analysis_filename
            print >> sys.stderr, "Saving analysis to local file " + analysis_path
            f = open(analysis_path, 'wb')
            pickle.dump(self, f)
            f.close()

    def toxml(self, context=None):
        # XML serialization is not supported for whole files.
        raise NotImplementedError

    @property
    def duration(self):
        """
        Since we consider `AudioFile` to be an evolved version of
        `AudioData`, we return the measured duration from the analysis.
        """
        return self.analysis.duration

    def __setstate__(self, state):
        """
        Recreates circular reference after unpickling.
        """
        self.__dict__.update(state)
        # weakref avoids a pickle-surviving reference cycle with the analysis.
        self.analysis.source = weakref.proxy(self)
948
949 950 -class LocalAnalysis(object):
951 """ 952 Like `LocalAudioFile`, it conditionally uploads the file with which 953 it was initialized. Unlike `LocalAudioFile`, it is not a subclass of 954 `AudioData`, so contains no sample data. 955 """
956 - def __init__(self, filename, verbose=True):
957 """ 958 :param filename: path to a local MP3 file 959 """ 960 961 track_md5 = hashlib.md5(file(filename, 'rb').read()).hexdigest() 962 if verbose: 963 print >> sys.stderr, "Computed MD5 of file is " + track_md5 964 try: 965 if verbose: 966 print >> sys.stderr, "Probing for existing analysis" 967 tempanalysis = AudioAnalysis(track_md5) 968 except Exception, e: 969 print e 970 if verbose: 971 print >> sys.stderr, "Analysis not found. Uploading..." 972 tempanalysis = AudioAnalysis(filename) 973 974 self.analysis = tempanalysis 975 self.analysis.source = self
976
class AudioQuantum(AudioRenderable):
    """
    A unit of musical time, identified at minimum with a start time and
    a duration, both in seconds. It most often corresponds with a `section`,
    `bar`, `beat`, `tatum`, or (by inheritance) `segment` obtained from an Analyze
    API call.

    Additional properties include:

    end
        computed time offset for convenience: `start` + `duration`
    container
        a circular reference to the containing `AudioQuantumList`,
        created upon creation of the `AudioQuantumList` that covers
        the whole track
    """
    def __init__(self, start=0, duration=0, kind=None, confidence=None, source=None):
        """
        Initializes an `AudioQuantum`.

        :param start: offset from the start of the track, in seconds
        :param duration: length of the `AudioQuantum`
        :param kind: string containing what kind of rhythm unit it came from
        :param confidence: float between zero and one
        """
        self.start = start
        self.duration = duration
        self.kind = kind
        self.confidence = confidence
        self._source = source

    def get_end(self):
        # Computed on demand so `end` always tracks start/duration edits.
        return self.start + self.duration

    end = property(get_end, doc="""
    A computed property: the sum of `start` and `duration`.
    """)

    def get_source(self):
        "Returns itself or its parent."
        if self._source:
            return self._source
        else:
            source = None
            try:
                source = self.container.source
            except AttributeError:
                # Uncontained quantum: no source available.
                source = None
            return source

    def set_source(self, value):
        if isinstance(value, AudioData):
            self._source = value
        else:
            raise TypeError("Source must be an instance of echonest.remix.audio.AudioData")

    source = property(get_source, set_source, doc="""
    The `AudioData` source for the AudioQuantum.
    """)

    def parent(self):
        """
        Returns the containing `AudioQuantum` in the rhythm hierarchy:
        a `tatum` returns a `beat`, a `beat` returns a `bar`, and a `bar` returns a
        `section`.
        Note that some AudioQuantums have no parent. None will be returned in this case.
        """
        parent_dict = {'tatum': 'beats',
                       'beat': 'bars',
                       'bar': 'sections'}
        try:
            all_chunks = getattr(self.container.container, parent_dict[self.kind])
            for chunk in all_chunks:
                # Any temporal overlap qualifies as containment here.
                if self.start < chunk.end and self.end > chunk.start:
                    return chunk
            return None
        except LookupError:
            # Might not be in bars, might not have anything in parent.
            return None

    def children(self):
        """
        Returns an `AudioQuantumList` of the AudioQuanta that it contains,
        one step down the hierarchy. A `beat` returns `tatums`, a `bar` returns
        `beats`, and a `section` returns `bars`.
        """
        children_dict = {'beat': 'tatums',
                         'bar': 'beats',
                         'section': 'bars'}
        try:
            all_chunks = getattr(self.container.container, children_dict[self.kind])
            child_chunks = AudioQuantumList(kind=children_dict[self.kind])
            for chunk in all_chunks:
                # Only wholly-contained chunks count as children.
                if chunk.start >= self.start and chunk.end <= self.end:
                    child_chunks.append(chunk)
                    continue
            return child_chunks
        except LookupError:
            return None

    @property
    def segments(self):
        """
        Returns any segments that overlap or are in the same timespan as the AudioQuantum.
        Note that this means that some segments will appear in more than one AudioQuantum.
        This function, thus, is NOT suited to rhythmic modifications.
        """
        # If this is a segment, return it in a list so we can iterate over it.
        if self.kind == 'segment':
            return [self]

        all_segments = self.source.analysis.segments
        filtered_segments = AudioQuantumList(kind="segment")

        # Filter and then break once we've got the needed segments:
        # segments are time-ordered, so the first non-overlapping segment
        # after a match ends the search.
        for segment in all_segments:
            if segment.start < self.end and segment.end > self.start:
                filtered_segments.append(segment)
            elif len(filtered_segments) != 0:
                break
        return filtered_segments

    def mean_pitches(self):
        """
        Returns a pitch vector that is the mean of the pitch vectors of any segments
        that overlap this AudioQuantum.
        Note that this means that some segments will appear in more than one AudioQuantum.
        """
        temp_pitches = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        segments = self.segments
        for segment in segments:
            for index, pitch in enumerate(segment.pitches):
                temp_pitches[index] = temp_pitches[index] + pitch
        mean_pitches = [pitch / len(segments) for pitch in temp_pitches]
        return mean_pitches

    def mean_timbre(self):
        """
        Returns a timbre vector that is the mean of the timbre vectors of any segments
        that overlap this AudioQuantum.
        Note that this means that some segments will appear in more than one AudioQuantum.
        """
        temp_timbre = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        segments = self.segments
        for segment in segments:
            for index, timbre in enumerate(segment.timbre):
                temp_timbre[index] = temp_timbre[index] + timbre
        mean_timbre = [timbre / len(segments) for timbre in temp_timbre]
        return mean_timbre

    def mean_loudness(self):
        """
        Returns the mean of the maximum loudness of any segments that overlap this AudioQuantum.
        Note that this means that some segments will appear in more than one AudioQuantum.
        """
        loudness_average = 0
        # Evaluate the `segments` property once, not once per iteration.
        segments = self.segments
        for segment in segments:
            loudness_average = loudness_average + segment.loudness_max
        return loudness_average / len(segments)

    def group(self):
        """
        Returns the `children`\() of the `AudioQuantum`\'s `parent`\().
        In other words: 'siblings'. If no parent is found, then return the
        `AudioQuantumList` for the whole track.
        """
        if self.parent():
            return self.parent().children()
        else:
            return self.container

    def prev(self, step=1):
        """
        Step backwards in the containing `AudioQuantumList`.
        Returns `self` if a boundary is reached.
        """
        group = self.container
        try:
            loc = group.index(self)
            new = max(loc - step, 0)
            return group[new]
        except Exception:
            return self

    def next(self, step=1):
        """
        Step forward in the containing `AudioQuantumList`.
        Returns `self` if a boundary is reached.
        """
        group = self.container
        try:
            loc = group.index(self)
            # Clamping to len(group) means an out-of-range index raises
            # IndexError below, which is caught to return `self` at the
            # boundary (deliberate, if indirect).
            new = min(loc + step, len(group))
            return group[new]
        except Exception:
            return self

    def __str__(self):
        """
        Lists the `AudioQuantum`.kind with start and
        end times, in seconds, e.g.::

            "segment (20.31 - 20.42)"
        """
        return "%s (%.2f - %.2f)" % (self.kind, self.start, self.end)

    def __repr__(self):
        """
        A string representing a constructor, including kind, start time,
        duration, and (if it exists) confidence, e.g.::

            "AudioQuantum(kind='tatum', start=42.198267, duration=0.1523394)"
        """
        if self.confidence is not None:
            return "AudioQuantum(kind='%s', start=%f, duration=%f, confidence=%f)" % (self.kind, self.start, self.duration, self.confidence)
        else:
            return "AudioQuantum(kind='%s', start=%f, duration=%f)" % (self.kind, self.start, self.duration)

    def local_context(self):
        """
        Returns a tuple of (*index*, *length*) within rhythm siblings, where
        *index* is the (zero-indexed) position within its `group`\(), and
        *length* is the number of siblings within its `group`\().
        """
        group = self.group()
        count = len(group)
        try:
            loc = group.index(self)
        except Exception:  # seem to be some uncontained beats
            loc = 0
        return (loc, count,)

    def absolute_context(self):
        """
        Returns a tuple of (*index*, *length*) within the containing
        `AudioQuantumList`, where *index* is the (zero-indexed) position within
        its container, and *length* is the number of siblings within the
        container.
        """
        group = self.container
        count = len(group)
        loc = group.index(self)
        return (loc, count,)

    def context_string(self):
        """
        Returns a one-indexed, human-readable version of context.
        For example::

            "bar 4 of 142, beat 3 of 4, tatum 2 of 3"
        """
        if self.parent() and self.kind != "bar":
            return "%s, %s %i of %i" % (self.parent().context_string(),
                                        self.kind, self.local_context()[0] + 1,
                                        self.local_context()[1])
        else:
            return "%s %i of %i" % (self.kind, self.absolute_context()[0] + 1,
                                    self.absolute_context()[1])

    def __getstate__(self):
        """
        Eliminates the circular reference for pickling.
        """
        dictclone = self.__dict__.copy()
        if 'container' in dictclone:
            del dictclone['container']
        return dictclone

    def toxml(self, context=None):
        """
        Renders the quantum as an XML element; with no context, returns a
        pretty-printed XML string instead.
        """
        attributedict = {'duration': str(self.duration),
                         'start': str(self.start)}
        try:
            if not(hasattr(context, 'source') and self.source == context.source):
                attributedict['source'] = self.source.analysis.identifier
        except Exception:
            pass
        xml = etree.Element(self.kind, attrib=attributedict)
        if context:
            return xml
        else:
            # minidom.parseString requires serialized XML text, not an
            # ElementTree Element; serialize first so pretty-printing works.
            return minidom.parseString(etree.tostring(xml)).toprettyxml()

    def render(self, start=0.0, to_audio=None, with_source=None):
        """
        Renders this quantum's samples, either returning a slice of its
        source or mixing into `to_audio` at `start` seconds.
        """
        if not to_audio:
            source = self.resolve_source(with_source)
            return source[self]
        if with_source != self.source:
            return
        to_audio.add_at(start, with_source[self])
        return
1270
class AudioSegment(AudioQuantum):
    """
    Subclass of `AudioQuantum` for the data-rich segments returned by
    the Analyze API.
    """
    def __init__(self, start=0., duration=0., pitches=None, timbre=None,
                 loudness_begin=0., loudness_max=0., time_loudness_max=0.,
                 loudness_end=None, kind='segment', source=None):
        """
        Initializes an `AudioSegment`.

        :param start: offset from start of the track, in seconds
        :param duration: duration of the `AudioSegment`, in seconds
        :param pitches: a twelve-element list with relative loudnesses of each
                pitch class, from C (pitches[0]) to B (pitches[11])
        :param timbre: a twelve-element list with the loudness of each of a
                principal component of time and/or frequency profile
        :param kind: string identifying the kind of AudioQuantum: "segment"
        :param loudness_begin: loudness in dB at the start of the segment
        :param loudness_max: loudness in dB at the loudest moment of the
                segment
        :param time_loudness_max: time (in sec from start of segment) of
                loudest moment
        :param loudness_end: loudness at end of segment (if it is given)
        """
        self.start = start
        self.duration = duration
        self.pitches = pitches or []
        self.timbre = timbre or []
        self.loudness_begin = loudness_begin
        self.loudness_max = loudness_max
        self.time_loudness_max = time_loudness_max
        # Explicit None check: a loudness_end of 0.0 dB is a legitimate
        # value and must still set the attribute (the old truthiness test
        # silently dropped it).
        if loudness_end is not None:
            self.loudness_end = loudness_end
        self.kind = kind
        self.confidence = None
        self._source = source

    @property
    def tatum(self):
        """
        Returns the tatum that overlaps most with the segment.
        Note that some segments have NO overlapping tatums.
        If this is the case, None will be returned.
        """
        all_tatums = self.source.analysis.tatums
        filtered_tatums = []
        for tatum in all_tatums:
            # If the segment contains the tatum
            if self.start < tatum.start and self.end > tatum.end:
                filtered_tatums.append((tatum, tatum.duration))
            # If the tatum contains the segment
            elif tatum.start < self.start and tatum.end > self.end:
                filtered_tatums.append((tatum, self.duration))
            # If the tatum overlaps and starts before the segment
            elif tatum.start < self.start and tatum.end > self.start:
                filtered_tatums.append((tatum, tatum.end - self.start))
            # If the tatum overlaps and starts after the segment
            elif tatum.start < self.end and tatum.end > self.end:
                filtered_tatums.append((tatum, self.end - tatum.start))
            # If we're past the segment, stop
            elif tatum.start > self.end:
                break

        # Sort and get the tatum with the maximum overlap
        sorted_tatums = sorted(filtered_tatums, key=lambda tatum: tatum[1], reverse=True)
        if not sorted_tatums:
            return None
        else:
            return sorted_tatums[0][0]

    @property
    def beat(self):
        """
        Returns the beat containing this segment's dominant tatum, or None
        when the segment has no overlapping tatum.
        """
        tatum = self.tatum
        if tatum is None:
            # No overlapping tatum: previously this crashed with an
            # AttributeError on None.
            return None
        # parent is a method, not a property: it must be called. The old
        # code returned the bound method object itself.
        return tatum.parent()
1347
class ModifiedRenderable(AudioRenderable):
    """Class that contains any AudioRenderable, but overrides the
    render() method with nested effects, called sequentially on the
    result of the preceeding effect."""
    def __init__(self, original, effects=None):
        """
        :param original: the wrapped `AudioRenderable`
        :param effects: list of effect objects applied in order at render time
        """
        # None sentinel instead of a mutable default: with `effects=[]`
        # every instance constructed without effects shared (and could
        # mutate) the same list object.
        if effects is None:
            effects = []
        if isinstance(original, ModifiedRenderable):
            # Flatten nested wrappers: keep one original, concatenate effects.
            self._original = original._original
            self._effects = original._effects + effects
        else:
            self._original = original
            self._effects = effects

    @property
    def duration(self):
        """Duration of the original, adjusted by any duration-changing effects."""
        dur = self._original.duration
        for effect in self._effects:
            if hasattr(effect, 'duration'):
                dur = effect.duration(dur)
        return dur

    @property
    def source(self):
        return self._original.source

    @property
    def sources(self):
        return self._original.sources

    def render(self, start=0.0, to_audio=None, with_source=None):
        """
        Renders the wrapped renderable, then applies each effect in order.
        With `to_audio`, mixes the result in at `start`; otherwise returns it.
        """
        if not to_audio:
            # resolve_source validates the source even though its result is unused here.
            source = self.resolve_source(with_source)
            base = self._original.render(with_source=with_source)
            copy = AudioData32(ndarray=base.data, sampleRate=base.sampleRate, numChannels=base.numChannels, defer=False)
            for effect in self._effects:
                copy = effect.modify(copy)
            return copy
        if with_source != self.source:
            return
        base = self._original.render(with_source=with_source)
        copy = AudioData32(ndarray=base.data, shape=base.data.shape, sampleRate=base.sampleRate, numChannels=base.numChannels, defer=False)
        for effect in self._effects:
            copy = effect.modify(copy)
        to_audio.add_at(start, copy)
        return

    def toxml(self, context=None):
        """
        Renders as a <modified_audioquantum> element wrapping the original
        plus an <effects> list; with no context, returns pretty-printed XML.
        """
        outerattributedict = {'duration': str(self.duration)}
        node = etree.Element("modified_audioquantum", attrib=outerattributedict)

        innerattributedict = {'duration': str(self._original.duration),
                              'start': str(self._original.start)}
        try:
            if not(hasattr(context, 'source') and self.source == context.source):
                innerattributedict['source'] = self.source.analysis.identifier
        except Exception:
            pass
        orignode = etree.Element(self._original.kind, attrib=innerattributedict)
        node.append(orignode)
        fx = etree.Element('effects')
        for effect in self._effects:
            fxdict = {'id': '%s.%s' % (effect.__module__, effect.__class__.__name__)}
            fxdict.update(effect.__dict__)
            fx.append(etree.Element('effect', attrib=fxdict))
        node.append(fx)
        if context:
            return node
        else:
            # minidom.parseString requires serialized XML text, not an
            # ElementTree Element; serialize first so pretty-printing works.
            return minidom.parseString(etree.tostring(node)).toprettyxml()
1417
class AudioEffect(object):
    """
    Base class for audio effects. Calling an effect instance on a
    renderable wraps it in a `ModifiedRenderable`, so the effect is
    applied lazily at render time and effects compose by chaining calls.
    """
    def __call__(self, aq):
        return ModifiedRenderable(aq, [self])
1422
class LevelDB(AudioEffect):
    """Effect that changes the level of the audio by a gain given in decibels."""
    def __init__(self, change):
        """:param change: gain in dB; negative values attenuate."""
        self.change = change

    def modify(self, adata):
        """Scales the sample data in place by the dB gain and returns `adata`."""
        # Convert the dB figure to a linear amplitude factor.
        gain = 10. ** (self.change / 20.)
        adata.data *= gain
        return adata
1430
class AmplitudeFactor(AudioEffect):
    """Effect that scales the audio amplitude by a linear factor."""
    def __init__(self, change):
        """:param change: linear amplitude multiplier (1.0 is unchanged)."""
        self.change = change

    def modify(self, adata):
        """Scales the sample data in place and returns `adata`."""
        factor = self.change
        adata.data *= factor
        return adata
1438
class TimeTruncateFactor(AudioEffect):
    """Effect that scales the audio's length by a multiplicative factor,
    truncating or zero-padding the samples as needed."""
    def __init__(self, factor):
        """:param factor: length multiplier; > 1 pads, < 1 truncates."""
        self.factor = factor

    def duration(self, old_duration):
        """Reports the adjusted duration: the original scaled by the factor."""
        return old_duration * self.factor

    def modify(self, adata):
        """Returns `adata` cut or padded to factor * its current length."""
        new_end = int(self.factor * len(adata))
        if self.factor > 1:
            # Growing: append silence to reach the new length.
            adata.pad_with_zeros(new_end - len(adata))
        adata.endindex = new_end
        return adata[:new_end]
1452
class TimeTruncateLength(AudioEffect):
    """Effect that forces the audio to a fixed duration, truncating or
    zero-padding the samples as needed."""
    def __init__(self, new_duration):
        """:param new_duration: target length, in seconds."""
        self.new_duration = new_duration

    def duration(self, old_duration):
        """The resulting duration is always the configured target length."""
        return self.new_duration

    def modify(self, adata):
        """Returns `adata` cut or padded to exactly `new_duration` seconds."""
        target = int(self.new_duration * adata.sampleRate)
        if self.new_duration > adata.duration:
            # Target is longer than the audio: append silence to fill the gap.
            adata.pad_with_zeros(target - len(adata))
        adata.endindex = target
        return adata[:target]
1467
class AudioQuantumList(list, AudioRenderable):
    """
    A container that enables content-based selection and filtering.
    A `List` that contains `AudioQuantum` objects, with additional methods
    for manipulating them.

    When an `AudioQuantumList` is created for a track via a call to the
    Analyze API, `attach`\() is called so that its container is set to the
    containing `AudioAnalysis`, and the container of each of the
    `AudioQuantum` list members is set to itself.

    Additional accessors now include AudioQuantum elements such as
    `start`, `duration`, and `confidence`, which each return a List of the
    corresponding properties in the contained AudioQuanta. A special name
    is `kinds`, which returns a List of the `kind` of each `AudioQuantum`.
    If `AudioQuantumList.kind` is "`segment`", then `pitches`, `timbre`,
    `loudness_begin`, `loudness_max`, `time_loudness_max`, and `loudness_end`
    are available.
    """
    def __init__(self, initial = None, kind = None, container = None, source = None):
        """
        Initializes an `AudioQuantumList`. All parameters are optional.

        :param initial: a `List` type with the initial contents
        :param kind: a label for the kind of `AudioQuantum` contained
            within
        :param container: a reference to the containing `AudioAnalysis`
        :param source: a reference to the `AudioData` with the corresponding samples
            and time base for the contained AudioQuanta
        """
        list.__init__(self)
        self.kind = None
        self._source = None
        # Copy metadata first when cloning another list, then let any
        # explicit keyword arguments override it.
        if isinstance(initial, AudioQuantumList):
            self.kind = initial.kind
            self.container = initial.container
            self._source = initial.source
        if kind:
            self.kind = kind
        if container:
            self.container = container
        if source:
            self._source = source
        if initial:
            self.extend(initial)

    # NOTE: get_many/get_many_if_segment are property factories evaluated
    # at class-definition time (below), not instance methods.
    def get_many(attribute):
        # Builds an accessor returning [aq.<attribute> for aq in self].
        def fun(self):
            """
            Returns a list of %s for each `AudioQuantum`.
            """ % attribute
            return [getattr(x, attribute) for x in list.__iter__(self)]
        return fun

    def get_many_if_segment(attribute):
        # Like get_many, but only valid when the list holds segments.
        def fun(self):
            """
            Returns a list of %s for each `Segment`.
            """ % attribute
            if self.kind == 'segment':
                return [getattr(x, attribute) for x in list.__iter__(self)]
            else:
                raise AttributeError("<%s> only accessible for segments" % (attribute,))
        return fun

    def get_duration(self):
        # Sum of contained durations (may exceed wall time if quanta overlap).
        return sum(self.durations)

    def get_source(self):
        "Returns its own or its parent's source."
        if len(self) < 1:
            return
        if self._source:
            return self._source
        else:
            try:
                source = self.container.source
            except AttributeError:
                # No container: fall back to the first member's source.
                source = self[0].source
            return source

    def set_source(self, value):
        "Checks input to see if it is an `AudioData`."
        if isinstance(value, AudioData):
            self._source = value
        else:
            raise TypeError("Source must be an instance of echonest.remix.audio.AudioData")

    # Bulk accessors over the contained AudioQuanta.
    durations  = property(get_many('duration'))
    kinds      = property(get_many('kind'))
    start      = property(get_many('start'))
    confidence = property(get_many('confidence'))

    # Segment-only bulk accessors (raise AttributeError otherwise).
    pitches           = property(get_many_if_segment('pitches'))
    timbre            = property(get_many_if_segment('timbre'))
    loudness_begin    = property(get_many_if_segment('loudness_begin'))
    loudness_max      = property(get_many_if_segment('loudness_max'))
    time_loudness_max = property(get_many_if_segment('time_loudness_max'))
    loudness_end      = property(get_many_if_segment('loudness_end'))

    source = property(get_source, set_source, doc="""
    The `AudioData` source for the `AudioQuantumList`.
    """)

    duration = property(get_duration, doc="""
    Total duration of the `AudioQuantumList`.
    """)

    def sources(self):
        # Union of the sources of all contained AudioQuanta.
        ss = set()
        for aq in list.__iter__(self):
            ss.update(aq.sources())
        return ss

    def attach(self, container):
        """
        Create circular references to the containing `AudioAnalysis` and for the
        contained `AudioQuantum` objects.
        """
        self.container = container
        for i in self:
            i.container = self

    def __getstate__(self):
        """
        Eliminates the circular reference for pickling.
        """
        dictclone = self.__dict__.copy()
        if 'container' in dictclone:
            del dictclone['container']
        return dictclone

    def toxml(self, context=None):
        # Renders as a <sequence> element; with no context, pretty-prints.
        xml = etree.Element("sequence")
        xml.attrib['duration'] = str(self.duration)
        if not context:
            xml.attrib['source'] = self.source.analysis.identifier
            for s in self.sources():
                xml.append(s.toxml())
        elif self._source:
            try:
                if self.source != context.source:
                    xml.attrib['source'] = self.source.analysis.identifier
            except Exception:
                pass
        for x in list.__iter__(self):
            xml.append(x.toxml(context=self))
        if context:
            return xml
        else:
            # NOTE(review): minidom.parseString expects a string but is
            # handed an etree Element here -- likely needs
            # etree.tostring(xml) first; confirm before relying on this path.
            return minidom.parseString(xml).toprettyxml()

    def render(self, start=0.0, to_audio=None, with_source=None):
        # Renders all contained quanta back-to-back. Without `to_audio`,
        # allocates an output buffer sized from the summed durations.
        if len(self) < 1:
            return
        if not to_audio:
            dur = 0
            tempsource = self.source or list.__getitem__(self, 0).source
            for aq in list.__iter__(self):
                dur += int(aq.duration * tempsource.sampleRate)
            to_audio = self.init_audio_data(tempsource, dur)
        if not hasattr(with_source, 'data'):
            # No concrete source given: render once per distinct source,
            # unloading deferred sources when finished with them.
            for tsource in self.sources():
                this_start = start
                for aq in list.__iter__(self):
                    aq.render(start=this_start, to_audio=to_audio, with_source=tsource)
                    this_start += aq.duration
                if tsource.defer: tsource.unload()
            return to_audio
        else:
            if with_source not in self.sources():
                return
            for aq in list.__iter__(self):
                aq.render(start=start, to_audio=to_audio, with_source=with_source)
                start += aq.duration
1646
class Simultaneous(AudioQuantumList):
    """
    Stacks all contained AudioQuanta atop one another, adding their respective
    samples. The rhythmic length of the segment is the duration of the first
    `AudioQuantum`, but there can be significant overlap caused by the longest
    segment.

    Sample usage::
        Simultaneous(a.analysis.bars).encode("my.mp3")
    """
    def __init__(self, *args, **kwargs):
        AudioQuantumList.__init__(self, *args, **kwargs)

    def get_duration(self):
        # Rhythmic length is the first member's duration; empty list is 0.
        try:
            return self[0].duration
        except Exception:
            return 0.

    duration = property(get_duration, doc="""
    Rhythmic duration of the `Simultaneous` AudioQuanta: the
    same as the duration of the first in the list.
    """)

    def toxml(self, context=None):
        """
        Renders as a <parallel> element; with no context, returns a
        pretty-printed XML string instead.
        """
        xml = etree.Element("parallel")
        xml.attrib['duration'] = str(self.duration)
        if not context:
            xml.attrib['source'] = self.source.analysis.identifier
        elif self.source != context.source:
            try:
                xml.attrib['source'] = self.source.analysis.identifier
            except Exception:
                pass
        for x in list.__iter__(self):
            xml.append(x.toxml(context=self))
        if context:
            return xml
        else:
            # minidom.parseString requires serialized XML text, not an
            # ElementTree Element; serialize first so pretty-printing works.
            return minidom.parseString(etree.tostring(xml)).toprettyxml()

    def render(self, start=0.0, to_audio=None, with_source=None):
        """
        Renders every contained quantum at the SAME start offset so their
        samples sum. Output buffer is sized by the longest member.
        """
        if not to_audio:
            tempsource = self.source or list.__getitem__(self, 0).source
            dur = int(max(self.durations) * tempsource.sampleRate)
            to_audio = self.init_audio_data(tempsource, dur)
        if not hasattr(with_source, 'data'):
            for source in self.sources():
                for aq in list.__iter__(self):
                    aq.render(start=start, to_audio=to_audio, with_source=source)
                if source.defer: source.unload()
            return to_audio
        else:
            if with_source not in self.sources():
                return
            else:
                for aq in list.__iter__(self):
                    aq.render(start=start, to_audio=to_audio, with_source=with_source)
1706
def _dataParser(tag, nodes):
    """
    Builds an `AudioQuantumList` of plain `AudioQuantum` objects from
    analysis nodes that carry only a start time and a confidence.
    Durations are inferred: each quantum lasts until the next one starts,
    and the final quantum reuses its predecessor's duration.
    """
    out = AudioQuantumList(kind=tag)
    for node in nodes:
        out.append(AudioQuantum(start=node['start'], kind=tag,
                                confidence=node['confidence']))
    if len(out) > 1:
        # Walk adjacent pairs: each quantum ends where the next begins.
        for earlier, later in zip(out[:-1], out[1:]):
            earlier.duration = later.start - earlier.start
        # The last quantum has no successor; estimate with the previous duration.
        out[-1].duration = out[-2].duration
    return out
1717
def _attributeParser(tag, nodes):
    """
    Builds an `AudioQuantumList` from analysis nodes that already carry
    both a start time and a duration.
    """
    out = AudioQuantumList(kind=tag)
    out.extend(AudioQuantum(node['start'], node['duration'], tag)
               for node in nodes)
    return out
1724
def _segmentsParser(nodes):
    """
    Builds an `AudioQuantumList` of `AudioSegment` objects from analysis
    nodes, mapping the Analyze API field names onto constructor parameters.
    """
    out = AudioQuantumList(kind='segment')
    for node in nodes:
        segment = AudioSegment(start=node['start'],
                               duration=node['duration'],
                               pitches=node['pitches'],
                               timbre=node['timbre'],
                               loudness_begin=node['loudness_start'],
                               loudness_max=node['loudness_max'],
                               time_loudness_max=node['loudness_max_time'],
                               # loudness_end is optional in the response.
                               loudness_end=node.get('loudness_end'))
        out.append(segment)
    return out
1736
class FileTypeError(Exception):
    """
    Raised when an audio file cannot be handled because of its type.
    """
    def __init__(self, filename, message):
        """
        :param filename: path of the offending file
        :param message: human-readable description of the problem
        """
        self.filename = filename
        self.message = message

    def __str__(self):
        # Render as "<message>: <filename>".
        return '%s: %s' % (self.message, self.filename)
1744
class EchoNestRemixError(Exception):
    """
    Error raised by the Remix API when a request cannot be completed.
    Serves as the module's catch-all exception type.
    """
    pass
1751