diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f37ddefee8a86199e8ed31049157173a51797fd4..23819d73c5fa3b5f6eebbab5e2b014ec8005bcc2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,6 @@ repos: args: # http://www.pydocstyle.org/en/latest/error_codes.html - --ignore=D1,D203,D213,D401,D406,D407,D413 - exclude: '.*pitivi/utils/extract.py$|.*pitivi/autoaligner.py$' - repo: https://gitlab.com/PyCQA/flake8 rev: 3.9.2 hooks: @@ -36,16 +35,16 @@ repos: # http://flake8.pycqa.org/en/latest/user/error-codes.html # https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes - --ignore=E402,E501,E722,F401,F841,W504 - exclude: > - (?x)^( - pitivi/utils/extract.py| - pitivi/autoaligner.py| - )$ - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v0.910-1' hooks: - id: mypy - files: '^pitivi/(clipproperties.py|timeline/timeline.py)$' + files: > + (?x)^pitivi/( + autoaligner.py| + clipproperties.py| + timeline/timeline.py + )$ args: - --no-strict-optional - repo: local @@ -57,8 +56,6 @@ repos: types: [python] exclude: > (?x)^( - pitivi/utils/extract.py| - pitivi/autoaligner.py| tests/validate-tests/manager.py| bin/pitivi.in )$ diff --git a/data/ui/timelinetoolbar.ui b/data/ui/timelinetoolbar.ui index c887dc2a4ce4b7c0aa98f95b60e13f6562ffd3bf..d5bfe99ce5993494b8ef6ba58ddeae3c8206516e 100644 --- a/data/ui/timelinetoolbar.ui +++ b/data/ui/timelinetoolbar.ui @@ -119,8 +119,7 @@ True False Align clips based on their soundtracks - False - False + timeline.align-clips Align True stopwatch-symbolic diff --git a/pitivi/autoaligner.py b/pitivi/autoaligner.py index fa7bfeed723194f09156efe4dabbc92cf3d1a197..2d3977bc8719299a4bf7c07e58c6a2a5fdac9e26 100644 --- a/pitivi/autoaligner.py +++ b/pitivi/autoaligner.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Pitivi video editor # Copyright (c) 2011, Benjamin M. Schwartz +# Copyright (c) 2022, Thejas Kiran P S # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -14,668 +15,121 @@ # # You should have received a copy of the GNU Lesser General Public # License along with this program; if not, see . -"""Automatic alignment of `Clip`s.""" -import array -import os -import time +import os.path +from typing import List -from gi.repository import Gst -from gi.repository import Gtk +import numpy.typing +from gi.repository import GES +from scipy.signal import correlate +from scipy.signal import correlation_lags - -try: - import numpy -except ImportError: - numpy = None - -from gettext import gettext as _ - -import pitivi.configure as configure - -from pitivi.utils.ui import beautify_eta -from pitivi.utils.misc import call_false -from pitivi.utils.extract import Extractee +from pitivi.timeline.previewers import get_wavefile_location_for_uri +from pitivi.timeline.previewers import SAMPLE_DURATION from pitivi.utils.loggable import Loggable -def nextpow2(x): - a = 1 - while a < x: - a *= 2 - return a - - -def submax(left, middle, right): - """ - Find the maximum of a quadratic function from three samples. - - Given samples from a quadratic P(x) at x=-1, 0, and 1, find the x - that extremizes P. This is useful for determining the subsample - position of the extremum given three samples around the observed - extreme. - - @param left: value at x=-1 - @type left: L{float} - @param middle: value at x=0 - @type middle: L{float} - @param right: value at x=1 - @type right: L{float} - @returns: value of x that extremizes the interpolating quadratic - @rtype: L{float} - - """ - L = middle - left # L and R are both positive if middle is the - R = middle - right # observed max of the integer samples - return 0.5 * (R - L) / (R + L) - # Derivation: Consider a quadratic q(x) := P(0) - P(x). Then q(x) has - # two roots, one at 0 and one at z, and the extreme is at (0+z)/2 - # (i.e. at z/2) - # q(x) = bx*(x-z) # a may be positive or negative - # q(1) = b*(1 - z) = R - # q(-1) = b*(1 + z) = L - # (1+z)/(1-z) = L/R (from here it's just algebra to find a) - # z + 1 = R/L - (R/L)*z - # z*(1+R/L) = R/L - 1 - # z = (R/L - 1)/(R/L + 1) = (R-L)/(R+L) - - -def rigidalign(reference, targets): - """ - Estimate the relative shift between reference and targets. - - The algorithm works by subtracting the mean, and then locating - the maximum of the cross-correlation. For inputs of length M{N}, - the running time is M{O(C{len(targets)}*N*log(N))}. - - @param reference: the waveform to regard as fixed - @type reference: Sequence(Number) - @param targets: the waveforms that should be aligned to reference - @type targets: Sequence(Sequence(Number)) - @returns: The shift necessary to bring each target into alignment - with the reference. The returned shift may not be an integer, - indicating that the best alignment would be achieved by a - non-integer shift and appropriate interpolation. - @rtype: Sequence(Number) - - """ - # L is the maximum size of a cross-correlation between the - # reference and any of the targets. - L = len(reference) + max(len(t) for t in targets) - 1 - # We round up L to the next power of 2 for speed in the FFT. - L = nextpow2(L) - reference = reference - numpy.mean(reference) - fref = numpy.fft.rfft(reference, L).conj() - shifts = [] - for t in targets: - t = t - numpy.mean(t) - # Compute cross-correlation - xcorr = numpy.fft.irfft(fref * numpy.fft.rfft(t, L)) - # shift maximizes dotproduct(t[shift:],reference) - # int() to convert numpy.int32 to python int - shift = int(numpy.argmax(xcorr)) - subsample_shift = submax(xcorr[(shift - 1) % L], - xcorr[shift], - xcorr[(shift + 1) % L]) - shift = shift + subsample_shift - # shift is now a float indicating the interpolated maximum - if shift >= len(t): # Negative shifts appear large and positive - shift -= L # This corrects them to be negative - shifts.append(-shift) - # Sign reversed to move the target instead of the reference - return shifts - - -def _findslope(a): - # Helper function for affinealign - # The provided matrix a contains a bright line whose slope we want to know, - # against a noisy background. - # The line starts at 0,0. If the slope is positive, it runs toward the - # center of the matrix (i.e. toward (-1,-1)) - # If the slope is negative, it wraps from 0,0 to 0,-1 and continues toward - # the center, (i.e. toward (-1,0)). - # The line segment terminates at the midline along the X direction. - # We locate the line by simply checking the sum along each possible line - # up to the Y-max edge of a. The caller sets the limit by choosing the - # size of a. - # The function returns a floating-point slope assuming that the matrix - # has "square pixels". - Y, X = a.shape - X /= 2 - x_pos = numpy.arange(1, X) - x_neg = numpy.arange(2 * X - 1, X, -1) - best_end = 0 - max_sum = 0 - for end in range(Y): - y = (x_pos * end) // X - s = numpy.sum(a[y, x_pos]) - if s > max_sum: - max_sum = s - best_end = end - s = numpy.sum(a[y, x_neg]) - if s > max_sum: - max_sum = s - best_end = -end - return float(best_end) / X - - -def affinealign(reference, targets, max_drift=0.02): - """ EXPERIMENTAL FUNCTION. - - Perform an affine registration between a reference and a number of - targets. Designed for aligning the amplitude envelopes of recordings of - the same event by different devices. - - NOTE: This method is currently NOT USED by Pitivi, as it has proven both - unnecessary and unusable. So far every test case has been registered - successfully by rigidalign, and until Pitivi supports time-stretching of - audio, the drift calculation cannot actually be used. - - @param reference: the reference signal to which others will be registered - @type reference: array(number) - @param targets: the signals to register - @type targets: ordered iterable(array(number)) - @param max_drift: the maximum absolute clock drift rate - (i.e. stretch factor) that will be considered during search - @type max_drift: positive L{float} - @return: (offsets, drifts). offsets[i] is the point in reference at which - targets[i] starts. drifts[i] is the speed of targets[i] relative to - the reference (positive is faster, meaning the target should be - slowed down to be in sync with the reference) - """ - L = len(reference) + max(len(t) for t in targets) - 1 - L2 = nextpow2(L) - bsize = int(20. / max_drift) # NEEDS TUNING - num_blocks = nextpow2(1.0 * len(reference) // bsize) # NEEDS TUNING - bspace = (len(reference) - bsize) // num_blocks - reference -= numpy.mean(reference) - - # Construct FFT'd reference blocks - freference_blocks = numpy.zeros((L2 / 2 + 1, num_blocks), - dtype=numpy.complex) - for i in range(num_blocks): - s = i * bspace - tmp = numpy.zeros((L2,)) - tmp[s:s + bsize] = reference[s:s + bsize] - freference_blocks[:, i] = numpy.fft.rfft(tmp, L2).conj() - freference_blocks[:10, :] = 0 # High-pass to ignore slow volume variations - - offsets = [] - drifts = [] - for t in targets: - t -= numpy.mean(t) - ft = numpy.fft.rfft(t, L2) - # fxcorr is the FFT'd cross-correlation with the reference blocks - fxcorr_blocks = numpy.zeros((L2 / 2 + 1, num_blocks), - dtype=numpy.complex) - for i in range(num_blocks): - fxcorr_blocks[:, i] = ft * freference_blocks[:, i] - fxcorr_blocks[:, i] /= numpy.sqrt( - numpy.sum(fxcorr_blocks[:, i] ** 2)) - del ft - # At this point xcorr_blocks would show a distinct bright line, nearly - # orthogonal to time, indicating where each of these blocks found their - # peak. Each point on this line represents the time in t where block i - # found its match. The time-intercept gives the time in b at which the - # reference starts, and the slope gives the amount by which the - # reference is faster relative to b. - - # The challenge now is to find this line. Our strategy is to reduce the - # search to one dimension by first finding the slope. - # The Fourier Transform of a smooth real line in 2D is an orthogonal - # line through the origin, with phase that gives its position. - # Unfortunately this line is not clearly visible in fxcorr_blocks, so - # we discard the phase (by taking the absolute value) and then inverse - # transform. This places the line at the origin, so we can find its - # slope. - - # Construct the half-autocorrelation matrix - # (A true autocorrelation matrix would be ifft(abs(fft(x))**2), but this - # is just ifft(abs(fft(x))).) - # Construction is stepwise partly in an attempt to save memory - # The width is 2*num_blocks in order to avoid overlapping positive and - # negative correlations - halfautocorr = numpy.fft.fft(fxcorr_blocks, 2 * num_blocks, 1) - halfautocorr = numpy.abs(halfautocorr) - halfautocorr = numpy.fft.ifft(halfautocorr, None, 1) - halfautocorr = numpy.fft.irfft(halfautocorr, None, 0) - # Now it's actually the half-autocorrelation. - # Chop out the bit we don't care about - halfautocorr = halfautocorr[:bspace * num_blocks * max_drift, :] - # Remove the local-correlation peak. - halfautocorr[-1:2, -1:2] = 0 # NEEDS TUNING - # Normalize each column (appears to be necessary) - for i in range(2 * num_blocks): - halfautocorr[:, i] /= numpy.sqrt( - numpy.sum(halfautocorr[:, i] ** 2)) - drift = _findslope(halfautocorr) / bspace - del halfautocorr - - # inverse transform and shift everything into alignment - xcorr_blocks = numpy.fft.irfft(fxcorr_blocks, None, 0) - del fxcorr_blocks - # TODO: see if phase ramps are worthwhile here - for i in range(num_blocks): - blockcenter = i * bspace + bsize / 2 - shift = int(blockcenter * drift) - if shift > 0: - temp = xcorr_blocks[:shift, i].copy() - xcorr_blocks[:-shift, i] = xcorr_blocks[shift:, i].copy() - xcorr_blocks[-shift:, i] = temp - elif shift < 0: - temp = xcorr_blocks[shift:, i].copy() - xcorr_blocks[-shift:, i] = xcorr_blocks[:shift, i].copy() - xcorr_blocks[:-shift, i] = temp - - # xcorr is the drift-compensated cross-correlation - xcorr = numpy.sum(xcorr_blocks, axis=1) - del xcorr_blocks - - offset = numpy.argmax(xcorr) - del xcorr - if offset >= len(t): - offset -= L2 - - # now offset is the point in target at which reference starts and - # drift is the speed with which the reference drifts relative to the - # target. We reverse these relationships for the caller. - slope = 1 + drift - offsets.append(-offset / slope) - drifts.append(1 / slope - 1) - return offsets, drifts - - -def getAudioTrack(clip): - """ - Helper function for getting an audio track from a Clip - - @param clip: The Clip from which to locate an audio track - @type clip: L{Clip} - @returns: An audio track from clip, or None if clip has no audio track - @rtype: audio L{TrackElement} or L{NoneType} - """ - for track in clip.track_elements: - if track.stream_type == AudioStream: - return track - return None - - -class ProgressMeter: - - """Abstract interface representing a progress meter.""" - - def addWatcher(self, function): - """ Add a progress watching callback function. This callback will - always be called from the main thread. - - @param function: a function to call with progress updates. - @type function: callable(fractional_progress, time_remaining_text). - fractional_progress is a float normalized to [0,1]. - time_remaining_text is a localized text string indicating the - estimated time remaining. - """ - raise NotImplementedError - - -class ProgressAggregator(ProgressMeter): - - """A ProgressMeter that aggregates progress reports. - - Reports from multiple sources are combined into a unified progress - report. - - """ - - def __init__(self): - # _targets is a list giving the size of each task. - self._targets = [] - # _portions is a list of the same length as _targets, indicating - # the portion of each task that as been completed (initially 0). - self._portions = [] - self._start = time.time() - self._watchers = [] - - def getPortionCB(self, target): - """Prepare a new input for the Aggregator. - - Given a target size - (in arbitrary units, but should be consistent across all calls on - a single ProgressAggregator object), it returns a callback that - can be used to update progress on this portion of the task. - - @param target: the total task size for this portion - @type target: number - @returns: a callback that can be used to inform the Aggregator of - subsequent updates to this portion - @rtype: function(x), where x should be a number indicating the - absolute amount of this subtask that has been completed. - - """ - i = len(self._targets) - self._targets.append(target) - self._portions.append(0) - - def cb(thusfar): - self._portions[i] = thusfar - GLib.idle_add(self._callForward) - return cb - - def addWatcher(self, function): - self._watchers.append(function) - - def _callForward(self): - # This function always returns False so that it may be safely - # invoked via GLib.idle_add(). Use of idle_add() is necessary - # to ensure that watchers are always called from the main thread, - # even if progress updates are received from other threads. - total_target = sum(self._targets) - total_completed = sum(self._portions) - if total_target == 0: - return False - frac = min(1.0, float(total_completed) / total_target) - now = time.time() - remaining = (now - self._start) * (1 - frac) / frac - for function in self._watchers: - function(frac, beautify_eta(int(remaining * Gst.SECOND))) - return False - - -class EnvelopeExtractee(Extractee, Loggable): - - """Class that computes the envelope of a 1-D signal (audio). - - The envelope is defined as the sum of the absolute value of the signal - over each block. This class computes the envelope incrementally, - so that the entire signal does not ever need to be stored. - - """ - - def __init__(self, blocksize, callback, *cbargs): - """ - @param blocksize: the number of samples in a block - @type blocksize: L{int} - @param callback: a function to call when the extraction is complete. - The function's first argument will be a numpy array - representing the envelope, and any later argument to this - function will be passed as subsequent arguments to callback. - - """ - Loggable.__init__(self) - self._blocksize = blocksize - self._cb = callback - self._cbargs = cbargs - self._blocks = numpy.zeros((0,), dtype=numpy.float32) - self._empty = array.array('f', []) - # self._samples buffers up to self._threshold samples, before - # their envelope is computed and store in self._blocks, in order - # to amortize some of the function call overheads. - self._samples = array.array('f', []) - self._threshold = 2000 * blocksize - self._progress_watchers = [] - - def receive(self, a): - self._samples.extend(a) - if len(self._samples) < self._threshold: - return - else: - self._process_samples() - - def addWatcher(self, w): - """ - Add a function to call with progress updates. - - @param w: callback function - @type w: function(# of samples received so far) - - """ - self._progress_watchers.append(w) - - def _process_samples(self): - excess = len(self._samples) % self._blocksize - if excess != 0: - samples_to_process = self._samples[:-excess] - self._samples = self._samples[-excess:] - else: - samples_to_process = self._samples - self._samples = array.array('f', []) - self.debug("Adding %s samples to %s blocks", - len(samples_to_process), len(self._blocks)) - newblocks = len(samples_to_process) // self._blocksize - samples_abs = numpy.abs( - samples_to_process).reshape((newblocks, self._blocksize)) - self._blocks.resize((len(self._blocks) + newblocks,)) - # This numpy.sum() call relies on samples_abs being a - # floating-point type. If samples_abs.dtype is int16 - # then the sum may overflow. - self._blocks[-newblocks:] = numpy.sum(samples_abs, 1) - for w in self._progress_watchers: - w(self._blocksize * len(self._blocks) + excess) - - def finalize(self): - self._process_samples() # absorb any remaining buffered samples - self._cb(self._blocks, *self._cbargs) - - class AutoAligner(Loggable): + """Logic for aligning clips based on their audio.""" - """ - Class for aligning a set of L{Clip}s automatically. - - The alignment is based on their contents, so that the shifted tracks - are synchronized. The current implementation only analyzes audio - data, so timeline objects without an audio track cannot be aligned. - - """ - - BLOCKRATE = 25 - """ - @ivar BLOCKRATE: The number of amplitude blocks per second. - - The AutoAligner works by computing the "amplitude envelope" of each - audio stream. We define an amplitude envelope as the absolute value - of the audio samples, downsampled to a low samplerate. This - samplerate, in Hz, is given by BLOCKRATE. (It is given this name - because the downsampling filter is implemented by very simple - averaging over blocks, i.e. a box filter.) 25 Hz appears to be a - good choice because it evenly divides all common audio samplerates - (e.g. 11025 and 8000). Lower blockrate requires less CPU time but - produces less accurate alignment. Higher blockrate is the reverse - (and also cannot evenly divide all samplerates). - - """ - - def __init__(self, clips, callback): - """ - @param clips: an iterable of L{Clip}s. - In this implementation, only L{Clip}s with at least one - audio track will be aligned. - @type clips: iter(L{Clip}) - @param callback: A function to call when alignment is complete. No - arguments will be provided. - @type callback: function - - """ + def __init__(self, selection): Loggable.__init__(self) - # self._clips maps each object to its envelope. The values - # are initially None prior to envelope extraction. - self._clips = dict.fromkeys(clips) - self._callback = callback - # stack of (Track, Extractee) pairs waiting to be processed - # When start() is called, the stack will be populated, and then - # processed sequentially. Only one item from the stack will be - # actively in process at a time. - self._extraction_stack = [] + # Remove transition clips if any. + clips = [clip for clip in selection if isinstance(clip, GES.UriClip)] + # Sorting the clip in descending order according to their length + self._clips: List[GES.Clip] = sorted(clips, + key=lambda clip: clip.props.duration, + reverse=True) + + def _get_peaks(self, + clips: List[GES.Clip] + ) -> List[numpy.typing.NDArray[numpy.float64]]: + """Returns peak values of each clip from its wave cache.""" + peaks = [] + for clip in clips: + wavefile = get_wavefile_location_for_uri(clip.get_uri()) + clip_peaks = numpy.load(wavefile) + + # Slice out samples of trimmed part. + start = clip.inpoint // SAMPLE_DURATION + end = (clip.inpoint + clip.duration) // SAMPLE_DURATION + peaks.append(clip_peaks[start:end]) + return peaks @staticmethod - def canAlign(clips): - """ - Can an AutoAligner align these objects? - - Determine whether a group of timeline objects can all - be aligned together by an AutoAligner. - - @param clips: a group of timeline objects - @type clips: iterable(L{Clip}) - @returns: True iff the objects can aligned. - @rtype: L{bool} - - """ - # numpy is a "soft dependency". If you're running without numpy, - # this False return value is your only warning not to - # use the AutoAligner, which will crash immediately. - return all(getAudioTrack(t) is not None for t in clips) - - def _extractNextEnvelope(self): - audiotrack, extractee = self._extraction_stack.pop() - r = RandomAccessAudioExtractor(audiotrack.factory, - audiotrack.stream) - r.extract(extractee, audiotrack.in_point, - audiotrack.out_point - audiotrack.in_point) - return False - - def _envelopeCb(self, array, clip): - self.debug("Receiving envelope for %s", clip) - self._clips[clip] = array - if self._extraction_stack: - self._extractNextEnvelope() - else: # This was the last envelope - self._performShifts() - self._callback() - - def start(self): - """ - Initiate the auto-alignment process. - - @returns: a L{ProgressMeter} indicating the progress of the - alignment - @rtype: L{ProgressMeter} - - """ - progress_aggregator = ProgressAggregator() - pairs = [] # (Clip, {audio}TrackElement) pairs - for clip in list(self._clips.keys()): - audiotrack = getAudioTrack(clip) - if audiotrack is not None: - pairs.append((clip, audiotrack)) - else: # forget any Clip without an audio track - self._clips.pop(clip) - if len(pairs) >= 2: - for clip, audiotrack in pairs: - # blocksize is the number of samples per block - blocksize = audiotrack.stream.rate // self.BLOCKRATE - extractee = EnvelopeExtractee( - blocksize, self._envelopeCb, clip) - # numsamples is the total number of samples in the track, - # which is used by progress_aggregator to determine - # the percent completion. - numsamples = ((audiotrack.duration / Gst.SECOND) * - audiotrack.stream.rate) - extractee.addWatcher( - progress_aggregator.getPortionCB(numsamples)) - self._extraction_stack.append((audiotrack, extractee)) - # After we return, start the extraction cycle. - # This GLib.idle_add call should not be necessary; - # we should be able to invoke _extractNextEnvelope directly - # here. However, there is some as-yet-unexplained - # race condition between the Python GIL, GTK UI updates, - # GLib mainloop, and pygst multithreading, resulting in - # occasional deadlocks during autoalignment. - # This call to idle_add() reportedly eliminates the deadlock. - # No one knows why. - GLib.idle_add(self._extractNextEnvelope) - else: # We can't do anything without at least two audio tracks - # After we return, call the callback function (once) - GLib.idle_add(call_false, self._callback) - return progress_aggregator - - def _chooseReference(self): - """ - Chooses the timeline object to use as a reference. + def can_align(clips: List[GES.Clip]) -> bool: + """Checks if auto alignment of the clips is possible.""" + if len(clips) < 2: + return False - This function currently selects the one with lowest priority, - i.e. appears highest in the GUI. The behavior of this function - affects user interaction, because the user may want to - determine which object moves and which stays put. + # Check all clips have an audio track. + if not (all(c.get_track_types() & GES.TrackType.AUDIO + for c in clips)): + return False - @returns: the timeline object with lowest priority. - @rtype: L{Clip} + # Check every clip is from a different layer. + layers = [clip.get_layer() for clip in clips] + if len(set(layers)) < len(layers): + return False + # Check if peaks data have been generated by the previewer. + for clip in clips: + peaks_file_uri = get_wavefile_location_for_uri(clip.get_uri()) + if not os.path.isfile(peaks_file_uri): + return False + + return True + + def _xalign(self, + peaks1: numpy.typing.NDArray[numpy.float64], + peaks2: numpy.typing.NDArray[numpy.float64] + ) -> numpy.int64: + """Calculates lag in peak-arrays of a pair of clips using cross correlation.""" + corr = correlate(peaks1, peaks2) + lags = correlation_lags(peaks1.size, peaks2.size) + lag = lags[numpy.argmax(corr)] + return lag + + def _calculate_shifts(self, + peaks: List[numpy.typing.NDArray[numpy.float64]] + ) -> List[numpy.int64]: + """Calculates the shift required by target clips wrt to reference clip. + + Args: + peaks: List of peak values of each clip. """ - def priority(clip): - return clip.priority - return min(iter(self._clips.keys()), key=priority) - - def _performShifts(self): - self.debug("performing shifts") - reference = self._chooseReference() - # By using pop(), this line also removes the reference - # Clip and its envelope from further consideration, - # saving some CPU time in rigidalign. - reference_envelope = self._clips.pop(reference) - # We call list() because we need a reliable ordering of the pairs - # (In python 3, dict.items() returns an unordered dictview) - pairs = list(self._clips.items()) - envelopes = [p[1] for p in pairs] - offsets = rigidalign(reference_envelope, envelopes) - for (movable, envelope), offset in zip(pairs, offsets): - # tshift is the offset rescaled to units of nanoseconds - tshift = int((offset * Gst.SECOND) / self.BLOCKRATE) - self.debug("Shifting %s to %i ns from %i", - movable, tshift, reference.start) - newstart = reference.start + tshift - if newstart >= 0: - movable.start = newstart - else: - # Timeline objects always must have a positive start point, so - # if alignment would move an object to start at negative time, - # we instead make it start at zero and chop off the required - # amount at the beginning. - movable.start = 0 - movable.in_point = movable.in_point - newstart - movable.duration += newstart - - -class AlignmentProgressDialog: - - """ Dialog indicating the progress of the auto-alignment process. - Code derived from L{RenderingProgressDialog}, but greatly simplified - (read-only, no buttons).""" - - def __init__(self, app): - self.builder = Gtk.Builder() - self.builder.add_from_file( - os.path.join(configure.get_ui_dir(), "alignmentprogress.ui")) - self.builder.connect_signals(self) + # Select peaks of largest clip as reference. + reference = peaks[0] + reference -= reference.mean() + + shifts = [] + # Adding 0 shift for the reference clip. + shifts.append(numpy.int64(0)) + for clip_peaks in peaks[1:]: + clip_peaks -= clip_peaks.mean() + shift = self._xalign(reference, clip_peaks) + # Converting shift to time to be shifted in ns. + shift *= SAMPLE_DURATION + shifts.append(shift) + + return shifts + + def run(self) -> None: + if not self.can_align(self._clips): + return - self.window = self.builder.get_object("align-progress") - self.progressbar = self.builder.get_object("progressbar") - # Parent this dialog with mainwindow - # set_transient_for allows this dialog to properly - # minimize together with the mainwindow. This method is - # taken from RenderingProgressDialog. In both cases, it appears - # to work correctly, although there is a known bug for Gnome 3 in - # RenderingProgressDialog (bug #652917) - self.window.set_transient_for(app.gui) + peaks = self._get_peaks(self._clips) - # FIXME: Add a cancel button + shifts = self._calculate_shifts(peaks) + self._perform_shifts(shifts) - def updatePosition(self, fraction, estimated): - self.progressbar.set_fraction(fraction) - self.window.set_title(_("%d%% Analyzed") % int(100 * fraction)) - if estimated: - self.progressbar.set_text(_("About %s left") % estimated) + def _perform_shifts(self, shifts: List[numpy.int64]) -> None: + reference = self._clips[0] + starts = [reference.props.start + shift for shift in shifts] + min_start = min(starts) + if min_start < 0: + # Adjust the starts to avoid placing clips at a negative position. + starts = [start - min_start for start in starts] -if __name__ == '__main__': - # Simple command-line test - from sys import argv - names = argv[1:] - envelopes = [numpy.fromfile(n) for n in names] - reference = envelopes[-1] - offsets, drifts = affinealign(reference, envelopes, 0.02) - print(offsets, drifts) - import matplotlib.pyplot as plt - fig, ax = plt.subplots() - for o, d, e in zip(offsets, drifts, envelopes): - t = o + (1 + d) * numpy.arange(len(e)) - ax.plot(t, e / numpy.sqrt(numpy.sum(e ** 2))) - plt.show() + for clip, start in zip(self._clips, starts): + clip.props.start = start diff --git a/pitivi/timeline/timeline.py b/pitivi/timeline/timeline.py index 95786378cb5c1708875a35756837c43d25266a38..8b341d67ea75a1fe6ee020d1d3cb2915cd12110d 100644 --- a/pitivi/timeline/timeline.py +++ b/pitivi/timeline/timeline.py @@ -28,7 +28,6 @@ from gi.repository import Gst from gi.repository import Gtk from pitivi.action_search_bar import ActionSearchBar -from pitivi.autoaligner import AlignmentProgressDialog from pitivi.autoaligner import AutoAligner from pitivi.configure import get_ui_dir from pitivi.configure import in_devel @@ -1670,6 +1669,7 @@ class TimelineContainer(Gtk.Grid, Zoomable, Loggable): self.forward_one_frame_action.set_enabled(project_loaded) self.backward_one_second_action.set_enabled(project_loaded) self.forward_one_second_action.set_enabled(project_loaded) + self.align_clips_action.set_enabled(AutoAligner.can_align(selection)) # Internal API @@ -1861,6 +1861,10 @@ class TimelineContainer(Gtk.Grid, Zoomable, Loggable): self.add_effect_action, _("Add an effect to the selected clip")) + self.align_clips_action = Gio.SimpleAction.new("align-clips", None) + self.align_clips_action.connect("activate", self._align_selected_cb) + group.add_action(self.align_clips_action) + if in_devel(): self.gapless_action = Gio.SimpleAction.new("toggle-gapless-mode", None) self.gapless_action.connect("activate", self._gaplessmode_toggled_cb) @@ -2184,18 +2188,11 @@ class TimelineContainer(Gtk.Grid, Zoomable, Loggable): if not self.ges_timeline: return - progress_dialog = AlignmentProgressDialog(self.app) - progress_dialog.window.show() - self.app.action_log.begin("align", toplevel=True) - - def aligned_cb(): # Called when alignment is complete - self.app.action_log.commit() - self._project.pipeline.commit_timeline() - progress_dialog.window.destroy() - - auto_aligner = AutoAligner(self.timeline.selection, aligned_cb) - progress_meter = auto_aligner.start() - progress_meter.add_watcher(progress_dialog.update_position) + with self.app.action_log.started("Align clips", + finalizing_action=CommitTimelineFinalizingAction(self._project.pipeline), + toplevel=True): + auto_aligner = AutoAligner(self.timeline.selection) + auto_aligner.run() def _split_cb(self, unused_action, unused_parameter): """Splits clips. diff --git a/pitivi/undo/undo.py b/pitivi/undo/undo.py index c2497456d3c70f86e0fc6f7bfa88ca3b085bee7e..90c561075917924963f844b9f35cbc39fc5a9ab8 100644 --- a/pitivi/undo/undo.py +++ b/pitivi/undo/undo.py @@ -172,6 +172,15 @@ class UndoableActionLog(GObject.Object, Loggable): The operation will be composed of all the actions which have been pushed and also of the committed sub-operations. + + Args: + action_group_name (str): The name of the operation. + finalizing_action (FinalizingAction): The action to be performed + at the end of undoing or redoing the stacked actions. + mergeable (bool): Whether this stack accepts merges with future + compatible stacks. + toplevel (bool): If true, throws error if this operation is + started while another one is being recorded. """ if self.running: self.debug("Abort because running") diff --git a/pitivi/utils/extract.py b/pitivi/utils/extract.py deleted file mode 100644 index bda97343426ea4ec644ba890355eab088193c2a1..0000000000000000000000000000000000000000 --- a/pitivi/utils/extract.py +++ /dev/null @@ -1,247 +0,0 @@ -# -*- coding: utf-8 -*- -# Pitivi video editor -# Copyright (c) 2005, Edward Hervey -# Copyright (c) 2011, Benjamin M. Schwartz -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this program; if not, see . -""" -Classes for extracting decoded contents of streams into Python - -Code derived from ui/previewer.py. -""" -# FIXME reimplement after GES port -from collections import deque - -from gi.repository import Gst - -from pitivi.utils.loggable import Loggable -# from pitivi.elements.singledecodebin import SingleDecodeBin -# from pitivi.elements.extractionsink import ExtractionSink - - -def linkDynamic(element, target): - - def pad_added(unused_bin, pad, target): - compatpad = target.get_compatible_pad(pad) - if compatpad: - pad.link_full(compatpad, Gst.PAD_LINK_CHECK_NOTHING) - element.connect("pad-added", pad_added, target) - - -def pipeline(graph): - E = iter(graph.items()) - V = iter(graph.keys()) - p = Gst.Pipeline() - p.add(*V) - for u, v in E: - if v: - try: - u.link(v) - except Gst.LinkError: - linkDynamic(u, v) - return p - - -class Extractee: - - """Abstract base class for receiving raw data from an L{Extractor}.""" - - def receive(self, array): - """ - Receive a chunk of data from an Extractor. - - @param array: The chunk of data as an array - @type array: any kind of numeric array - - """ - raise NotImplementedError - - def finalize(self): - """ - Inform the Extractee that receive() will not be called again. - - Indicates that the extraction is complete, so the Extractee should - process the data it has received. - - """ - raise NotImplementedError - - -class Extractor(Loggable): - - """ - Abstract base class for extraction of raw data from a stream. - - Closely modeled on L{Previewer}. - - """ - - def __init__(self, factory, stream_): - """ - Create a new Extractor. - - @param factory: the factory with which to decode the stream - @type factory: L{ObjectFactory} - @param stream_: the stream to decode - @type stream_: L{Stream} - """ - Loggable.__init__(self) - self.debug("Initialized with %s %s", factory, stream_) - - def extract(self, extractee, start, duration): - """ - Extract the raw data corresponding to a segment of the stream. - - @param extractee: the L{Extractee} that will receive the raw data - @type extractee: L{Extractee} - @param start: The point in the stream at which the segment starts - (nanoseconds) - @type start: L{long} - @param duration: The duration of the segment (nanoseconds) - @type duration: L{long} - - """ - raise NotImplementedError - - -class RandomAccessExtractor(Extractor): - - """ - Abstract class for L{Extractor}s of random access streams. - - Closely inspired by L{RandomAccessPreviewer}. - - """ - - def __init__(self, factory, stream_): - Extractor.__init__(self, factory, stream_) - # FIXME: - # why doesn't this work? - # bin = factory.makeBin(stream_) - uri = factory.uri - caps = stream_.caps - bin = SingleDecodeBin(uri=uri, caps=caps, stream=stream_) - - self._pipelineInit(factory, bin) - - def _pipelineInit(self, factory, bin): - """ - Create the pipeline for the preview process. - - Subclasses should - override this method and create a pipeline, connecting to - callbacks to the appropriate signals, and prerolling the - pipeline if necessary. - - """ - raise NotImplementedError - - -class RandomAccessAudioExtractor(RandomAccessExtractor): - - """ - L{Extractor} for random access audio streams. - - Closely inspired by L{RandomAccessAudioPreviewer}. - - """ - - def __init__(self, factory, stream_): - self._queue = deque() - RandomAccessExtractor.__init__(self, factory, stream_) - self._ready = False - - def _pipelineInit(self, factory, sbin): - self.audioSink = ExtractionSink() - self.audioSink.set_stopped_cb(self._finishSegment) - # This audiorate element ensures that the extracted raw-data - # timeline matches the timestamps used for seeking, even if the - # audio source has gaps or other timestamp abnormalities. - audiorate = Gst.ElementFactory.make("audiorate") - conv = Gst.ElementFactory.make("audioconvert") - q = Gst.ElementFactory.make("queue") - self.audioPipeline = pipeline({ - sbin: audiorate, - audiorate: conv, - conv: q, - q: self.audioSink, - self.audioSink: None}) - bus = self.audioPipeline.get_bus() - bus.add_signal_watch() - bus.connect("message::error", self._busMessageErrorCb) - self._donecb_id = bus.connect("message::async-done", - self._busMessageAsyncDoneCb) - - self.audioPipeline.set_state(Gst.State.PAUSED) - # The audiopipeline.set_state() method does not take effect - # immediately, but the extraction process (and in particular - # self._startSegment) will not work properly until - # self.audioPipeline reaches the desired state (State.PAUSED). - # To ensure that this is the case, we wait until the ASYNC_DONE - # message is received before setting self._ready = True, - # which enables extraction to proceed. - - def _busMessageErrorCb(self, unused_bus, message): - error, debug = message.parse_error() - self.error("Event bus error: %s; %s", error, debug) - - return Gst.BusSyncReply.PASS - - def _busMessageAsyncDoneCb(self, bus, unused_message): - self.debug("Pipeline is ready for seeking") - bus.disconnect(self._donecb_id) # Don't call me again - self._ready = True - if self._queue: # Someone called .extract() before we were ready - self._run() - - def _startSegment(self, timestamp, duration): - self.debug("processing segment with timestamp=%i and duration=%i", - timestamp, duration) - res = self.audioPipeline.seek(1.0, - Gst.Format.TIME, - Gst.SeekFlags.FLUSH | Gst.SeekFlags.ACCURATE, - Gst.SeekType.SET, timestamp, - Gst.SeekType.SET, timestamp + duration) - if not res: - self.warning("seek failed %s", timestamp) - self.audioPipeline.set_state(Gst.State.PLAYING) - - return res - - def _finishSegment(self): - self.audioSink.extractee.finalize() - self.audioSink.reset() - self._queue.popleft() - # If there's more to do, keep running - if self._queue: - self._run() - - def extract(self, extractee, start, duration): - stopped = not self._queue - self._queue.append((extractee, start, duration)) - if stopped and self._ready: - self._run() - # if self._ready is False, self._run() will be called from - # self._busMessageDoneCb(). - - def _run(self): - # Control flows in a cycle: - # _run -> _startSegment -> busMessageSegmentDoneCb -> _finishSegment -> _run - # This forms a loop that extracts an entire segment (i.e. satisfies an - # extract request) in each cycle. The cycle - # runs until the queue of Extractees empties. If the cycle is not - # running, extract() will kick it off again. - extractee, start, duration = self._queue[0] - self.audioSink.set_extractee(extractee) - self._startSegment(start, duration) diff --git a/tests/test_autoaligner.py b/tests/test_autoaligner.py new file mode 100644 index 0000000000000000000000000000000000000000..191f37187300363520c6ea22eb2826d40411bb06 --- /dev/null +++ b/tests/test_autoaligner.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +# Pitivi video editor +# Copyright (c) 2022, Thejas Kiran P S +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, see . +import os + +from gi.repository import GES +from gi.repository import Gst + +from pitivi.autoaligner import AutoAligner +from pitivi.timeline.previewers import AudioPreviewer +from pitivi.timeline.previewers import get_wavefile_location_for_uri +from pitivi.undo.timeline import CommitTimelineFinalizingAction +from tests import common + + +class TestAutoAligner(common.TestCase): + """Tests for AutoAligner class.""" + + def __generate_wavefile(self, clip): + wavefile = get_wavefile_location_for_uri(clip.props.uri) + if os.path.exists(wavefile): + return + + for source in clip.get_children(False): + if isinstance(source, GES.AudioUriSource): + source_clip = source + + mainloop = common.create_main_loop() + previewer = AudioPreviewer(source_clip, 90) + previewer.connect("done", lambda x: mainloop.quit()) + previewer.start_generation() + mainloop.run() + self.assertTrue(os.path.exists(wavefile)) + + @common.setup_timeline + def test_auto_aligner(self): + # Prevent magnetic snapping from interfering with the alignment of clips. + self.timeline.props.snapping_distance = 0 + self.timeline.append_layer() + layers = self.timeline.get_layers() + # Add clips(tears_of_steel.webm) to both layers with a + # slight difference in their starting positions. + clip1 = self.add_clip(layers[0], start=0, duration=Gst.SECOND) + clip2 = self.add_clip(layers[1], start=Gst.SECOND, duration=Gst.SECOND) + self.__generate_wavefile(clip1) + + self.assertNotEqual(clip1.start, clip2.start) + autoaligner = AutoAligner([clip1, clip2]) + autoaligner.run() + self.assertEqual(clip1.start, clip2.start) + + @common.setup_timeline + def test_negative_shifts(self): + """Tests shifts causing negative clip.start are handled properly.""" + self.timeline.props.snapping_distance = 0 + self.timeline.append_layer() + layers = self.timeline.get_layers() + clip1 = self.add_clip(layers[0], start=0, inpoint=Gst.SECOND // 2, duration=Gst.SECOND) + clip2 = self.add_clip(layers[1], start=0, duration=Gst.SECOND) + self.__generate_wavefile(clip1) + + autoaligner = AutoAligner([clip1, clip2]) + autoaligner.run() + self.assertEqual(clip1.start, Gst.SECOND // 2) + self.assertEqual(clip2.start, 0) + + @common.setup_timeline + def test_align_undo_redo(self): + self.timeline.props.snapping_distance = 0 + self.timeline.append_layer() + layers = self.timeline.get_layers() + + clip1 = self.add_clip(layers[0], start=0, duration=Gst.SECOND) + clip2 = self.add_clip(layers[1], start=Gst.SECOND, duration=Gst.SECOND) + self.__generate_wavefile(clip1) + + with self.action_log.started("Align clips", + finalizing_action=CommitTimelineFinalizingAction(self.project.pipeline), + toplevel=True): + autoaligner = AutoAligner([clip1, clip2]) + autoaligner.run() + self.assertEqual([clip1.start, clip2.start], [0, 0]) + + self.action_log.undo() + self.assertEqual([clip1.start, clip2.start], [0, Gst.SECOND]) + self.action_log.redo() + self.assertEqual([clip1.start, clip2.start], [0, 0])