#!/usr/bin/env python3
'''Crop and scale a video based on time-stamped bounding boxes.

Boxes are read from a csv file (label, time, x1, y1, x2, y2), adjusted to the
requested output aspect ratio, shifted inside the frame, and followed through
the video (optionally interpolating between box updates).  The cropped and
rescaled frames are written to <save-stem>.avi and to the 'data' dataset of
<save-stem>.hdf5, whose 'metadata' attribute holds the ffprobe output as JSON.
'''
import argparse
import csv
import json
import subprocess


def alignDims(box: list, whRatio: float = 1.0) -> list:
    '''Make a bounding box's dimensions adhere to the given ratio

    The shorter side (relative to the ratio) is grown symmetrically around its
    centre; the other side is left untouched.  The box is modified in place.

    Arguments:
        box: The bounding box, formatted [x1, y1, x2, y2]
        whRatio: The desired ratio of width to height

    Returns:
        list: The adjusted box (the same list object that was passed in)
    '''
    # coords1 indexes the axis that is already long enough, coords2 the axis
    # that has to grow.  Start by assuming x is the long axis.
    coords1, coords2 = [0, 2], [1, 3]
    if (box[coords2[1]] - box[coords2[0]]) * whRatio > (box[coords1[1]] - box[coords1[0]]):
        # Height is the long axis instead: swap roles and invert the ratio
        coords1, coords2 = coords2, coords1
        whRatio = 1 / whRatio

    # coords1 > whRatio * coords2, but we want them equal
    d = int((box[coords1[1]] - box[coords1[0]]) / whRatio - (box[coords2[1]] - box[coords2[0]]))
    box[coords2[0]] -= int(d/2)
    # Any odd leftover pixel goes to the far edge so the total growth is d
    box[coords2[1]] += int(d/2) + (d % 2)
    return box


def shift(box: list, w: int, h: int) -> list:
    '''Shift a bounding box to be within w,h bounds

    The box is translated, never resized, and is modified in place.  A box
    larger than the frame therefore cannot be made to fit and may still
    extend past one edge after shifting.

    Arguments:
        box: The bounding box, formatted [x1, y1, x2, y2]
        w: The width of the frame (i.e., maximum x value)
        h: The height of the frame (i.e., maximum y value)

    Returns:
        list: The shifted box (the same list object that was passed in)
    '''
    bounds = [[0, int(w)], [0, int(h)]]
    coords = [[0, 2], [1, 3]]
    for b, c in zip(bounds, coords):
        s = 0
        if box[c[0]] < b[0]:
            s = -box[c[0]]
        elif box[c[1]] > b[1]:
            s = b[1] - box[c[1]]
        box[c[0]] += s
        box[c[1]] += s
    return box


def interpolateBox(prev: list, nxt: list, now: float) -> list:
    '''Linearly interpolate between two boxes at time `now`

    Arguments:
        prev: The earlier box, formatted [label, time, x1, y1, x2, y2]
        nxt: The later box, in the same format
        now: The time to interpolate at

    Returns:
        list: A new box [label of prev, now, x1, y1, x2, y2]
    '''
    span = nxt[1] - prev[1]
    if span > 0:
        # Clamp so that frames before the first box use that box unchanged
        # instead of extrapolating outside the two known boxes.
        weight = min(max((nxt[1] - now) / span, 0.0), 1.0)
    else:
        # Both boxes share a timestamp: avoid dividing by zero, use `prev`
        weight = 1.0
    return [prev[0], now] + [int(prev[i] * weight + nxt[i] * (1 - weight)) for i in range(2, 6)]


def _parseArgs(argv=None):
    '''Parse the command line (sys.argv when argv is None)'''
    parser = argparse.ArgumentParser(description='Crop and scale a video based on bounding boxes', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('boxes', help='Path to csv file where bounding boxes are saved')
    parser.add_argument('video', help='Path to video to crop')
    parser.add_argument('--intermediary', help='Path to intermediary video if size differs')
    parser.add_argument('--no-interpolate', help='Do not interpolate over gaps between box updates', action='store_true')
    parser.add_argument('--label', help='Boxes label (uses all if not provided)')
    parser.add_argument('--save-stem', help='Location to save stem.hdf5 and stem.avi', default='out')
    parser.add_argument('--skip-zero-boxes', help='Skip boxes with zero size', action='store_true')
    parser.add_argument('--outputWH', help='Output width and height', nargs=2, default=[64,64], type=int)
    parser.add_argument('--debug', help='Save a debugging video as well', action='store_true')
    parser.add_argument('--extraMetadata', help='Path to json-formatted extra metadata (in addition to embedded in video file)')
    args = parser.parse_args(argv)
    if min(args.outputWH) <= 0:
        # A zero height would otherwise surface as a ZeroDivisionError later
        parser.error('--outputWH values must be positive')
    return args


def _probeMetadata(path):
    '''Return the format and stream metadata ffprobe reports for path'''
    return json.loads(subprocess.check_output(['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_format', '-show_streams', path]))


def _readBoxRows(path, label=None):
    '''Read the raw csv rows, keeping only those matching label if given'''
    with open(path, newline='') as f:
        # Blank lines come back as empty rows; drop them so row[0] is safe
        rows = [row for row in csv.reader(f) if row]
    if label:
        rows = [row for row in rows if row[0] == label]
    return rows


def main(argv=None):
    '''Run the crop-and-scale pipeline described in the module docstring'''
    args = _parseArgs(argv)
    OUTW, OUTH = args.outputWH

    rows = _readBoxRows(args.boxes, args.label)
    metadata = _probeMetadata(args.video)

    # Third-party modules are imported only after argument parsing, so that
    # --help and usage errors do not depend on them being importable.
    import cv2
    import h5py
    import numpy as np
    from progress.bar import IncrementalBar

    outArry = []
    timestamp = 0.0  # stays defined even if no frame can be read
    cap = cv2.VideoCapture(args.video)
    outVid = None
    outDebug = None
    try:
        if not cap.isOpened():
            raise SystemExit(f'Could not open video: {args.video}')
        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            raise SystemExit(f'Could not determine frame rate of video: {args.video}')

        # Boxes drawn on an intermediary video of a different size have to be
        # rescaled to the coordinates of the video being cropped.
        zoomX = 1.0
        zoomY = 1.0
        if args.intermediary:
            # The intermediary's metadata replaces the main video's
            metadata = _probeMetadata(args.intermediary)
            inter = cv2.VideoCapture(args.intermediary)
            try:
                interWidth = inter.get(cv2.CAP_PROP_FRAME_WIDTH)
                interHeight = inter.get(cv2.CAP_PROP_FRAME_HEIGHT)
            finally:
                inter.release()
            if interWidth <= 0 or interHeight <= 0:
                raise SystemExit(f'Could not read size of intermediary video: {args.intermediary}')
            zoomX = width / interWidth
            zoomY = height / interHeight

        if args.extraMetadata:
            with open(args.extraMetadata) as f:
                metadata['extra'] = json.load(f)

        # Each box is [str(label), float(time), int(x1), int(y1), int(x2), int(y2)]
        boxes = [[b[0], float(b[1]), int(int(b[2]) * zoomX), int(int(b[3]) * zoomY), int(int(b[4]) * zoomX), int(int(b[5]) * zoomY)] for b in rows]
        if args.skip_zero_boxes:
            boxes = [b for b in boxes if b[2] != b[4] and b[3] != b[5]]
        if not boxes:
            raise SystemExit(f'No usable bounding boxes found in {args.boxes}')
        boxes = [box[0:2] + shift(alignDims(box[2:], OUTW/OUTH), width, height) for box in boxes]

        fourcc = cv2.VideoWriter_fourcc('M','J','P','G')
        outVid = cv2.VideoWriter(args.save_stem + '.avi', fourcc, fps, (OUTW, OUTH))
        # Debug frames show the source frame next to the crop scaled up to
        # the full frame height.
        debugCropW = int(OUTW * height / OUTH)
        if args.debug:
            outDebug = cv2.VideoWriter(args.save_stem + '-debug.avi', fourcc, fps, (int(OUTW * height / OUTH + width), int(height)))

        # This may take a bit, so do a progress bar
        bar = IncrementalBar('Frames Processed', max=cap.get(cv2.CAP_PROP_FRAME_COUNT), suffix='%(index)d/%(max)d - %(eta)d s')

        frameNum = 0
        boxIdx = 0  # index of the current box; avoids re-slicing the list
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            timestamp = frameNum / fps

            # Make boxes[boxIdx] the most recent timestamp without going over current time
            while boxIdx + 1 < len(boxes) and boxes[boxIdx + 1][1] <= timestamp:
                boxIdx += 1
            if boxIdx + 1 >= len(boxes):
                # Stop when we run out
                break

            box = boxes[boxIdx]
            if not args.no_interpolate:
                box = interpolateBox(boxes[boxIdx], boxes[boxIdx + 1], timestamp)

            # Negative indices would wrap around in numpy slicing and crop the
            # wrong region, so clamp to the frame origin (numpy already clamps
            # indices that run past the far edge).
            x1, x2 = (max(v, 0) for v in sorted([box[2], box[4]]))
            y1, y2 = (max(v, 0) for v in sorted([box[3], box[5]]))
            cropped = frame[y1:y2, x1:x2]
            if cropped.size < 1:
                resized = np.zeros((OUTH, OUTW, 3), dtype=cropped.dtype)
            else:
                # cv2.resize takes dsize as (width, height)
                resized = cv2.resize(cropped, (OUTW, OUTH), interpolation=cv2.INTER_CUBIC)
            outArry.append(resized)
            outVid.write(resized)

            if args.debug:
                # Draw box on frame
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                # Resize "resized"
                scaledUp = cv2.resize(resized, (debugCropW, int(height)), interpolation=cv2.INTER_AREA)
                concatted = cv2.hconcat([frame, scaledUp])
                outDebug.write(concatted)

            bar.next()
            frameNum += 1
        bar.finish()
    finally:
        # Release explicitly so the output containers are finalized
        cap.release()
        if outVid is not None:
            outVid.release()
        if outDebug is not None:
            outDebug.release()

    print(f'Ending at time = {timestamp}')

    # Save out array
    with h5py.File(args.save_stem + '.hdf5', 'w') as f:
        f.create_dataset('data', data=np.array(outArry))
        f.attrs['metadata'] = json.dumps(metadata)


if __name__ == '__main__':
    main()