"""
Convert downloaded .webm recordings to .wav.

Requires ffmpeg installed on the system:
  macOS:   brew install ffmpeg
  Ubuntu:  sudo apt install ffmpeg

Usage:
  python scripts/convert_to_wav.py
  python scripts/convert_to_wav.py --in data/raw --out data/wav
"""

import argparse
import subprocess
from pathlib import Path

DEFAULT_IN = Path('data/raw')
DEFAULT_OUT = Path('data/wav')


def convert(in_dir: Path, out_dir: Path) -> None:
    """Convert every .webm file under ``in_dir`` to a .wav file in ``out_dir``.

    ``in_dir`` is searched recursively. Each output is written directly into
    ``out_dir`` under the source file's base name with a ``.wav`` suffix, so
    the output layout is flat: same-named recordings in different
    sub-directories map to the same destination, and the later ones are
    reported as skipped.

    Files whose destination already exists are skipped. Progress and a final
    summary are printed to stdout.

    Args:
        in_dir: Directory searched recursively for ``*.webm`` files.
        out_dir: Directory receiving the ``.wav`` files; created if missing.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be launched.
    """
    # The inputs are .webm recordings; globbing for '*.wav' here would never
    # pick them up.
    files = sorted(in_dir.rglob('*.webm'))
    if not files:
        print(f'No .webm files found in {in_dir}/')
        return

    out_dir.mkdir(parents=True, exist_ok=True)
    ok = fail = skip = 0

    for src in files:
        dest = out_dir / src.with_suffix('.wav').name
        if dest.exists():
            print(f' skip {src.name}')
            skip += 1
            continue

        try:
            result = subprocess.run(
                [
                    'ffmpeg', '-y',
                    '-i', str(src),
                    '-ar', '44100',        # 44.1 kHz sample rate
                    '-ac', '1',            # mono
                    '-sample_fmt', 's16',  # 16-bit signed samples
                    str(dest),
                ],
                capture_output=True,
            )
        except FileNotFoundError as err:
            # Same exception type as before, but with an actionable message.
            raise FileNotFoundError(
                'ffmpeg executable not found on PATH; install ffmpeg first'
            ) from err

        if result.returncode == 0:
            print(f' ok {src.name} -> {dest.name}')
            ok += 1
        else:
            print(f' FAIL {src.name}')
            # ffmpeg output is not guaranteed to be valid UTF-8.
            print(result.stderr.decode(errors='replace')[-300:])
            fail += 1
            # Drop any partial output so the next run retries this file
            # instead of treating the leftover as an existing conversion.
            if dest.exists():
                dest.unlink()

    print(f'\nDone. {ok} converted, {skip} skipped, {fail} failed.')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--in', dest='in_dir', default=str(DEFAULT_IN),
                        help='directory containing .webm recordings')
    parser.add_argument('--out', dest='out_dir', default=str(DEFAULT_OUT),
                        help='directory to write .wav files to')
    args = parser.parse_args()
    convert(Path(args.in_dir), Path(args.out_dir))