parent
3700c7ef10
commit
143db31d48
5 changed files with 143 additions and 115 deletions
30
README.md
30
README.md
|
@ -670,18 +670,24 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||||
--add-metadata Write metadata to the video file
|
--add-metadata Write metadata to the video file
|
||||||
--no-add-metadata Do not write metadata (default)
|
--no-add-metadata Do not write metadata (default)
|
||||||
--parse-metadata FIELD:FORMAT Parse additional metadata like title/artist
|
--parse-metadata FIELD:FORMAT Parse additional metadata like title/artist
|
||||||
from other fields. Give field name to
|
from other fields. Give a template or field
|
||||||
extract data from, and format of the field
|
name to extract data from and the format to
|
||||||
seperated by a ":". Either regular
|
interpret it as, separated by a ":". Either
|
||||||
expression with named capture groups or a
|
regular expression with named capture
|
||||||
similar syntax to the output template can
|
groups or a similar syntax to the output
|
||||||
also be used. The parsed parameters replace
|
template can be used for the FORMAT.
|
||||||
any existing values and can be use in
|
Similarly, the syntax for output template
|
||||||
output template. This option can be used
|
can be used for FIELD to parse the data
|
||||||
multiple times. Example: --parse-metadata
|
from multiple fields. The parsed parameters
|
||||||
"title:%(artist)s - %(title)s" matches a
|
replace any existing values and can be used
|
||||||
title like "Coldplay - Paradise". Example
|
in output templates. This option can be
|
||||||
(regex): --parse-metadata
|
used multiple times. Example: --parse-
|
||||||
|
metadata "title:%(artist)s - %(title)s"
|
||||||
|
matches a title like "Coldplay - Paradise".
|
||||||
|
Example: --parse-metadata "%(series)s
|
||||||
|
%(episode_number)s:%(title)s" sets the
|
||||||
|
title using series and episode number.
|
||||||
|
Example (regex): --parse-metadata
|
||||||
"description:Artist - (?P<artist>.+?)"
|
"description:Artist - (?P<artist>.+?)"
|
||||||
--xattrs Write metadata to the video file's xattrs
|
--xattrs Write metadata to the video file's xattrs
|
||||||
(using dublin core and xdg standards)
|
(using dublin core and xdg standards)
|
||||||
|
|
|
@ -67,6 +67,7 @@ from .utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
format_field,
|
format_field,
|
||||||
|
FORMAT_RE,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@ -772,95 +773,93 @@ class YoutubeDL(object):
|
||||||
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
|
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
|
||||||
return outtmpl_dict
|
return outtmpl_dict
|
||||||
|
|
||||||
|
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    @param outtmpl    Output template string using %(field)X placeholders
    @param info_dict  Mapping of field names to values; it is copied, not modified
    @param sanitize   Optional callable (key, value) -> value applied to every
                      non-numeric value. Defaults to returning the value unchanged
    @return           Tuple (outtmpl, template_dict): the patched template and a
                      defaultdict that yields the "outtmpl_na_placeholder"
                      param (default 'NA') for any missing key
    """
    template_dict = dict(info_dict)

    # duration_string
    template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-')
        if info_dict.get('duration', None) is not None
        else None)

    # epoch
    template_dict['epoch'] = int(time.time())

    # autonumber
    autonumber_size = self.params.get('autonumber_size')
    if autonumber_size is None:
        autonumber_size = 5
    template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads

    # resolution if not defined
    if template_dict.get('resolution') is None:
        if template_dict.get('width') and template_dict.get('height'):
            template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
        elif template_dict.get('height'):
            template_dict['resolution'] = '%sp' % template_dict['height']
        elif template_dict.get('width'):
            template_dict['resolution'] = '%dx?' % template_dict['width']

    if sanitize is None:
        sanitize = lambda k, v: v
    # Drop None and container values; numeric values bypass sanitize so that
    # numeric conversion types in the template keep working on them
    template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                         for k, v in template_dict.items()
                         if v is not None and not isinstance(v, (list, tuple, dict)))
    na = self.params.get('outtmpl_na_placeholder', 'NA')
    template_dict = collections.defaultdict(lambda: na, template_dict)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        # .get() rather than [] so that a missing n_entries is not silently
        # inserted into the defaultdict as a real key
        'playlist_index': len(str(template_dict.get('n_entries', na))),
        'autonumber': autonumber_size,
    }
    FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
    # Pick the width per matched field. Taking it from the first match only
    # would pad both fields with the same width when both are in the template
    outtmpl = re.sub(
        FIELD_SIZE_COMPAT_RE,
        lambda mobj: '%%(%s)0%dd' % (
            mobj.group('field'), field_size_compat_map[mobj.group('field')]),
        outtmpl)

    numeric_fields = list(self._NUMERIC_FIELDS)

    # Format date
    FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
    for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
        conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
        if key in template_dict:
            continue
        value = strftime_or_none(template_dict.get(field), frmt, na)
        if conv_type in 'crs':  # string
            value = sanitize(field, value)
        else:  # number
            numeric_fields.append(key)
            value = float_or_none(value, default=None)
        if value is not None:
            template_dict[key] = value

    # Missing numeric fields used together with integer presentation types
    # in format specification will break the argument substitution since
    # string NA placeholder is returned for missing fields. We will patch
    # output template for missing fields to meet string presentation type.
    for numeric_field in numeric_fields:
        if numeric_field not in template_dict:
            # A callable replacement inserts the key literally; a plain string
            # would have any backslash in the key read as a regex escape
            outtmpl = re.sub(
                FORMAT_RE.format(re.escape(numeric_field)),
                lambda _mobj: '%({0})s'.format(numeric_field), outtmpl)

    return outtmpl, template_dict
|
||||||
|
|
||||||
def _prepare_filename(self, info_dict, tmpl_type='default'):
|
def _prepare_filename(self, info_dict, tmpl_type='default'):
|
||||||
try:
|
try:
|
||||||
template_dict = dict(info_dict)
|
|
||||||
|
|
||||||
template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
|
|
||||||
formatSeconds(info_dict['duration'], '-')
|
|
||||||
if info_dict.get('duration', None) is not None
|
|
||||||
else None)
|
|
||||||
|
|
||||||
template_dict['epoch'] = int(time.time())
|
|
||||||
autonumber_size = self.params.get('autonumber_size')
|
|
||||||
if autonumber_size is None:
|
|
||||||
autonumber_size = 5
|
|
||||||
template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
|
|
||||||
if template_dict.get('resolution') is None:
|
|
||||||
if template_dict.get('width') and template_dict.get('height'):
|
|
||||||
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
|
||||||
elif template_dict.get('height'):
|
|
||||||
template_dict['resolution'] = '%sp' % template_dict['height']
|
|
||||||
elif template_dict.get('width'):
|
|
||||||
template_dict['resolution'] = '%dx?' % template_dict['width']
|
|
||||||
|
|
||||||
sanitize = lambda k, v: sanitize_filename(
|
sanitize = lambda k, v: sanitize_filename(
|
||||||
compat_str(v),
|
compat_str(v),
|
||||||
restricted=self.params.get('restrictfilenames'),
|
restricted=self.params.get('restrictfilenames'),
|
||||||
is_id=(k == 'id' or k.endswith('_id')))
|
is_id=(k == 'id' or k.endswith('_id')))
|
||||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
|
||||||
for k, v in template_dict.items()
|
|
||||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
|
||||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
|
||||||
template_dict = collections.defaultdict(lambda: na, template_dict)
|
|
||||||
|
|
||||||
outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
|
outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
|
||||||
force_ext = OUTTMPL_TYPES.get(tmpl_type)
|
outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
|
||||||
|
|
||||||
# For fields playlist_index and autonumber convert all occurrences
|
|
||||||
# of %(field)s to %(field)0Nd for backward compatibility
|
|
||||||
field_size_compat_map = {
|
|
||||||
'playlist_index': len(str(template_dict['n_entries'])),
|
|
||||||
'autonumber': autonumber_size,
|
|
||||||
}
|
|
||||||
FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
|
|
||||||
mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
|
|
||||||
if mobj:
|
|
||||||
outtmpl = re.sub(
|
|
||||||
FIELD_SIZE_COMPAT_RE,
|
|
||||||
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
|
|
||||||
outtmpl)
|
|
||||||
|
|
||||||
# As of [1] format syntax is:
|
|
||||||
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
|
||||||
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
|
|
||||||
FORMAT_RE = r'''(?x)
|
|
||||||
(?<!%)
|
|
||||||
%
|
|
||||||
\({0}\) # mapping key
|
|
||||||
(?:[#0\-+ ]+)? # conversion flags (optional)
|
|
||||||
(?:\d+)? # minimum field width (optional)
|
|
||||||
(?:\.\d+)? # precision (optional)
|
|
||||||
[hlL]? # length modifier (optional)
|
|
||||||
(?P<type>[diouxXeEfFgGcrs%]) # conversion type
|
|
||||||
'''
|
|
||||||
|
|
||||||
numeric_fields = list(self._NUMERIC_FIELDS)
|
|
||||||
|
|
||||||
# Format date
|
|
||||||
FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
|
|
||||||
for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
|
|
||||||
conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
|
|
||||||
if key in template_dict:
|
|
||||||
continue
|
|
||||||
value = strftime_or_none(template_dict.get(field), frmt, na)
|
|
||||||
if conv_type in 'crs': # string
|
|
||||||
value = sanitize(field, value)
|
|
||||||
else: # number
|
|
||||||
numeric_fields.append(key)
|
|
||||||
value = float_or_none(value, default=None)
|
|
||||||
if value is not None:
|
|
||||||
template_dict[key] = value
|
|
||||||
|
|
||||||
# Missing numeric fields used together with integer presentation types
|
|
||||||
# in format specification will break the argument substitution since
|
|
||||||
# string NA placeholder is returned for missing fields. We will patch
|
|
||||||
# output template for missing fields to meet string presentation type.
|
|
||||||
for numeric_field in numeric_fields:
|
|
||||||
if numeric_field not in template_dict:
|
|
||||||
outtmpl = re.sub(
|
|
||||||
FORMAT_RE.format(re.escape(numeric_field)),
|
|
||||||
r'%({0})s'.format(numeric_field), outtmpl)
|
|
||||||
|
|
||||||
# expand_path translates '%%' into '%' and '$$' into '$'
|
# expand_path translates '%%' into '%' and '$$' into '$'
|
||||||
# correspondingly that is not what we want since we need to keep
|
# correspondingly that is not what we want since we need to keep
|
||||||
|
@ -875,6 +874,7 @@ class YoutubeDL(object):
|
||||||
# title "Hello $PATH", we don't want `$PATH` to be expanded.
|
# title "Hello $PATH", we don't want `$PATH` to be expanded.
|
||||||
filename = expand_path(outtmpl).replace(sep, '') % template_dict
|
filename = expand_path(outtmpl).replace(sep, '') % template_dict
|
||||||
|
|
||||||
|
force_ext = OUTTMPL_TYPES.get(tmpl_type)
|
||||||
if force_ext is not None:
|
if force_ext is not None:
|
||||||
filename = replace_extension(filename, force_ext, template_dict.get('ext'))
|
filename = replace_extension(filename, force_ext, template_dict.get('ext'))
|
||||||
|
|
||||||
|
|
|
@ -1147,13 +1147,18 @@ def parseOpts(overrideArguments=None):
|
||||||
metavar='FIELD:FORMAT', dest='metafromfield', action='append',
|
metavar='FIELD:FORMAT', dest='metafromfield', action='append',
|
||||||
help=(
|
help=(
|
||||||
'Parse additional metadata like title/artist from other fields. '
|
'Parse additional metadata like title/artist from other fields. '
|
||||||
'Give field name to extract data from, and format of the field seperated by a ":". '
|
'Give a template or field name to extract data from and the '
|
||||||
|
'format to interpret it as, seperated by a ":". '
|
||||||
'Either regular expression with named capture groups or a '
|
'Either regular expression with named capture groups or a '
|
||||||
'similar syntax to the output template can also be used. '
|
'similar syntax to the output template can be used for the FORMAT. '
|
||||||
'The parsed parameters replace any existing values and can be use in output template. '
|
'Similarly, the syntax for output template can be used for FIELD '
|
||||||
|
'to parse the data from multiple fields. '
|
||||||
|
'The parsed parameters replace any existing values and can be used in output templates. '
|
||||||
'This option can be used multiple times. '
|
'This option can be used multiple times. '
|
||||||
'Example: --parse-metadata "title:%(artist)s - %(title)s" matches a title like '
|
'Example: --parse-metadata "title:%(artist)s - %(title)s" matches a title like '
|
||||||
'"Coldplay - Paradise". '
|
'"Coldplay - Paradise". '
|
||||||
|
'Example: --parse-metadata "%(series)s %(episode_number)s:%(title)s" '
|
||||||
|
'sets the title using series and episode number. '
|
||||||
'Example (regex): --parse-metadata "description:Artist - (?P<artist>.+?)"'))
|
'Example (regex): --parse-metadata "description:Artist - (?P<artist>.+?)"'))
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--xattrs',
|
'--xattrs',
|
||||||
|
|
|
@ -8,7 +8,7 @@ from ..utils import str_or_none
|
||||||
|
|
||||||
|
|
||||||
class MetadataFromFieldPP(PostProcessor):
|
class MetadataFromFieldPP(PostProcessor):
|
||||||
regex = r'(?P<field>\w+):(?P<format>.+)$'
|
regex = r'(?P<in>.+):(?P<out>.+)$'
|
||||||
|
|
||||||
def __init__(self, downloader, formats):
|
def __init__(self, downloader, formats):
|
||||||
PostProcessor.__init__(self, downloader)
|
PostProcessor.__init__(self, downloader)
|
||||||
|
@ -19,11 +19,20 @@ class MetadataFromFieldPP(PostProcessor):
|
||||||
match = re.match(self.regex, f)
|
match = re.match(self.regex, f)
|
||||||
assert match is not None
|
assert match is not None
|
||||||
self._data.append({
|
self._data.append({
|
||||||
'field': match.group('field'),
|
'in': match.group('in'),
|
||||||
'format': match.group('format'),
|
'out': match.group('out'),
|
||||||
'regex': self.format_to_regex(match.group('format'))})
|
'tmpl': self.field_to_template(match.group('in')),
|
||||||
|
'regex': self.format_to_regex(match.group('out')),
|
||||||
|
})
|
||||||
|
|
||||||
def format_to_regex(self, fmt):
|
@staticmethod
def field_to_template(tmpl):
    """
    Converts a bare field name like
       'title'
    to the output template
       '%(title)s'
    Anything that is not a single word is returned unchanged, since it
    is taken to be an output template already
    """
    # \Z instead of $: "$" also matches just before a trailing newline, which
    # would wrap 'title\n' into the malformed key '%(title\n)s'
    if re.match(r'\w+\Z', tmpl):
        return '%%(%s)s' % tmpl
    return tmpl
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def format_to_regex(fmt):
|
||||||
r"""
|
r"""
|
||||||
Converts a string like
|
Converts a string like
|
||||||
'%(title)s - %(artist)s'
|
'%(title)s - %(artist)s'
|
||||||
|
@ -37,7 +46,7 @@ class MetadataFromFieldPP(PostProcessor):
|
||||||
# replace %(..)s with regex group and escape other string parts
|
# replace %(..)s with regex group and escape other string parts
|
||||||
for match in re.finditer(r'%\((\w+)\)s', fmt):
|
for match in re.finditer(r'%\((\w+)\)s', fmt):
|
||||||
regex += re.escape(fmt[lastpos:match.start()])
|
regex += re.escape(fmt[lastpos:match.start()])
|
||||||
regex += r'(?P<' + match.group(1) + r'>[^\r\n]+)'
|
regex += r'(?P<%s>[^\r\n]+)' % match.group(1)
|
||||||
lastpos = match.end()
|
lastpos = match.end()
|
||||||
if lastpos < len(fmt):
|
if lastpos < len(fmt):
|
||||||
regex += re.escape(fmt[lastpos:])
|
regex += re.escape(fmt[lastpos:])
|
||||||
|
@ -45,22 +54,16 @@ class MetadataFromFieldPP(PostProcessor):
|
||||||
|
|
||||||
def run(self, info):
|
def run(self, info):
|
||||||
for dictn in self._data:
|
for dictn in self._data:
|
||||||
field, regex = dictn['field'], dictn['regex']
|
tmpl, info_copy = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
|
||||||
if field not in info:
|
data_to_parse = tmpl % info_copy
|
||||||
self.report_warning('Video doesnot have a %s' % field)
|
self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], tmpl))
|
||||||
continue
|
match = re.search(dictn['regex'], data_to_parse)
|
||||||
data_to_parse = str_or_none(info[field])
|
|
||||||
if data_to_parse is None:
|
|
||||||
self.report_warning('Field %s cannot be parsed' % field)
|
|
||||||
continue
|
|
||||||
self.write_debug('Searching for r"%s" in %s' % (regex, field))
|
|
||||||
match = re.search(regex, data_to_parse)
|
|
||||||
if match is None:
|
if match is None:
|
||||||
self.report_warning('Could not interpret video %s as "%s"' % (field, dictn['format']))
|
self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
|
||||||
continue
|
continue
|
||||||
for attribute, value in match.groupdict().items():
|
for attribute, value in match.groupdict().items():
|
||||||
info[attribute] = value
|
info[attribute] = value
|
||||||
self.to_screen('parsed %s from %s: %s' % (attribute, field, value if value is not None else 'NA'))
|
self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['in'], value if value is not None else 'NA'))
|
||||||
return [], info
|
return [], info
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4205,6 +4205,20 @@ OUTTMPL_TYPES = {
|
||||||
'pl_infojson': 'info.json',
|
'pl_infojson': 'info.json',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# As of [1] format syntax is:
|
||||||
|
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
|
||||||
|
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
|
||||||
|
FORMAT_RE = r'''(?x)
|
||||||
|
(?<!%)
|
||||||
|
%
|
||||||
|
\({0}\) # mapping key
|
||||||
|
(?:[#0\-+ ]+)? # conversion flags (optional)
|
||||||
|
(?:\d+)? # minimum field width (optional)
|
||||||
|
(?:\.\d+)? # precision (optional)
|
||||||
|
[hlL]? # length modifier (optional)
|
||||||
|
(?P<type>[diouxXeEfFgGcrs%]) # conversion type
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
def limit_length(s, length):
|
def limit_length(s, length):
|
||||||
""" Add ellipses to overly long strings """
|
""" Add ellipses to overly long strings """
|
||||||
|
|
Loading…
Reference in a new issue