From 32310b7c9f7b2c14111a8ebc08c69cc6e9e522d1 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Fri, 6 Oct 2023 08:18:26 +0200 Subject: [PATCH 1/2] i18Next: Add exceptions for mixed v3/v4 plural forms --- spec/i18next_plurals_spec.cr | 65 +++++++++++++++++----------- src/invidious/helpers/i18next.cr | 73 +++++++++++++++++++++++++++----- 2 files changed, 103 insertions(+), 35 deletions(-) diff --git a/spec/i18next_plurals_spec.cr b/spec/i18next_plurals_spec.cr index ee9ff394..57a93340 100644 --- a/spec/i18next_plurals_spec.cr +++ b/spec/i18next_plurals_spec.cr @@ -15,12 +15,15 @@ FORM_TESTS = { "ar" => I18next::Plurals::PluralForms::Special_Arabic, "be" => I18next::Plurals::PluralForms::Dual_Slavic, "cy" => I18next::Plurals::PluralForms::Special_Welsh, + "fr" => I18next::Plurals::PluralForms::Special_French_Portuguese, "en" => I18next::Plurals::PluralForms::Single_not_one, - "fr" => I18next::Plurals::PluralForms::Single_gt_one, + "es" => I18next::Plurals::PluralForms::Special_Spanish_Italian, "ga" => I18next::Plurals::PluralForms::Special_Irish, "gd" => I18next::Plurals::PluralForms::Special_Scottish_Gaelic, "he" => I18next::Plurals::PluralForms::Special_Hebrew, + "hr" => I18next::Plurals::PluralForms::Special_Hungarian_Serbian, "is" => I18next::Plurals::PluralForms::Special_Icelandic, + "it" => I18next::Plurals::PluralForms::Special_Spanish_Italian, "jv" => I18next::Plurals::PluralForms::Special_Javanese, "kw" => I18next::Plurals::PluralForms::Special_Cornish, "lt" => I18next::Plurals::PluralForms::Special_Lithuanian, @@ -30,13 +33,13 @@ FORM_TESTS = { "mt" => I18next::Plurals::PluralForms::Special_Maltese, "or" => I18next::Plurals::PluralForms::Special_Odia, "pl" => I18next::Plurals::PluralForms::Special_Polish_Kashubian, - "pt" => I18next::Plurals::PluralForms::Single_gt_one, - "pt-PT" => I18next::Plurals::PluralForms::Single_not_one, - "pt-BR" => I18next::Plurals::PluralForms::Single_gt_one, + "pt" => I18next::Plurals::PluralForms::Special_French_Portuguese, + 
"pt-PT" => I18next::Plurals::PluralForms::Special_French_Portuguese, "ro" => I18next::Plurals::PluralForms::Special_Romanian, - "su" => I18next::Plurals::PluralForms::None, "sk" => I18next::Plurals::PluralForms::Special_Czech_Slovak, "sl" => I18next::Plurals::PluralForms::Special_Slovenian, + "su" => I18next::Plurals::PluralForms::None, + "sr" => I18next::Plurals::PluralForms::Special_Hungarian_Serbian, } SUFFIX_TESTS = { @@ -73,10 +76,18 @@ SUFFIX_TESTS = { {num: 1, suffix: ""}, {num: 10, suffix: "_plural"}, ], + "es" => [ + {num: 0, suffix: "_2"}, + {num: 1, suffix: "_0"}, + {num: 10, suffix: "_2"}, + {num: 6_000_000, suffix: "_1"}, + ], "fr" => [ - {num: 0, suffix: ""}, - {num: 1, suffix: ""}, - {num: 10, suffix: "_plural"}, + {num: 0, suffix: "_0"}, + {num: 1, suffix: "_0"}, + {num: 10, suffix: "_2"}, + {num: 4_000_000, suffix: "_1"}, + {num: 6_260_000, suffix: "_2"}, ], "ga" => [ {num: 1, suffix: "_0"}, @@ -156,30 +167,24 @@ SUFFIX_TESTS = { {num: 5, suffix: "_2"}, ], "pt" => [ - {num: 0, suffix: ""}, - {num: 1, suffix: ""}, - {num: 10, suffix: "_plural"}, + {num: 0, suffix: "_0"}, + {num: 1, suffix: "_0"}, + {num: 10, suffix: "_2"}, + {num: 42, suffix: "_2"}, + {num: 9_000_000, suffix: "_1"}, ], "pt-PT" => [ - {num: 0, suffix: "_plural"}, - {num: 1, suffix: ""}, - {num: 10, suffix: "_plural"}, - ], - "pt-BR" => [ - {num: 0, suffix: ""}, - {num: 1, suffix: ""}, - {num: 10, suffix: "_plural"}, + {num: 0, suffix: "_0"}, + {num: 1, suffix: "_0"}, + {num: 10, suffix: "_2"}, + {num: 42, suffix: "_2"}, + {num: 9_000_000, suffix: "_1"}, ], "ro" => [ {num: 0, suffix: "_1"}, {num: 1, suffix: "_0"}, {num: 20, suffix: "_2"}, ], - "su" => [ - {num: 0, suffix: "_0"}, - {num: 1, suffix: "_0"}, - {num: 10, suffix: "_0"}, - ], "sk" => [ {num: 0, suffix: "_2"}, {num: 1, suffix: "_0"}, @@ -191,6 +196,18 @@ SUFFIX_TESTS = { {num: 2, suffix: "_2"}, {num: 3, suffix: "_3"}, ], + "su" => [ + {num: 0, suffix: "_0"}, + {num: 1, suffix: "_0"}, + {num: 10, suffix: "_0"}, + ], + "sr" => 
[ + {num: 1, suffix: "_0"}, + {num: 51, suffix: "_0"}, + {num: 32, suffix: "_1"}, + {num: 100, suffix: "_2"}, + {num: 100_000, suffix: "_2"}, + ], } Spectator.describe "i18next_Plural_Resolver" do diff --git a/src/invidious/helpers/i18next.cr b/src/invidious/helpers/i18next.cr index e84f88fb..a857c67b 100644 --- a/src/invidious/helpers/i18next.cr +++ b/src/invidious/helpers/i18next.cr @@ -35,19 +35,25 @@ module I18next::Plurals Special_Slovenian = 21 Special_Hebrew = 22 Special_Odia = 23 + + # Mixed v3/v4 rules in Weblate + # See: https://github.com/translate/translate/issues/4873 + Special_French_Portuguese + Special_Hungarian_Serbian + Special_Spanish_Italian end private PLURAL_SETS = { PluralForms::Single_gt_one => [ - "ach", "ak", "am", "arn", "br", "fil", "fr", "gun", "ln", "mfe", "mg", - "mi", "oc", "pt", "pt-BR", "tg", "tl", "ti", "tr", "uz", "wa", + "ach", "ak", "am", "arn", "br", "fil", "gun", "ln", "mfe", "mg", + "mi", "oc", "tg", "tl", "ti", "tr", "uz", "wa", ], PluralForms::Single_not_one => [ "af", "an", "ast", "az", "bg", "bn", "ca", "da", "de", "dev", "el", "en", - "eo", "es", "et", "eu", "fi", "fo", "fur", "fy", "gl", "gu", "ha", "hi", - "hu", "hy", "ia", "it", "kk", "kn", "ku", "lb", "mai", "ml", "mn", "mr", + "eo", "et", "eu", "fi", "fo", "fur", "fy", "gl", "gu", "ha", "hi", + "hu", "hy", "ia", "kk", "kn", "ku", "lb", "mai", "ml", "mn", "mr", "nah", "nap", "nb", "ne", "nl", "nn", "no", "nso", "pa", "pap", "pms", - "ps", "pt-PT", "rm", "sco", "se", "si", "so", "son", "sq", "sv", "sw", + "ps", "rm", "sco", "se", "si", "so", "son", "sq", "sv", "sw", "ta", "te", "tk", "ur", "yo", ], PluralForms::None => [ @@ -55,7 +61,7 @@ module I18next::Plurals "lo", "ms", "sah", "su", "th", "tt", "ug", "vi", "wo", "zh", ], PluralForms::Dual_Slavic => [ - "be", "bs", "cnr", "dz", "hr", "ru", "sr", "uk", + "be", "bs", "cnr", "dz", "ru", "uk", ], } @@ -81,6 +87,13 @@ module I18next::Plurals "ro" => PluralForms::Special_Romanian, "sk" => 
PluralForms::Special_Czech_Slovak, "sl" => PluralForms::Special_Slovenian, + # Mixed v3/v4 rules + "es" => PluralForms::Special_Spanish_Italian, + "fr" => PluralForms::Special_French_Portuguese, + "hr" => PluralForms::Special_Hungarian_Serbian, + "it" => PluralForms::Special_Spanish_Italian, + "pt" => PluralForms::Special_French_Portuguese, + "sr" => PluralForms::Special_Hungarian_Serbian, } # These are the v1 and v2 compatible suffixes. @@ -150,11 +163,8 @@ module I18next::Plurals end def get_plural_form(locale : String) : PluralForms - # Extract the ISO 639-1 or 639-2 code from an RFC 5646 language code, - # except for pt-BR and pt-PT which needs to be kept as-is. - if !locale.matches?(/^pt-(BR|PT)$/) - locale = locale.split('-')[0] - end + # Extract the ISO 639-1 or 639-2 code from an RFC 5646 language code + locale = locale.split('-')[0] return self.forms[locale] if self.forms[locale]? @@ -246,6 +256,10 @@ module I18next::Plurals when .special_slovenian? then return special_slovenian(count) when .special_hebrew? then return special_hebrew(count) when .special_odia? then return special_odia(count) + # Mixed v3/v4 forms + when .special_spanish_italian? then return special_cldr_Spanish_Italian(count) + when .special_french_portuguese? then return special_cldr_French_Portuguese(count) + when .special_hungarian_serbian? then return special_cldr_Hungarian_Serbian(count) else # default, if nothing matched above return 0_u8 @@ -507,5 +521,42 @@ module I18next::Plurals def self.special_odia(count : Int) : UInt8 return (count == 1) ? 
0_u8 : 1_u8 end + + # ------------------- + # "v3.5" rules + # ------------------- + + # Plural form for Spanish & Italian languages + # + # This rule is mostly compliant with CLDR v42 + # + def self.special_cldr_Spanish_Italian(count : Int) : UInt8 + return 0_u8 if (count == 1) # one + return 1_u8 if (count != 0 && count % 1_000_000 == 0) # many + return 2_u8 # other + end + + # Plural form for French and Portuguese + # + # This rule is mostly compliant with CLDR v42 + # + def self.special_cldr_French_Portuguese(count : Int) : UInt8 + return 0_u8 if (count == 0 || count == 1) # one + return 1_u8 if (count % 1_000_000 == 0) # many + return 2_u8 # other + end + + # Plural form for Croatian and Serbian (the name says "Hungarian", but the `hr` locale it serves is Croatian) + # + # This rule is mostly compliant with CLDR v42 + # + def self.special_cldr_Hungarian_Serbian(count : Int) : UInt8 + n_mod_10 = count % 10 + n_mod_100 = count % 100 + + return 0_u8 if (n_mod_10 == 1 && n_mod_100 != 11) # one + return 1_u8 if (2 <= n_mod_10 <= 4 && (n_mod_100 < 12 || 14 < n_mod_100)) # few + return 2_u8 # other + end end end From f26c9953442f3a7d5ef7046be58ace4557dbce36 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Sat, 7 Oct 2023 19:12:17 +0200 Subject: [PATCH 2/2] i18next: Revert some changes, as es/pt/pt-PT aren't mixed up (yet) --- spec/i18next_plurals_spec.cr | 25 ++++++++++++------------- src/invidious/helpers/i18next.cr | 21 ++++++++++++--------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/spec/i18next_plurals_spec.cr b/spec/i18next_plurals_spec.cr index 57a93340..dab97710 100644 --- a/spec/i18next_plurals_spec.cr +++ b/spec/i18next_plurals_spec.cr @@ -17,7 +17,7 @@ FORM_TESTS = { "cy" => I18next::Plurals::PluralForms::Special_Welsh, "fr" => I18next::Plurals::PluralForms::Special_French_Portuguese, "en" => I18next::Plurals::PluralForms::Single_not_one, - "es" => I18next::Plurals::PluralForms::Special_Spanish_Italian, + "es" => I18next::Plurals::PluralForms::Single_not_one, "ga" => 
I18next::Plurals::PluralForms::Special_Irish, "gd" => I18next::Plurals::PluralForms::Special_Scottish_Gaelic, "he" => I18next::Plurals::PluralForms::Special_Hebrew, @@ -33,8 +33,8 @@ FORM_TESTS = { "mt" => I18next::Plurals::PluralForms::Special_Maltese, "or" => I18next::Plurals::PluralForms::Special_Odia, "pl" => I18next::Plurals::PluralForms::Special_Polish_Kashubian, - "pt" => I18next::Plurals::PluralForms::Special_French_Portuguese, - "pt-PT" => I18next::Plurals::PluralForms::Special_French_Portuguese, + "pt" => I18next::Plurals::PluralForms::Single_gt_one, + "pt-BR" => I18next::Plurals::PluralForms::Special_French_Portuguese, "ro" => I18next::Plurals::PluralForms::Special_Romanian, "sk" => I18next::Plurals::PluralForms::Special_Czech_Slovak, "sl" => I18next::Plurals::PluralForms::Special_Slovenian, @@ -77,10 +77,10 @@ SUFFIX_TESTS = { {num: 10, suffix: "_plural"}, ], "es" => [ - {num: 0, suffix: "_2"}, - {num: 1, suffix: "_0"}, - {num: 10, suffix: "_2"}, - {num: 6_000_000, suffix: "_1"}, + {num: 0, suffix: "_plural"}, + {num: 1, suffix: ""}, + {num: 10, suffix: "_plural"}, + {num: 6_000_000, suffix: "_plural"}, ], "fr" => [ {num: 0, suffix: "_0"}, @@ -166,7 +166,7 @@ SUFFIX_TESTS = { {num: 1, suffix: "_0"}, {num: 5, suffix: "_2"}, ], - "pt" => [ + "pt-BR" => [ {num: 0, suffix: "_0"}, {num: 1, suffix: "_0"}, {num: 10, suffix: "_2"}, @@ -174,11 +174,10 @@ SUFFIX_TESTS = { {num: 9_000_000, suffix: "_1"}, ], "pt-PT" => [ - {num: 0, suffix: "_0"}, - {num: 1, suffix: "_0"}, - {num: 10, suffix: "_2"}, - {num: 42, suffix: "_2"}, - {num: 9_000_000, suffix: "_1"}, + {num: 0, suffix: ""}, + {num: 1, suffix: ""}, + {num: 10, suffix: "_plural"}, + {num: 9_000_000, suffix: "_plural"}, ], "ro" => [ {num: 0, suffix: "_1"}, diff --git a/src/invidious/helpers/i18next.cr b/src/invidious/helpers/i18next.cr index a857c67b..252af6b9 100644 --- a/src/invidious/helpers/i18next.cr +++ b/src/invidious/helpers/i18next.cr @@ -37,6 +37,8 @@ module I18next::Plurals Special_Odia = 23 # Mixed 
v3/v4 rules in Weblate + # `es`, `pt` and `pt-PT` don't seem to have been refreshed + # by Weblate yet, but I suspect it will happen one day. # See: https://github.com/translate/translate/issues/4873 Special_French_Portuguese Special_Hungarian_Serbian @@ -46,11 +48,11 @@ module I18next::Plurals private PLURAL_SETS = { PluralForms::Single_gt_one => [ "ach", "ak", "am", "arn", "br", "fil", "gun", "ln", "mfe", "mg", - "mi", "oc", "tg", "tl", "ti", "tr", "uz", "wa", + "mi", "oc", "pt", "tg", "tl", "ti", "tr", "uz", "wa", ], PluralForms::Single_not_one => [ "af", "an", "ast", "az", "bg", "bn", "ca", "da", "de", "dev", "el", "en", - "eo", "et", "eu", "fi", "fo", "fur", "fy", "gl", "gu", "ha", "hi", + "eo", "es", "et", "eu", "fi", "fo", "fur", "fy", "gl", "gu", "ha", "hi", "hu", "hy", "ia", "kk", "kn", "ku", "lb", "mai", "ml", "mn", "mr", "nah", "nap", "nb", "ne", "nl", "nn", "no", "nso", "pa", "pap", "pms", "ps", "rm", "sco", "se", "si", "so", "son", "sq", "sv", "sw", @@ -88,12 +90,11 @@ module I18next::Plurals "sk" => PluralForms::Special_Czech_Slovak, "sl" => PluralForms::Special_Slovenian, # Mixed v3/v4 rules - "es" => PluralForms::Special_Spanish_Italian, - "fr" => PluralForms::Special_French_Portuguese, - "hr" => PluralForms::Special_Hungarian_Serbian, - "it" => PluralForms::Special_Spanish_Italian, - "pt" => PluralForms::Special_French_Portuguese, - "sr" => PluralForms::Special_Hungarian_Serbian, + "fr" => PluralForms::Special_French_Portuguese, + "hr" => PluralForms::Special_Hungarian_Serbian, + "it" => PluralForms::Special_Spanish_Italian, + "pt-BR" => PluralForms::Special_French_Portuguese, + "sr" => PluralForms::Special_Hungarian_Serbian, } # These are the v1 and v2 compatible suffixes. 
@@ -164,7 +165,9 @@ module I18next::Plurals def get_plural_form(locale : String) : PluralForms # Extract the ISO 639-1 or 639-2 code from an RFC 5646 language code - locale = locale.split('-')[0] + if !locale.matches?(/^pt-BR$/) + locale = locale.split('-')[0] + end return self.forms[locale] if self.forms[locale]?