【每日一题20220907】混合字符串的比较

:woman_artist: 给定两个字符串 s1 和 s2,我们希望可视化这两个字符串的差异。我们只考虑小写字母(a到z)。首先,让我们计算 s1 和 s2 中每个小写字母的频率。

s1 = "A aaaa bb c"

s2 = "& aaa bbb c d"

s1 拥有 4个 'a', 2个 'b', 1个 'c'

s2 拥有 3个 'a', 3个 'b', 1个 'c', 1个 'd'

因此,s1 和 s2 中 ‘a’ 的最大值是 s1 中的 4;“b”的最大值为 s2 中的 3。在下文中,当字母的出现次数最大值不大于 1 时,我们将不统计。

我们可以在以下字符串中恢复 s1 和 s2 之间的差异:
两个字符串中 a 的最大出现次数是 4,且该最大值出现在 s1 中,所以表示为 "1:aaaa";b 的最大出现次数是 3,且该最大值出现在 s2 中,所以表示为 "2:bbb";其他字母的最大出现次数为 1,忽略。最后排序并用 "/" 连接起来,结果为 "1:aaaa/2:bbb"。

任务是生成一个字符串,其中s1或s2的每个小写字母出现与其最大值一样多的次数,如果此最大值严格大于1;这些字母将以字符串的编号为前缀,如果它们的最大值同时分布在 s1 和 s2 中,则前缀为"=:"

在结果中,子字符串(例如 2:nnnnn 或 1:hhh,包含前缀)将按其长度的递减顺序排列,长度相同时按升序字典顺序排序;不同的组由 "/" 分隔。具体来讲,先根据字母串的长度进行降序排序,然后对相同长度的元素按归属为第一个字符串、第二个字符串、相等(即前缀 "1:"、"2:"、"=:")的顺序排序,最后对归属相同且长度相同的元素按字母序升序排序,并使用 "/" 进行分隔。

详细的例子如下。

s1 = "my&friend&Paul has heavy hats! &"
s2 = "my friend John has many many friends &"
mix(s1, s2) --> "2:nnnnn/1:aaaa/1:hhh/2:mmm/2:yyy/2:dd/2:ff/2:ii/2:rr/=:ee/=:ss"

s1 = "mmmmm m nnnnn y&friend&Paul has heavy hats! &"
s2 = "my frie n d Joh n has ma n y ma n y frie n ds n&"
mix(s1, s2) --> "1:mmmmmm/=:nnnnnn/1:aaaa/1:hhh/2:yyy/2:dd/2:ff/2:ii/2:rr/=:ee/=:ss"

s1="Are the kids at home? aaaaa fffff"
s2="Yes they are here! aaaaa fffff"
mix(s1, s2) --> "=:aaaaaa/2:eeeee/=:fffff/1:tt/2:rr/=:hh"

题目难度:一般
题目来源:https://www.codewars.com/kata/5629db57620258aa9d000014

def mix(s1: str, s2: str) -> str:
    """Describe which string holds the most of each lowercase letter.

    Only ASCII letters ``a``-``z`` are counted. A letter is reported when its
    larger count across the two strings exceeds 1, as ``<prefix>:<letter
    repeated that many times>``. The prefix is ``1`` or ``2`` for the string
    holding the larger count, or ``=`` when both counts are equal.

    :param s1: first string
    :param s2: second string
    :return: the groups joined with ``/``, longest first; equal-length groups
        are ordered lexicographically, which places ``1:`` before ``2:``
        before ``=:`` and then sorts by letter
    """
    from collections import Counter

    counts1 = Counter(ch for ch in s1 if "a" <= ch <= "z")
    counts2 = Counter(ch for ch in s2 if "a" <= ch <= "z")

    groups = []
    for letter in counts1.keys() | counts2.keys():
        # Counter returns 0 for a letter missing from one of the strings.
        n1, n2 = counts1[letter], counts2[letter]
        top = max(n1, n2)
        if top <= 1:
            continue
        if n1 > n2:
            prefix = "1"
        elif n2 > n1:
            prefix = "2"
        else:
            prefix = "="
        groups.append(f"{prefix}:{letter * top}")

    # Every group carries a two-character prefix, so comparing total lengths
    # is the same as comparing letter counts.
    groups.sort(key=lambda group: (-len(group), group))
    return "/".join(groups)

# Acceptance checks for mix(): each pair of inputs must produce exactly the
# expected "/"-joined summary (an empty string when no letter repeats).
assert mix("Are they here", "yes, they are here") ==  "2:eeeee/2:yy/=:hh/=:rr"
assert mix("Sadus:cpms>orqn3zecwGvnznSgacs","MynwdKizfd$lvse+gnbaGydxyXzayp") == '2:yyyy/1:ccc/1:nnn/1:sss/2:ddd/=:aa/=:zz'
assert mix("looping is fun but dangerous", "less dangerous than coding") == "1:ooo/1:uuu/2:sss/=:nnn/1:ii/2:aa/2:dd/2:ee/=:gg"
assert mix(" In many languages", " there's a pair of functions") == "1:aaa/1:nnn/1:gg/2:ee/2:ff/2:ii/2:oo/2:rr/2:ss/2:tt"
assert mix("Lords of the Fallen", "gamekult") == "1:ee/1:ll/1:oo"
assert mix("hogwarts", "hogwarts") ==  ""
assert mix("A generation must confront the looming ", "hogwarrts") == "1:nnnnn/1:ooooo/1:tttt/1:eee/1:gg/1:ii/1:mm/=:rr"
assert mix("Are the kids at home? aaaaa fffff", "Yes they are here! aaaaa fffff") ==  "=:aaaaaa/2:eeeee/=:fffff/1:tt/2:rr/=:hh"
assert mix("mmmmm m nnnnn y&friend&Paul has heavy hats! &", "my frie n d Joh n has ma n y ma n y frie n ds n&") ==  "1:mmmmmm/=:nnnnnn/1:aaaa/1:hhh/2:yyy/2:dd/2:ff/2:ii/2:rr/=:ee/=:ss"
import re
def mix(s1: str, s2: str) -> str:
    """Summarize, per lowercase letter, which string contains it most often.

    Letters outside ``a``-``z`` are ignored. A letter is reported only when
    its larger count across the two strings exceeds 1.

    :param s1: first string
    :param s2: second string
    :return: groups of the form ``1:aaa``, ``2:bbb`` or ``=:cc`` joined with
        ``/``; longer letter runs come first, then prefix ``1`` before ``2``
        before ``=``, then letters in ascending order
    """
    from collections import Counter

    # Strip everything that is not an ASCII lowercase letter, then count once.
    counts_1 = Counter(re.sub("[^a-z]", "", s1))
    counts_2 = Counter(re.sub("[^a-z]", "", s2))

    # Rank of each prefix among groups whose letter runs have equal length.
    prefix_rank = {"1": 0, "2": 1, "=": 2}

    groups = []
    for letter in counts_1.keys() | counts_2.keys():
        n1, n2 = counts_1[letter], counts_2[letter]
        top = max(n1, n2)
        if top <= 1:
            continue
        if n1 > n2:
            prefix = "1"
        elif n1 < n2:
            prefix = "2"
        else:
            prefix = "="
        groups.append((prefix, letter * top))

    groups.sort(key=lambda group: (-len(group[1]), prefix_rank[group[0]], group[1]))
    return "/".join(f"{prefix}:{run}" for prefix, run in groups)
def gen_letter_frequency(string: str):
    """
    Yield the frequency of every repeated lowercase letter.

    Only ASCII letters ``a``-``z`` are considered, as the task requires;
    other characters (including non-ASCII lowercase letters) are skipped.
    Letters are yielded in order of first appearance in ``string``.

    :param string: text to analyse
    :return: generator of ``(letter, count)`` pairs with ``count > 1``
    """
    from collections import Counter

    # A single pass counts every letter once instead of calling
    # str.count() twice per distinct character.
    counts = Counter(ch for ch in string if "a" <= ch <= "z")
    for letter, count in counts.items():
        if count > 1:
            yield letter, count

def gen_format_letter(dict1: dict, dict2: dict):
    """
    Yield one formatted entry per letter found in either frequency mapping.

    A letter missing from a mapping is treated as having a count of 0 there.

    :param dict1: letter -> count mapping for the first string
    :param dict2: letter -> count mapping for the second string
    :return: generator of ``(larger count, prefix, letter repeated)`` tuples,
        where the prefix is ``"1:"``, ``"2:"`` or ``"=:"`` depending on which
        mapping holds the larger count
    """
    for letter in set(dict1) | set(dict2):
        count1 = dict1.get(letter, 0)
        count2 = dict2.get(letter, 0)
        if count1 == count2:
            prefix, top = "=:", count1
        elif count1 > count2:
            prefix, top = "1:", count1
        else:
            prefix, top = "2:", count2
        yield top, prefix, letter * top

def mix(s1: str, s2: str) -> str:
    """
    Compare the letter frequencies of two strings.

    :param s1: first string
    :param s2: second string
    :return: formatted comparison, with groups joined by ``/``
    """
    # Frequency tables of the repeated letters in each string.
    freq1 = dict(gen_letter_frequency(s1))
    freq2 = dict(gen_letter_frequency(s2))

    # One composite key gives the same order as three stable passes:
    # count descending, then prefix ascending, then letter run ascending.
    ranked = sorted(
        gen_format_letter(freq1, freq2),
        key=lambda entry: (-entry[0], entry[1], entry[2]),
    )

    return "/".join(prefix + run for _, prefix, run in ranked)


# Acceptance checks for mix(): each pair of inputs must produce exactly the
# expected "/"-joined summary (an empty string when no letter repeats).
assert mix("Are they here", "yes, they are here") == "2:eeeee/2:yy/=:hh/=:rr"
assert mix("Sadus:cpms>orqn3zecwGvnznSgacs","MynwdKizfd$lvse+gnbaGydxyXzayp") == '2:yyyy/1:ccc/1:nnn/1:sss/2:ddd/=:aa/=:zz'
assert mix("looping is fun but dangerous", "less dangerous than coding") == "1:ooo/1:uuu/2:sss/=:nnn/1:ii/2:aa/2:dd/2:ee/=:gg"
assert mix(" In many languages", " there's a pair of functions") == "1:aaa/1:nnn/1:gg/2:ee/2:ff/2:ii/2:oo/2:rr/2:ss/2:tt"
assert mix("Lords of the Fallen", "gamekult") == "1:ee/1:ll/1:oo"
assert mix("hogwarts", "hogwarts") ==  ""
assert mix("A generation must confront the looming ", "hogwarrts") == "1:nnnnn/1:ooooo/1:tttt/1:eee/1:gg/1:ii/1:mm/=:rr"
assert mix("Are the kids at home? aaaaa fffff", "Yes they are here! aaaaa fffff") ==  "=:aaaaaa/2:eeeee/=:fffff/1:tt/2:rr/=:hh"
assert mix("mmmmm m nnnnn y&friend&Paul has heavy hats! &", "my frie n d Joh n has ma n y ma n y frie n ds n&") ==  "1:mmmmmm/=:nnnnnn/1:aaaa/1:hhh/2:yyy/2:dd/2:ff/2:ii/2:rr/=:ee/=:ss"

def mix(s1: str, s2: str) -> str:
    """Summarize, per lowercase letter, which string contains it most often.

    :param s1: first string
    :param s2: second string
    :return: groups such as ``1:aaa``, ``2:bbb`` or ``=:cc`` joined with
        ``/``; longer letter runs first, then prefix ``1``, ``2``, ``=``,
        then letters in ascending order. Letters whose larger count is at
        most 1 are omitted.
    """
    groups = []
    for letter in set(s1) | set(s2):
        # The task only covers ASCII a-z; str.islower() would also accept
        # letters such as "é".
        if not "a" <= letter <= "z":
            continue
        count1, count2 = s1.count(letter), s2.count(letter)
        top = max(count1, count2)
        if top <= 1:
            continue
        if count1 > count2:
            prefix = "1"
        elif count2 > count1:
            prefix = "2"
        else:
            prefix = "="
        groups.append((prefix, letter * top))

    # "1" < "2" < "=" in ASCII, so the prefix itself gives the required order.
    groups.sort(key=lambda group: (-len(group[1]), group[0], group[1]))
    return "/".join(f"{prefix}:{run}" for prefix, run in groups)
def mix(s1: str, s2: str) -> str:
    """Summarize, per lowercase letter, which string contains it most often.

    Letters outside ``a``-``z`` are ignored, and a letter is reported only
    when its larger count across the two strings exceeds 1.

    :param s1: first string
    :param s2: second string
    :return: groups such as ``1:aaa``, ``2:bbb`` or ``=:cc`` joined with
        ``/``; longer letter runs first, then prefix ``1``, ``2``, ``=``,
        then letters in ascending order
    """
    x1 = re.sub(r'[^a-z]', '', s1)
    x2 = re.sub(r'[^a-z]', '', s2)
    dict1 = {ch: x1.count(ch) for ch in set(x1)}
    dict2 = {ch: x2.count(ch) for ch in set(x2)}

    res_set = set()
    # Walk the union of letters so that letters repeated only in s2 are
    # reported even when s1 shares no repeated letter with s2.
    for letter in dict1.keys() | dict2.keys():
        n1, n2 = dict1.get(letter, 0), dict2.get(letter, 0)
        if max(n1, n2) <= 1:
            continue
        if n1 < n2:
            res_set.add(("2", letter * n2))
        elif n1 > n2:
            res_set.add(("1", letter * n1))
        else:
            res_set.add(("=", letter * n1))

    ordered = sorted(res_set, key=lambda x: (-len(x[1]), x[0], x[1]))
    return '/'.join(prefix + ':' + run for prefix, run in ordered)


# Acceptance checks for mix(): each pair of inputs must produce exactly the
# expected "/"-joined summary (an empty string when no letter repeats).
assert mix("Are they here", "yes, they are here") ==  "2:eeeee/2:yy/=:hh/=:rr"
assert mix("Sadus:cpms>orqn3zecwGvnznSgacs","MynwdKizfd$lvse+gnbaGydxyXzayp") == '2:yyyy/1:ccc/1:nnn/1:sss/2:ddd/=:aa/=:zz'
assert mix("looping is fun but dangerous", "less dangerous than coding") == "1:ooo/1:uuu/2:sss/=:nnn/1:ii/2:aa/2:dd/2:ee/=:gg"
assert mix(" In many languages", " there's a pair of functions") == "1:aaa/1:nnn/1:gg/2:ee/2:ff/2:ii/2:oo/2:rr/2:ss/2:tt"
assert mix("Lords of the Fallen", "gamekult") == "1:ee/1:ll/1:oo"
assert mix("hogwarts", "hogwarts") ==  ""
assert mix("A generation must confront the looming ", "hogwarrts") == "1:nnnnn/1:ooooo/1:tttt/1:eee/1:gg/1:ii/1:mm/=:rr"
assert mix("Are the kids at home? aaaaa fffff", "Yes they are here! aaaaa fffff") ==  "=:aaaaaa/2:eeeee/=:fffff/1:tt/2:rr/=:hh"
assert mix("mmmmm m nnnnn y&friend&Paul has heavy hats! &", "my frie n d Joh n has ma n y ma n y frie n ds n&") ==  "1:mmmmmm/=:nnnnnn/1:aaaa/1:hhh/2:yyy/2:dd/2:ff/2:ii/2:rr/=:ee/=:ss"