【每日一题1103】寻找子串

:woman_mage: 给定一个非空字符串words,只包含纯小写字母。请编写一个函数,找出其中最短的一个子字符串,以及它重复的最大次数。

示例:
输入: ababab,输出: ("ab", 3)
输入:abcde,输出:("abcde", 1)

题目难度:中等
题目来源:codewars: Repeated Substring

def find_repeat(words: str) -> tuple:
    pass

assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

占个坑,求简单解

def find_repeat(word:str):
    length=len(word)
    for i in range(len(word)):
        if length%(i+1)==0:
            res2=length/(i+1)
            res1=word[0:(i+1)]
            if res1*int(res2)==word:
                return (res1,int(res2))
            else:
                continue
        else:
            continue
def find_repeat(words: str) -> tuple:
    son_list = [words[i:i + x + 1] for x in range(len(words)) for i in range(len(words) - x)]
    count_dict = {}
    for w in son_list:
        if w not in count_dict.keys():
            count_dict[w] = words.count(w)
        else:
            if words.count(w) > count_dict[w]:
                count_dict[w] = words.count(w)
    count_max = 0
    max_word = ''
    for key, value in count_dict.items():
        if count_dict[key] > count_max:
            count_max = count_dict[key]
            max_word = key
        if count_dict[key] == count_max:
            if len(key) > len(max_word):
                max_word = key
    return max_word, count_max


assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

没看懂,能讲讲思路吗?

:sob: :sob: :sob:,没有思路

没理解错的话可能是这样

def find_repeat(words: str) -> tuple:
    return [(words[0:i], len(words) // i) for i in range(1, len(words)+1) if words[0:i] * (len(words) // i) == words][0]


assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)
def find_repeat(words):
    lengh=len(words)
    min_len=float("inf")
    max_count=0
    for i in range(lengh):
        for j in range(i+2,lengh):
                if len(words[i:j])<min_len and words.count(words[i:j])>max_count:
                    min_len=len(words[i:j])
                    max_count=words.count(words[i:j])
                    word=words[i:j]
    if max_count>1:
        return (word,max_count)
    if max_count==1:
        return (words,max_count)

assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

这个思路存在问题

啥问题,请赐教 :grinning:

你这个获得不是子字符串,我如果将words改为abababc,你这边的断言就是错的
assert find_repeat(“abababc”) == (“ab”, 3)

可能题意理解的不一致,我理解的是最短字串且重复n次能匹配word,按你的意思题目中的实例“abcde“,应该得到的结果是(”a",1),而不是(“abcde”,1),同理如果是abababc,得到的结果应该为(abababc,1),而不是(“ab”,3),因为重复3次并不能匹配abababc。 这样才能解释为什么abcde输出的是(“abcde”,1)

是这样的,我对这个题目的描述也很困惑,因为子字符串的确包括a,b等等。
按照你这样的理解,那其实也没有错。
另外,如果words是abababa的话我的断言出来的也有歧义。

我没搞懂(“abcde”, 1)最短的子字符串为啥是 abcde,不应该是 ab,bc,cd,ce 吗?

我理解的是应该存在重复的子字符串吧,这道题我读起来也不是很清晰

def find_repeat(self, word:str):
    '''
    给定一个非空字符串words,只包含纯小写字母。请编写一个函数,找出其中最短的一个子字符串,以及它重复的最大次数。

    示例:
    输入: ababab,输出: ("ab", 3)。
    输入:abcde,输出:("abcde", 1)。
    :return:
    '''
    repeat_list = []
    wordlen = len(word)
    for i in range(0, wordlen):
        for j in range(i+1, wordlen):
            for k in range(2,  wordlen - j + 1):
                if i + k - 1 >= j:
                    break

                if word[j: j+k] == word[i: i+k]:
                    repeat_list.append(word[i: i + k])
                    continue
                else:
                    continue

        pass
    repeat_set = set(repeat_list)
    repeat_dict = {}
    result = tuple((word, 1))
    max_num = 1
    for data in repeat_set:
        count = 1
        for data2 in repeat_list:
            if data == data2:
                count +=1
        repeat_dict[data] = count
        if count > max_num:
            result = (data, count)
    print(repeat_list)
    print(repeat_dict)
    return result

def find_repeat(words: str) → tuple:

words_length = len(words)

temp_word = ""

min_length = words_length

min_word = words

max_times = 1

temp_times = 1

for i in range(1,words_length+1):

    for j in range(0,words_length-i+1):

        """"子字符串长度大于1时,需要再多一个循环"""

        if i > 1:

            for k in range(j,words_length+i-1,i):

                if temp_word == words[k:i+k]:

                    temp_length = i

                    temp_times += 1

                    if temp_length <= min_length:

                        if temp_times > max_times:

                            min_length = temp_length

                            max_times = temp_times

                            min_word  = temp_word

                else:

                    temp_word = words[k:i+k]

                    temp_times = 1

            """"子字符串长度小于1时"""

        else:

            if temp_word == words[j:i+j]:

                temp_length = i

                temp_times += 1

                if temp_length <= min_length:

                    if temp_times > max_times:

                        min_length = temp_length

                        max_times = temp_times

                        min_word  = temp_word

            else:

                temp_word = words[j:i+j]

                temp_times = 1

print(min_word,max_times)

return (min_word,max_times)

assert find_repeat(“ababab”) == (“ab”, 3)

assert find_repeat(“abcde”) == (“abcde”, 1)

assert find_repeat(“1baba1”) == (“ba”, 2)

assert find_repeat(“11abab1”) == (“1”, 2)

根据断言觉得可能是如果最大次数的字符串有多个,那么就取最长的字符串,我是这么理解的

def find_repeat(words: str) -> tuple:
    dic1={}
    list2=[]
    for i in range(len(words)):
        for j in range(i+2,len(words)+1):
            dic1[words[i:j]]=words.count(words[i:j])
    list1=sorted(dic1.items(),key=lambda xy:xy[1],reverse=True)
    max_num=max(list1,key=lambda x:x[1])[1]
    for j in list1:
        if j[1]==max_num:
            list2.append(j)
    if len(list2)>1:
        return max(list2,key=lambda x:len(x[0]))
    else:
        return list2[0]
assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

def find_repeat(words: str) -> tuple:
    m = 0
    ord_num = 97
    str_list = []
    return_str = ''
    while m < len(list(words)):
        str = ''
        n = 0
        for n in range(n, len(list(words))):
            if ord(words[n]) == ord_num + n:
                str += words[n]
                n += 1
            else:
                break
        if str != '':
            str_list.append(str)
        m += 1
    if len(str_list) == 1:
        return_str = str_list[0]
    else:
        return_str = str_list[0]
        for i in range(1, len(str_list)):
            if len(str_list[i]) < len(list(return_str)):
                return_str = str_list[i]
    return (return_str, words.count(return_str))



assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

这道题感觉理解起来有些歧义,不太懂到底子串是要怎么计算;
比如abcabcbc的结果到底应该是啥?是a,2,还是bc,4


def find_repeat(words: str) -> tuple:
    result = {}
    substr = ""
    for w in words:
        if substr == "": substr = w
        else:
            last_count = words.count(substr)
            count = words.count(substr+w)
            if count >= last_count:
                substr += w
            else:
                result[substr] = last_count
                if len(substr)*last_count == len(words): break
                substr = w
    result[substr] = words.count(substr)
    ss,count = "",0
    for key in result:
        if ss == "" or len(key)<len(ss):
            ss = key
            count = result[key]
    return (ss,count)


assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

assert find_repeat("a") == ("a", 1)
assert find_repeat("ab") == ("ab", 1)
assert find_repeat("abcabcbc") == ("bc", 3)
assert find_repeat("bcabcabcbc") == ("bc",4)
assert find_repeat("adeadebcbc") == ("bc", 2)
assert find_repeat("bcbcadeade") == ("bc", 2)