【每日一题1103】寻找子串

Amoyshmily · 2021 年11 月 3 日 03:05

给定一个非空字符串words，只包含纯小写字母。请编写一个函数，找出其中最短的一个子字符串，以及它重复的最大次数。

示例：
输入： ababab，输出： ("ab", 3)。
输入：abcde，输出：("abcde", 1)。

题目难度：中等
题目来源：codewars: Repeated Substring

def find_repeat(words: str) -> tuple:
    pass

assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

fwj · 2021 年11 月 3 日 03:44

占个坑，求简单解

def find_repeat(word:str):
    length=len(word)
    for i in range(len(word)):
        if length%(i+1)==0:
            res2=length/(i+1)
            res1=word[0:(i+1)]
            if res1*int(res2)==word:
                return (res1,int(res2))
            else:
                continue
        else:
            continue

nobugs · 2021 年11 月 3 日 03:56

def find_repeat(words: str) -> tuple:
    son_list = [words[i:i + x + 1] for x in range(len(words)) for i in range(len(words) - x)]
    count_dict = {}
    for w in son_list:
        if w not in count_dict.keys():
            count_dict[w] = words.count(w)
        else:
            if words.count(w) > count_dict[w]:
                count_dict[w] = words.count(w)
    count_max = 0
    max_word = ''
    for key, value in count_dict.items():
        if count_dict[key] > count_max:
            count_max = count_dict[key]
            max_word = key
        if count_dict[key] == count_max:
            if len(key) > len(max_word):
                max_word = key
    return max_word, count_max


assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

nobugs · 2021 年11 月 3 日 03:57

没看懂，能讲讲思路吗？

Mooii · 2021 年11 月 3 日 04:20

，没有思路

nobugs · 2021 年11 月 3 日 05:29

没理解错的话可能是这样

def find_repeat(words: str) -> tuple:
    return [(words[0:i], len(words) // i) for i in range(1, len(words)+1) if words[0:i] * (len(words) // i) == words][0]


assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

jisonglin · 2021 年11 月 3 日 06:51

def find_repeat(words):
    lengh=len(words)
    min_len=float("inf")
    max_count=0
    for i in range(lengh):
        for j in range(i+2,lengh):
                if len(words[i:j])<min_len and words.count(words[i:j])>max_count:
                    min_len=len(words[i:j])
                    max_count=words.count(words[i:j])
                    word=words[i:j]
    if max_count>1:
        return (word,max_count)
    if max_count==1:
        return (words,max_count)

assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

jisonglin · 2021 年11 月 3 日 06:52

这个思路存在问题

fwj · 2021 年11 月 3 日 07:29

啥问题，请赐教

jisonglin · 2021 年11 月 3 日 08:51

你这个获得不是子字符串，我如果将words改为abababc，你这边的断言就是错的
assert find_repeat(“abababc”) == (“ab”, 3)

fwj · 2021 年11 月 3 日 09:00

可能题意理解的不一致，我理解的是最短字串且重复n次能匹配word，按你的意思题目中的实例“abcde“，应该得到的结果是（”a"，1），而不是（“abcde”，1），同理如果是abababc，得到的结果应该为（abababc，1），而不是（“ab”，3），因为重复3次并不能匹配abababc。这样才能解释为什么abcde输出的是（“abcde”，1）

jisonglin · 2021 年11 月 3 日 09:08

是这样的，我对这个题目的描述也很困惑，因为子字符串的确包括a，b等等。
按照你这样的理解，那其实也没有错。
另外，如果words是abababa的话我的断言出来的也有歧义。

Stone1688 · 2021 年11 月 4 日 02:25

我没搞懂(“abcde”, 1)最短的子字符串为啥是 abcde，不应该是 ab，bc，cd，ce 吗？

nobugs · 2021 年11 月 4 日 02:37

我理解的是应该存在重复的子字符串吧，这道题我读起来也不是很清晰

929684526_7390 · 2021 年11 月 4 日 03:46

def find_repeat(self, word:str):
    '''
    给定一个非空字符串words，只包含纯小写字母。请编写一个函数，找出其中最短的一个子字符串，以及它重复的最大次数。

    示例：
    输入： ababab，输出： ("ab", 3)。
    输入：abcde，输出：("abcde", 1)。
    :return:
    '''
    repeat_list = []
    wordlen = len(word)
    for i in range(0, wordlen):
        for j in range(i+1, wordlen):
            for k in range(2,  wordlen - j + 1):
                if i + k - 1 >= j:
                    break

                if word[j: j+k] == word[i: i+k]:
                    repeat_list.append(word[i: i + k])
                    continue
                else:
                    continue

        pass
    repeat_set = set(repeat_list)
    repeat_dict = {}
    result = tuple((word, 1))
    max_num = 1
    for data in repeat_set:
        count = 1
        for data2 in repeat_list:
            if data == data2:
                count +=1
        repeat_dict[data] = count
        if count > max_num:
            result = (data, count)
    print(repeat_list)
    print(repeat_dict)
    return result

PrinceMercy · 2021 年11 月 4 日 09:17

def find_repeat(words: str) → tuple:

words_length = len(words)

temp_word = ""

min_length = words_length

min_word = words

max_times = 1

temp_times = 1

for i in range(1,words_length+1):

    for j in range(0,words_length-i+1):

        """"子字符串长度大于1时，需要再多一个循环"""

        if i > 1:

            for k in range(j,words_length+i-1,i):

                if temp_word == words[k:i+k]:

                    temp_length = i

                    temp_times += 1

                    if temp_length <= min_length:

                        if temp_times > max_times:

                            min_length = temp_length

                            max_times = temp_times

                            min_word  = temp_word

                else:

                    temp_word = words[k:i+k]

                    temp_times = 1

            """"子字符串长度小于1时"""

        else:

            if temp_word == words[j:i+j]:

                temp_length = i

                temp_times += 1

                if temp_length <= min_length:

                    if temp_times > max_times:

                        min_length = temp_length

                        max_times = temp_times

                        min_word  = temp_word

            else:

                temp_word = words[j:i+j]

                temp_times = 1

print(min_word,max_times)

return (min_word,max_times)

assert find_repeat(“ababab”) == (“ab”, 3)

assert find_repeat(“abcde”) == (“abcde”, 1)

assert find_repeat(“1baba1”) == (“ba”, 2)

assert find_repeat(“11abab1”) == (“1”, 2)

lekaixin · 2021 年11 月 18 日 09:34

根据断言觉得可能是如果最大次数的字符串有多个，那么就取最长的字符串，我是这么理解的

def find_repeat(words: str) -> tuple:
    dic1={}
    list2=[]
    for i in range(len(words)):
        for j in range(i+2,len(words)+1):
            dic1[words[i:j]]=words.count(words[i:j])
    list1=sorted(dic1.items(),key=lambda xy:xy[1],reverse=True)
    max_num=max(list1,key=lambda x:x[1])[1]
    for j in list1:
        if j[1]==max_num:
            list2.append(j)
    if len(list2)>1:
        return max(list2,key=lambda x:len(x[0]))
    else:
        return list2[0]
assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

wangke · 2022 年1 月 5 日 09:48

981603575_2605 · 2022 年1 月 7 日 01:40

def find_repeat(words: str) -> tuple:
    m = 0
    ord_num = 97
    str_list = []
    return_str = ''
    while m < len(list(words)):
        str = ''
        n = 0
        for n in range(n, len(list(words))):
            if ord(words[n]) == ord_num + n:
                str += words[n]
                n += 1
            else:
                break
        if str != '':
            str_list.append(str)
        m += 1
    if len(str_list) == 1:
        return_str = str_list[0]
    else:
        return_str = str_list[0]
        for i in range(1, len(str_list)):
            if len(str_list[i]) < len(list(return_str)):
                return_str = str_list[i]
    return (return_str, words.count(return_str))



assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

Huis · 2022 年1 月 17 日 02:42

这道题感觉理解起来有些歧义，不太懂到底子串是要怎么计算；
比如abcabcbc的结果到底应该是啥？是a,2，还是bc,4


def find_repeat(words: str) -> tuple:
    result = {}
    substr = ""
    for w in words:
        if substr == "": substr = w
        else:
            last_count = words.count(substr)
            count = words.count(substr+w)
            if count >= last_count:
                substr += w
            else:
                result[substr] = last_count
                if len(substr)*last_count == len(words): break
                substr = w
    result[substr] = words.count(substr)
    ss,count = "",0
    for key in result:
        if ss == "" or len(key)<len(ss):
            ss = key
            count = result[key]
    return (ss,count)


assert find_repeat("ababab") == ("ab", 3)
assert find_repeat("abcde") == ("abcde", 1)

assert find_repeat("a") == ("a", 1)
assert find_repeat("ab") == ("ab", 1)
assert find_repeat("abcabcbc") == ("bc", 3)
assert find_repeat("bcabcabcbc") == ("bc",4)
assert find_repeat("adeadebcbc") == ("bc", 2)
assert find_repeat("bcbcadeade") == ("bc", 2)