Last Updated: 2023-12-22 08:56:34 Friday
-- TOC --
就是实现strstr接口...
Given two strings needle and haystack, return the index of the first occurrence of needle in haystack, or -1 if needle is not part of haystack.
Example 1:
Input: haystack = "sadbutsad", needle = "sad"
Output: 0
Explanation: "sad" occurs at index 0 and 6.
The first occurrence is at index 0, so we return 0.
Example 2:
Input: haystack = "leetcode", needle = "leeto"
Output: -1
Explanation: "leeto" did not occur in "leetcode", so we return -1.
Constraints:
- 1 <= haystack.length, needle.length <= 104
- haystack and needle consist of only lowercase English characters.
这就是C++ string对象的find
接口!Python也有find
,而C有strstr
!
这不是LeetCode第一次出题实现标准库函数,LeetCode第8题,实现简化版的atoi。
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
return haystack.find(needle)
class Solution {
public:
int strStr(string haystack, string needle) noexcept{
return haystack.find(needle);
}
};
int strStr(char* haystack, char* needle) {
char *addr = strstr(haystack, needle);
return addr ? addr-haystack : -1;
}
KMP,Knuth-Morris-Pratt,这个著名的精妙的算法的精髓,就是通过needle构建DFA(确定有限状态自动机)。然后,把haystack放入这个DFA过一次,就得到结果了。假设needle长度为M,haystack长度为N,算法的时间复杂度为\(O(M+N)\)。
class Solution:
def strStr(self, haystack: str, needle: str) -> int:
if(size:=len(needle)) == 0:
return 0
# create DFA
dfa = [{} for _ in range(size)]
dfa[0][needle[0]] = 1
x = 0
for i in range(1,size):
for cond,stat in dfa[x].items():
dfa[i][cond] = stat
c = needle[i]
dfa[i][c] = i+1
x = dfa[x][c] if c in dfa[x] else 0
# search
j = 0
for i,c in enumerate(haystack,1):
if c in dfa[j]:
j = dfa[j][c]
if j == size:
return i-size
else:
j = 0
return -1
构建DFA的那几行代码非常精妙!理解x是关键,在for loop内,x从0号状态开始,i从1号状态开始,x跟随后面状态的变化条件而变化。x实际上是DFA中状态回退的位置,因为存在一些状态转移路径,不需要回到最初的0号状态。内嵌的for loop必须在前面,有可能后面构建状态正常推进的代码会覆盖前面某条从x得到的路径,这也是必须的。
class Solution {
public:
int strStr(string haystack, string needle){
size_t size { needle.size() };
if(size == 0)
return 0;
// create DFA
vector<unordered_map<char,int>> dfa;
dfa.resize(size);
dfa[0][needle[0]] = 1;
int x { 0 };
for(int i{1}; i<size; ++i){
for(auto [k,v]: dfa[x])
dfa[i][k] = v;
char c = needle[i];
dfa[i][c] = i + 1;
if(dfa[x].contains(c))
x = dfa[x][c];
else x = 0;
}
// search
int s { 0 };
for(int i{}; i<haystack.size(); ++i){
char c = haystack[i];
if(dfa[s].contains(c)){
s = dfa[s][c];
if(s == size)
return i-size+1;
}
else s = 0;
}
return -1;
}
};
#define CNUM 26 // count for lowercase English char
int strStr(char* haystack, char* needle) {
int size = strlen(needle);
if(size == 0)
return 0;
// create DFA
int (*dfa)[CNUM] = calloc(size,CNUM*sizeof(int));
dfa[0][needle[0]-97] = 1;
int x = 0;
for(int i=1; i<size; ++i){
for(int j=0; j<CNUM; ++j)
dfa[i][j] = dfa[x][j];
char c = needle[i]-97;
dfa[i][c] = i+1;
x = dfa[x][c];
}
// search
int s = 0;
for(int i=0; i<strlen(haystack); ++i){
char c = haystack[i]-97;
if(dfa[s][c] != 0){
s = dfa[s][c];
if(s == size){
free(dfa);
return i-size+1;
}
}else s = 0;
}
free(dfa);
return -1;
}
int (*dfa)[26]
,dfa是一个指向int[26]
类型的指针本文链接:https://cs.pynote.net/ag/leetcode/202311091/
-- EOF --
-- MORE --