strStr
http://www.lintcode.com/en/problem/strstr/
Discussion
比较简单。两个长度都为0的情况容易漏掉。
先来个O(nm) rumtime, 是构造substring,然后以substring为单位进行比较,虽然很容易理解coding也更简单,但是要构造source string需要额外的空间,每次取substring也要消耗额外的时间。
int strStr(const char *source, const char *target) {
// write your code here
if(source == nullptr || target == nullptr) return -1;
string src(source);
int n = src.length();
int m = strlen(target);
if(m == 0) return 0;
for(int i=0; i < n-m+1; i++) {
if(src.substr(i, m) == target) {
return i;
}
}
return -1;
}
//use strncmp function for cstring to avoid using substr
int strStr(const char *source, const char *target) {
// write your code here
if(source == nullptr || target == nullptr) return -1;
//string src(source);
int n = strlen(source);
int m = strlen(target);
if(m == 0) return 0;
for(int i=0; i < n-m+1; i++) {
if(strncmp(source+i, target, m) == 0) {
return i;
}
}
return -1;
}
再写一个O(nm)的暴力搜索算法,一个字符一个字符的比,并在次基础上扩展到KMP算法。
int strStr(const char *source, const char *target) {
// write your code here
if(source == nullptr || target == nullptr) {
return -1;
}
int n = strlen(source);
int m = strlen(target);
if(m == 0) return 0;
int i = 0;
int j = 0;
while(i<n) {
if(source[i] == target[j]) {
i++;
j++;
} else {
i = i-j+1;//move back j steps (where it starts), and move forward 1 step
j = 0; //start from the begining of target string
}
if(j == m) return i-j;
}
return -1;
}
Solution 2 KMP Algotithm
KMP 算法参考:http://wiki.jikexueyuan.com/project/kmp-algorithm/define.html
讲的非常非常详细,关键是如何利用之前搜索信息,移动target。
//O(n) run time
//O(m) space
class Solution {
public:
/**
* Returns a index to the first occurrence of target in source,
* or -1 if target is not part of source.
* @param source string to be scanned.
* @param target string containing the sequence of characters to match.
*/
int strStr(const char *source, const char *target) {
// write your code here
if(source == nullptr || target == nullptr) {
return -1;
}
int n = strlen(source);
int m = strlen(target);
if(m == 0) return 0;
int i = 0;
int j = 0;
vector<int> next = GetNext(target);
while(i<n) {
if(j == -1 || source[i] == target[j]) {
i++;//跟上面暴力搜索不同之处在于多了一个j == -1的判断
j++;
} else {
//i = i-j+1;//move back j steps (where it starts), and move forward 1 step
//j = 0; //start from the begining of target string
j = next[j];
}
if(j == m) return i-j;
}
return -1;
}
//output: next[m]
//next[i]: the max length of common subfix and prefix in target[0~i-1]
//next[i] = k means: target[0], target[1]... target[k-1] == target[i-k],
//... target[i-2], target[i-1]
vector<int> GetNext(const char *target) {
int m = strlen(target);
vector<int> next(m, -1); //next[0] = -1, means begining of target
int i = 0;
int j = -1; //starting postion, no common subfix and prefix
while(i < m-1) {
if(j==-1 || target[i] == target[j]) {//target[j]: prefix, target[i]: subfix,若匹配,则最大common子串加一
next[i+1] = j+1;
i++;
j++;
} else {
j = next[j];//若不匹配,则继续看这个最大子串的最大子串,也就是从下一个next[j]开始找小一点的common subfix and prefix
}
}
return next;
}
};