本文共 4400 字,大约阅读时间需要 14 分钟。
问题描述:
Given two sequences. X = (x1 , x2 , …, xm), Y = (y1 , y2 , …, yn), find a maximum length common subsequence (LCS) of X and Y.
E.g.: X = (A, B, C, B, D, A, B), Y = (B, D, C, A, B, A), (B, C, B, A) and (B, D, A, B) are longest common subsequences of X and Y (length = 4).题目联想:
突然就想起了大二上学期数据结构里的算法 —— KMP模式匹配算法?,现在忘了…… 在这里码住记得复习。。
最长公共子串和最长公共子序列的区别:
子串要求在原字符串中是连续的,而子序列则只需保持相对顺序一致,并不要求连续。
❗算法描述❗:
回到动态规划上来,依然按照动态规划的四步走策略:
① 定义子问题:
x[0 …… i],y[0 …… j]
② 是否满足最优子结构:
嗯~ o(* ̄▽ ̄*)o满足?
③ 确定子问题的求解顺序:
很明显咯,自底向上bottom - up!
|~ 0, i = 0 or j = 0; c[i, j] = |~ c[i-1, j-1] + 1, x[i] == x[j]; |~ max(c[i, j-1], c[i-1, j]), x[i] != x[j].
④ 回溯最优解:
代码实现:
#include#include using namespace std;void printLCS(vector > b, vector x, int i, int j);int main(){ vector x; cout << "请输入X序列:" << endl; while (1) { char temp; cin >> temp; x.push_back(temp); if (getchar() == '\n') { break; } } vector y; cout << "请输入Y序列:" << endl; while (1) { char temp; cin >> temp; y.push_back(temp); if (getchar() == '\n') { break; } } int len1 = int(x.size()); int len2 = int(y.size()); vector > c(len1 + 1, vector (len2 + 1, 0)); vector > b(len1 + 1, vector (len2 + 1, -2)); for (int i = 1; i < len1 + 1; ++i) //因为二维数组的第 0 行和第 0 列 作为 base case 没有使用 { for (int j = 1; j < len2 + 1; ++j) { if (x[i - 1] == y[j - 1]) //所以字符串的第 i-1 个对应于 二维数组的 第 i 行 { c[i][j] = c[i - 1][j - 1] + 1; b[i][j] = 0; } else { //c[i][j] = max(c[i][j - 1], c[i - 1][j]); if (c[i - 1][j] >= c[i][j - 1]) { c[i][j] = c[i - 1][j]; b[i][j] = 1; } else { c[i][j] = c[i][j - 1]; b[i][j] = -1; } } } } cout << "最长公共子序列的长度:" << endl; cout << c[len1][len2] << endl; cout << "最长公共子序列:" << endl; printLCS(b, x, len1, len2); cout << endl; return 0;}void printLCS(vector > b, vector x, int i, int j){ if (i == 0 || j == 0) { return; } if (b[i][j] == 0) { printLCS(b, x, i - 1, j - 1); cout << x[i - 1] << ' '; } else if (b[i][j] == 1) { printLCS(b, x, i - 1, j); } else { printLCS(b, x, i, j - 1); }}
最后贴一个写的蛮全的博客叭:
2019/04/12次日更新:
发现其实上面从 c[1][1]开始遍历有点绕,弄得简单点,我们不要把 c[i][0] 、c[0][j] 就一直赋值为 0 了,稍微修改一下,如下(修改时注意数组下标范围的更改):
#include#include using namespace std;int max(int, int);int main(){ vector x; cout << "请输入X序列:" << endl; while (1) { char temp; cin >> temp; x.push_back(temp); if (getchar() == '\n') { break; } } vector y; cout << "请输入Y序列:" << endl; while (1) { char temp; cin >> temp; y.push_back(temp); if (getchar() == '\n') { break; } } int len1 = int(x.size()); int len2 = int(y.size()); vector > c(len1, vector (len2 , 0)); //c[i][0]、c[0][j]为base case,所以要先计算好它们的值 if (x[0] == y[0]) { c[0][0] = 1; } for (int i = 1; i < len1; ++i) { if (x[i] == y[0]) { c[i][0] = 1; } else { c[i][0] = c[i-1][0]; } } for (int j = 1; j < len2; ++j) { if (x[0] == y[j]) { c[0][j] = 1; } } for (int i = 1; i < len1; ++i) { for (int j = 1; j < len2; ++j) { if (x[i] == y[j]) { c[i][j] = c[i - 1][j - 1] + 1; } else { c[i][j] = max(c[i][j - 1], c[i - 1][j]); } } } cout << "最长公共子序列的长度:" << endl; cout << c[len1 - 1][len2 - 1] << endl; return 0;}int max(int a, int b){ return a > b ? a : b;}
and 优化:
如果只要求输出最长公共子序列的长度,我们可以再将上一算法进行优化,对记录最优解的数组 即 数组 c[] 使用滚动数组,只需要两行就 ok 啦!
具体的自己画下图就懂啦嘻嘻?
#include#include using namespace std;int max(int, int);int main(){ vector x; cout << "请输入X序列:" << endl; while (1) { char temp; cin >> temp; x.push_back(temp); if (getchar() == '\n') { break; } } vector y; cout << "请输入Y序列:" << endl; while (1) { char temp; cin >> temp; y.push_back(temp); if (getchar() == '\n') { break; } } int len1 = int(x.size()); int len2 = int(y.size()); vector > c(2, vector (len2 , 0)); //滚动数组,节约空间 for (int j = 0; j < len2; ++j) { if (x[0] == y[j]) { c[0][j] = 1; } } for (int i = 1; i < len1; ++i) { if (x[i] == y[0]) { c[0][0] = 1; } for (int j = 1; j < len2; ++j) { if (x[i] == y[j]) { c[1][j] = c[0][j - 1] + 1; c[0][j] = c[1][j]; } else { c[1][j] = max(c[0][j - 1], c[0][j]); c[0][j] = c[1][j]; } } } cout << "最长公共子序列的长度:" << endl; cout << c[0][len2 - 1] << endl; return 0;}int max(int a, int b){ return a > b ? a : b;}
转载地址:http://qttai.baihongyu.com/