Merge branch 'lcs' into unstable
This commit is contained in:
commit
121c51f4f3
26
src/db.c
26
src/db.c
@ -1554,6 +1554,32 @@ int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numk
|
||||
return keys;
|
||||
}
|
||||
|
||||
/* LCS ... [KEYS <key1> <key2>] ... */
|
||||
int *lcsGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys)
|
||||
{
|
||||
int i;
|
||||
int *keys = getKeysTempBuffer;
|
||||
UNUSED(cmd);
|
||||
|
||||
/* We need to parse the options of the command in order to check for the
|
||||
* "KEYS" argument before the "STRINGS" argument. */
|
||||
for (i = 1; i < argc; i++) {
|
||||
char *arg = argv[i]->ptr;
|
||||
int moreargs = (argc-1) - i;
|
||||
|
||||
if (!strcasecmp(arg, "strings")) {
|
||||
break;
|
||||
} else if (!strcasecmp(arg, "keys") && moreargs >= 2) {
|
||||
keys[0] = i+1;
|
||||
keys[1] = i+2;
|
||||
*numkeys = 2;
|
||||
return keys;
|
||||
}
|
||||
}
|
||||
*numkeys = 0;
|
||||
return keys;
|
||||
}
|
||||
|
||||
/* Helper function to extract keys from memory command.
|
||||
* MEMORY USAGE <key> */
|
||||
int *memoryGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) {
|
||||
|
@ -1004,7 +1004,11 @@ struct redisCommand redisCommandTable[] = {
|
||||
|
||||
{"acl",aclCommand,-2,
|
||||
"admin no-script no-slowlog ok-loading ok-stale",
|
||||
0,NULL,0,0,0,0,0,0}
|
||||
0,NULL,0,0,0,0,0,0},
|
||||
|
||||
{"lcs",lcsCommand,-4,
|
||||
"write use-memory @string",
|
||||
0,lcsGetKeys,0,0,0,0,0,0}
|
||||
};
|
||||
|
||||
/*============================ Utility functions ============================ */
|
||||
|
@ -2102,6 +2102,7 @@ int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkey
|
||||
int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
||||
int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
||||
int *memoryGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
||||
int *lcsGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
||||
|
||||
/* Cluster */
|
||||
void clusterInit(void);
|
||||
@ -2373,6 +2374,7 @@ void xdelCommand(client *c);
|
||||
void xtrimCommand(client *c);
|
||||
void lolwutCommand(client *c);
|
||||
void aclCommand(client *c);
|
||||
void lcsCommand(client *c);
|
||||
|
||||
#if defined(__GNUC__)
|
||||
void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
|
||||
|
204
src/t_string.c
204
src/t_string.c
@ -479,3 +479,207 @@ void strlenCommand(client *c) {
|
||||
checkType(c,o,OBJ_STRING)) return;
|
||||
addReplyLongLong(c,stringObjectLen(o));
|
||||
}
|
||||
|
||||
/* LCS -- Longest common subsequence.
|
||||
*
|
||||
* LCS [IDX] [MINMATCHLEN <len>]
|
||||
* STRINGS <string> <string> | KEYS <keya> <keyb> */
|
||||
void lcsCommand(client *c) {
|
||||
uint32_t i, j;
|
||||
long long minmatchlen = 0;
|
||||
sds a = NULL, b = NULL;
|
||||
int getlen = 0, getidx = 0, withmatchlen = 0;
|
||||
robj *obja = NULL, *objb = NULL;
|
||||
|
||||
for (j = 1; j < (uint32_t)c->argc; j++) {
|
||||
char *opt = c->argv[j]->ptr;
|
||||
int moreargs = (c->argc-1) - j;
|
||||
|
||||
if (!strcasecmp(opt,"IDX")) {
|
||||
getidx = 1;
|
||||
} else if (!strcasecmp(opt,"LEN")) {
|
||||
getlen = 1;
|
||||
} else if (!strcasecmp(opt,"WITHMATCHLEN")) {
|
||||
withmatchlen = 1;
|
||||
} else if (!strcasecmp(opt,"MINMATCHLEN") && moreargs) {
|
||||
if (getLongLongFromObjectOrReply(c,c->argv[j+1],&minmatchlen,NULL)
|
||||
!= C_OK) return;
|
||||
if (minmatchlen < 0) minmatchlen = 0;
|
||||
j++;
|
||||
} else if (!strcasecmp(opt,"STRINGS")) {
|
||||
if (a != NULL) {
|
||||
addReplyError(c,"Either use STRINGS or KEYS");
|
||||
return;
|
||||
}
|
||||
a = c->argv[j+1]->ptr;
|
||||
b = c->argv[j+2]->ptr;
|
||||
j += 2;
|
||||
} else if (!strcasecmp(opt,"KEYS")) {
|
||||
if (a != NULL) {
|
||||
addReplyError(c,"Either use STRINGS or KEYS");
|
||||
return;
|
||||
}
|
||||
obja = lookupKeyRead(c->db,c->argv[j+1]);
|
||||
objb = lookupKeyRead(c->db,c->argv[j+2]);
|
||||
obja = obja ? getDecodedObject(obja) : createStringObject("",0);
|
||||
objb = objb ? getDecodedObject(objb) : createStringObject("",0);
|
||||
a = obja->ptr;
|
||||
b = objb->ptr;
|
||||
j += 2;
|
||||
} else {
|
||||
addReply(c,shared.syntaxerr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Complain if the user passed ambiguous parameters. */
|
||||
if (a == NULL) {
|
||||
addReplyError(c,"Please specify two strings: "
|
||||
"STRINGS or KEYS options are mandatory");
|
||||
return;
|
||||
} else if (getlen && getidx) {
|
||||
addReplyError(c,
|
||||
"If you want both the length and indexes, please "
|
||||
"just use IDX.");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Compute the LCS using the vanilla dynamic programming technique of
|
||||
* building a table of LCS(x,y) substrings. */
|
||||
uint32_t alen = sdslen(a);
|
||||
uint32_t blen = sdslen(b);
|
||||
|
||||
/* Setup an uint32_t array to store at LCS[i,j] the length of the
|
||||
* LCS A0..i-1, B0..j-1. Note that we have a linear array here, so
|
||||
* we index it as LCS[j+(blen+1)*j] */
|
||||
uint32_t *lcs = zmalloc((alen+1)*(blen+1)*sizeof(uint32_t));
|
||||
#define LCS(A,B) lcs[(B)+((A)*(blen+1))]
|
||||
|
||||
/* Start building the LCS table. */
|
||||
for (uint32_t i = 0; i <= alen; i++) {
|
||||
for (uint32_t j = 0; j <= blen; j++) {
|
||||
if (i == 0 || j == 0) {
|
||||
/* If one substring has length of zero, the
|
||||
* LCS length is zero. */
|
||||
LCS(i,j) = 0;
|
||||
} else if (a[i-1] == b[j-1]) {
|
||||
/* The len LCS (and the LCS itself) of two
|
||||
* sequences with the same final character, is the
|
||||
* LCS of the two sequences without the last char
|
||||
* plus that last char. */
|
||||
LCS(i,j) = LCS(i-1,j-1)+1;
|
||||
} else {
|
||||
/* If the last character is different, take the longest
|
||||
* between the LCS of the first string and the second
|
||||
* minus the last char, and the reverse. */
|
||||
uint32_t lcs1 = LCS(i-1,j);
|
||||
uint32_t lcs2 = LCS(i,j-1);
|
||||
LCS(i,j) = lcs1 > lcs2 ? lcs1 : lcs2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Store the actual LCS string in "result" if needed. We create
|
||||
* it backward, but the length is already known, we store it into idx. */
|
||||
uint32_t idx = LCS(alen,blen);
|
||||
sds result = NULL; /* Resulting LCS string. */
|
||||
void *arraylenptr = NULL; /* Deffered length of the array for IDX. */
|
||||
uint32_t arange_start = alen, /* alen signals that values are not set. */
|
||||
arange_end = 0,
|
||||
brange_start = 0,
|
||||
brange_end = 0;
|
||||
|
||||
/* Do we need to compute the actual LCS string? Allocate it in that case. */
|
||||
int computelcs = getidx || !getlen;
|
||||
if (computelcs) result = sdsnewlen(SDS_NOINIT,idx);
|
||||
|
||||
/* Start with a deferred array if we have to emit the ranges. */
|
||||
uint32_t arraylen = 0; /* Number of ranges emitted in the array. */
|
||||
if (getidx) {
|
||||
addReplyMapLen(c,2);
|
||||
addReplyBulkCString(c,"matches");
|
||||
arraylenptr = addReplyDeferredLen(c);
|
||||
}
|
||||
|
||||
i = alen, j = blen;
|
||||
while (computelcs && i > 0 && j > 0) {
|
||||
int emit_range = 0;
|
||||
if (a[i-1] == b[j-1]) {
|
||||
/* If there is a match, store the character and reduce
|
||||
* the indexes to look for a new match. */
|
||||
result[idx-1] = a[i-1];
|
||||
|
||||
/* Track the current range. */
|
||||
if (arange_start == alen) {
|
||||
arange_start = i-1;
|
||||
arange_end = i-1;
|
||||
brange_start = j-1;
|
||||
brange_end = j-1;
|
||||
} else {
|
||||
/* Let's see if we can extend the range backward since
|
||||
* it is contiguous. */
|
||||
if (arange_start == i && brange_start == j) {
|
||||
arange_start--;
|
||||
brange_start--;
|
||||
} else {
|
||||
emit_range = 1;
|
||||
}
|
||||
}
|
||||
/* Emit the range if we matched with the first byte of
|
||||
* one of the two strings. We'll exit the loop ASAP. */
|
||||
if (arange_start == 0 || brange_start == 0) emit_range = 1;
|
||||
idx--; i--; j--;
|
||||
} else {
|
||||
/* Otherwise reduce i and j depending on the largest
|
||||
* LCS between, to understand what direction we need to go. */
|
||||
uint32_t lcs1 = LCS(i-1,j);
|
||||
uint32_t lcs2 = LCS(i,j-1);
|
||||
if (lcs1 > lcs2)
|
||||
i--;
|
||||
else
|
||||
j--;
|
||||
if (arange_start != alen) emit_range = 1;
|
||||
}
|
||||
|
||||
/* Emit the current range if needed. */
|
||||
uint32_t match_len = arange_end - arange_start + 1;
|
||||
if (emit_range) {
|
||||
if (minmatchlen == 0 || match_len >= minmatchlen) {
|
||||
if (arraylenptr) {
|
||||
addReplyArrayLen(c,2+withmatchlen);
|
||||
addReplyArrayLen(c,2);
|
||||
addReplyLongLong(c,arange_start);
|
||||
addReplyLongLong(c,arange_end);
|
||||
addReplyArrayLen(c,2);
|
||||
addReplyLongLong(c,brange_start);
|
||||
addReplyLongLong(c,brange_end);
|
||||
if (withmatchlen) addReplyLongLong(c,match_len);
|
||||
arraylen++;
|
||||
}
|
||||
}
|
||||
arange_start = alen; /* Restart at the next match. */
|
||||
}
|
||||
}
|
||||
|
||||
/* Signal modified key, increment dirty, ... */
|
||||
|
||||
/* Reply depending on the given options. */
|
||||
if (arraylenptr) {
|
||||
addReplyBulkCString(c,"len");
|
||||
addReplyLongLong(c,LCS(alen,blen));
|
||||
setDeferredArrayLen(c,arraylenptr,arraylen);
|
||||
} else if (getlen) {
|
||||
addReplyLongLong(c,LCS(alen,blen));
|
||||
} else {
|
||||
addReplyBulkSds(c,result);
|
||||
result = NULL;
|
||||
}
|
||||
|
||||
/* Cleanup. */
|
||||
if (obja) decrRefCount(obja);
|
||||
if (objb) decrRefCount(objb);
|
||||
sdsfree(result);
|
||||
zfree(lcs);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -419,4 +419,34 @@ start_server {tags {"string"}} {
|
||||
r set foo bar
|
||||
r getrange foo 0 4294967297
|
||||
} {bar}
|
||||
|
||||
set rna1 {CACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTG}
|
||||
set rna2 {ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT}
|
||||
set rnalcs {ACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT}
|
||||
|
||||
test {LCS string output with STRINGS option} {
|
||||
r LCS STRINGS $rna1 $rna2
|
||||
} $rnalcs
|
||||
|
||||
test {LCS len} {
|
||||
r LCS LEN STRINGS $rna1 $rna2
|
||||
} [string length $rnalcs]
|
||||
|
||||
test {LCS with KEYS option} {
|
||||
r set virus1 $rna1
|
||||
r set virus2 $rna2
|
||||
r LCS KEYS virus1 virus2
|
||||
} $rnalcs
|
||||
|
||||
test {LCS indexes} {
|
||||
dict get [r LCS IDX KEYS virus1 virus2] matches
|
||||
} {{{238 238} {239 239}} {{236 236} {238 238}} {{229 230} {236 237}} {{224 224} {235 235}} {{1 222} {13 234}}}
|
||||
|
||||
test {LCS indexes with match len} {
|
||||
dict get [r LCS IDX KEYS virus1 virus2 WITHMATCHLEN] matches
|
||||
} {{{238 238} {239 239} 1} {{236 236} {238 238} 1} {{229 230} {236 237} 2} {{224 224} {235 235} 1} {{1 222} {13 234} 222}}
|
||||
|
||||
test {LCS indexes with match len and minimum match len} {
|
||||
dict get [r LCS IDX KEYS virus1 virus2 WITHMATCHLEN MINMATCHLEN 5] matches
|
||||
} {{{1 222} {13 234} 222}}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user