Attachment "grouping_lsort.patch" to
ticket [2082681fff]
added by
kieranelby
2008-09-01 22:38:11.
? grouping_lsort.patch
Index: doc/lsort.n
===================================================================
RCS file: /cvsroot/tcl/tcl/doc/lsort.n,v
retrieving revision 1.30
diff --unified=3 -r1.30 lsort.n
--- doc/lsort.n 29 Jun 2008 22:28:24 -0000 1.30
+++ doc/lsort.n 1 Sep 2008 15:35:55 -0000
@@ -81,11 +81,11 @@
\fB\-index\0\fIindexList\fR
.
If this option is specified, each of the elements of \fIlist\fR must
-itself be a proper Tcl sublist. Instead of sorting based on whole
-sublists, \fBlsort\fR will extract the \fIindexList\fR'th element from
-each sublist
-(as if the overall element and the \fIindexList\fR were passed to
-\fBlindex\fR) and sort based on the given element.
+itself be a proper Tcl sublist (unless \fB-group\fR is used).
+Instead of sorting based on whole sublists, \fBlsort\fR will extract
+the \fIindexList\fR'th element from each sublist (as if the overall
+element and the \fIindexList\fR were passed to \fBlindex\fR) and sort
+based on the given element.
For example,
.RS
.CS
@@ -115,6 +115,30 @@
to achieve the same effect.
.RE
.TP 20
+\fB\-group\0\fIgrpSize\fR
+.
+If this option is specified, the list is treated as consisting of
+groups of \fIgrpSize\fR elements and the groups are sorted by
+either their first element or, if the \fB-index\fR option is used,
+by the element within each group given by the first index passed to
+\fB-index\fR (which is then ignored by \fB-index\fR). Elements
+always remain in the same position within their group.
+.
+The list length must be an integer multiple of \fIgrpSize\fR, which
+in turn must be at least 2.
+.
+For example,
+.RS
+.CS
+lsort -group 2 {carrot 10 apple 50 banana 25}
+.CE
+returns {apple 50 banana 25 carrot 10}, and
+.CS
+lsort -group 2 -index 1 -integer {carrot 10 apple 50 banana 25}
+.CE
+returns {carrot 10 banana 25 apple 50}.
+.RE
+.TP 20
\fB\-nocase\fR
.
Causes comparisons to be handled in a case-insensitive manner. Has no
@@ -180,6 +204,14 @@
{e 1} {d 2} { c 3} {b 4} {a 5}
.CE
.PP
+Sorting using grouping and multiple indices:
+.CS
+% # Note the first index value is relative to the group
+% \fBlsort\fR -group 3 -index {0 1} \e
+ {{Bob Smith} 25 Audi {Jane Doe} 40 Ford}
+{{Jane Doe} 40 Ford {Bob Smith} 25 Audi}
+.CE
+.PP
Stripping duplicate values using sorting:
.CS
% \fBlsort\fR -unique {a b c a b c a b c}
Index: generic/tclCmdIL.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclCmdIL.c,v
retrieving revision 1.151
diff --unified=3 -r1.151 tclCmdIL.c
--- generic/tclCmdIL.c 21 Aug 2008 23:57:43 -0000 1.151
+++ generic/tclCmdIL.c 1 Sep 2008 15:35:56 -0000
@@ -3481,18 +3481,20 @@
Tcl_Obj *const objv[]) /* Argument values. */
{
int i, j, index, unique, indices, length, nocase = 0, sortMode, indexc;
+ int group, groupsize, groupoffset, idx, *new_indexv;
Tcl_Obj *resultPtr, *cmdPtr, **listObjPtrs, *listObj, *indexPtr;
SortElement *elementArray, *elementPtr;
SortInfo sortInfo; /* Information about this sort that needs to
* be passed to the comparison function. */
static const char *switches[] = {
"-ascii", "-command", "-decreasing", "-dictionary", "-increasing",
- "-index", "-indices", "-integer", "-nocase", "-real", "-unique", NULL
+ "-index", "-indices", "-integer", "-nocase", "-real", "-unique",
+ "-group", NULL
};
enum Lsort_Switches {
LSORT_ASCII, LSORT_COMMAND, LSORT_DECREASING, LSORT_DICTIONARY,
LSORT_INCREASING, LSORT_INDEX, LSORT_INDICES, LSORT_INTEGER,
- LSORT_NOCASE, LSORT_REAL, LSORT_UNIQUE
+ LSORT_NOCASE, LSORT_REAL, LSORT_UNIQUE, LSORT_GROUP
};
/*
@@ -3521,6 +3523,9 @@
cmdPtr = NULL;
unique = 0;
indices = 0;
+ group = 0;
+ groupsize = 1;
+ groupoffset = 0;
for (i = 1; i < objc-1; i++) {
if (Tcl_GetIndexFromObj(interp, objv[i], switches, "option", 0,
&index) != TCL_OK) {
@@ -3621,6 +3626,33 @@
case LSORT_INDICES:
indices = 1;
break;
+ case LSORT_GROUP:
+ if (i == (objc-2)) {
+ if (sortInfo.indexc > 1) {
+ ckfree((char *) sortInfo.indexv);
+ }
+ Tcl_AppendResult(interp,
+ "\"-group\" option must be followed "
+ "by group size", NULL);
+ return TCL_ERROR;
+ }
+ if (Tcl_GetIntFromObj(interp, objv[i+1], &groupsize) != TCL_OK) {
+ if (sortInfo.indexc > 1) {
+ ckfree((char *) sortInfo.indexv);
+ }
+ Tcl_AppendResult(interp, " to \"-group\" option", NULL);
+ return TCL_ERROR;
+ }
+ if (groupsize < 2) {
+ if (sortInfo.indexc > 1) {
+ ckfree((char *) sortInfo.indexv);
+ }
+ Tcl_AppendResult(interp, "group size must be at least 2", NULL);
+ return TCL_ERROR;
+ }
+ group = 1;
+ i++;
+ break;
}
}
if (nocase && (sortInfo.sortMode == SORTMODE_ASCII)) {
@@ -3675,6 +3707,46 @@
if (sortInfo.resultCode != TCL_OK || length <= 0) {
goto done;
}
+
+ if (group) {
+ if (length % groupsize) {
+ Tcl_AppendResult(interp,
+ "list size must be a multiple of the group size", NULL);
+ sortInfo.resultCode = TCL_ERROR;
+ goto done;
+ }
+ length = length / groupsize;
+ if (sortInfo.indexc > 0) {
+ /*
+ * Use the first value in the list supplied to -index as the offset
+ * of the element within each group by which to sort.
+ */
+ groupoffset = sortInfo.indexv[0];
+ if (groupoffset <= SORTIDX_END) {
+ groupoffset = (groupoffset - SORTIDX_END) + groupsize - 1;
+ }
+ if (groupoffset < 0 || groupoffset >= groupsize) {
+ Tcl_AppendResult(interp,
+ "when used with \"-group\", the leading \"-index\" value ",
+ " must be within the group", NULL);
+ sortInfo.resultCode = TCL_ERROR;
+ goto done;
+ }
+ if (sortInfo.indexc == 1) {
+ sortInfo.indexc = 0;
+ sortInfo.indexv = NULL;
+ } else {
+ sortInfo.indexc--;
+ new_indexv = (int *) ckalloc(sizeof(int) * sortInfo.indexc);
+ for (i = 0; i < sortInfo.indexc; i++) {
+ new_indexv[i] = sortInfo.indexv[i+1];
+ }
+ ckfree((char *) sortInfo.indexv);
+ sortInfo.indexv = new_indexv;
+ }
+ }
+ }
+
sortInfo.numElements = length;
indexc = sortInfo.indexc;
@@ -3706,16 +3778,17 @@
elementArray = (SortElement *) ckalloc( length * sizeof(SortElement));
for (i=0; i < length; i++){
+ idx = groupsize * i + groupoffset;
if (indexc) {
/*
* If this is an indexed sort, retrieve the corresponding element
*/
- indexPtr = SelectObjFromSublist(listObjPtrs[i], &sortInfo);
+ indexPtr = SelectObjFromSublist(listObjPtrs[idx], &sortInfo);
if (sortInfo.resultCode != TCL_OK) {
goto done1;
}
} else {
- indexPtr = listObjPtrs[i];
+ indexPtr = listObjPtrs[idx];
}
/*
@@ -3747,7 +3820,11 @@
* the objPtr itself, or its index in the original list.
*/
- elementArray[i].objPtr = (indices ? INT2PTR(i) : listObjPtrs[i]);
+ if (indices || group) {
+ elementArray[i].objPtr = INT2PTR(idx);
+ } else {
+ elementArray[i].objPtr = listObjPtrs[idx];
+ }
/*
* Merge this element in the pre-existing sublists (and merge together
@@ -3785,10 +3862,25 @@
Tcl_Obj **newArray, *objPtr;
int i;
- resultPtr = Tcl_NewListObj(sortInfo.numElements, NULL);
+ resultPtr = Tcl_NewListObj(sortInfo.numElements * groupsize, NULL);
listRepPtr = (List *) resultPtr->internalRep.twoPtrValue.ptr1;
newArray = &listRepPtr->elements;
- if (indices) {
+ if (group) {
+ for (i = 0; elementPtr != NULL ; elementPtr = elementPtr->nextPtr) {
+ idx = PTR2INT(elementPtr->objPtr);
+ for (j = 0; j < groupsize; j++) {
+ if (indices) {
+ objPtr = Tcl_NewIntObj(idx + j - groupoffset);
+ newArray[i++] = objPtr;
+ Tcl_IncrRefCount(objPtr);
+ } else {
+ objPtr = listObjPtrs[idx + j - groupoffset];
+ newArray[i++] = objPtr;
+ Tcl_IncrRefCount(objPtr);
+ }
+ }
+ }
+ } else if (indices) {
for (i = 0; elementPtr != NULL ; elementPtr = elementPtr->nextPtr){
objPtr = Tcl_NewIntObj(PTR2INT(elementPtr->objPtr));
newArray[i++] = objPtr;