Tcl Source Code

Artifact [7dcce7034b]
Login

Artifact 7dcce7034b2b217b114d54bf0aba9a06cee9a85b:

Attachment "grouping_lsort.patch" to ticket [2082681fff] added by kieranelby 2008-09-01 22:38:11.
? grouping_lsort.patch
Index: doc/lsort.n
===================================================================
RCS file: /cvsroot/tcl/tcl/doc/lsort.n,v
retrieving revision 1.30
diff --unified=3 -r1.30 lsort.n
--- doc/lsort.n	29 Jun 2008 22:28:24 -0000	1.30
+++ doc/lsort.n	1 Sep 2008 15:35:55 -0000
@@ -81,11 +81,11 @@
 \fB\-index\0\fIindexList\fR
 .
 If this option is specified, each of the elements of \fIlist\fR must
-itself be a proper Tcl sublist.  Instead of sorting based on whole
-sublists, \fBlsort\fR will extract the \fIindexList\fR'th element from
-each sublist
-(as if the overall element and the \fIindexList\fR were passed to
-\fBlindex\fR) and sort based on the given element.  
+itself be a proper Tcl sublist (unless \fB-group\fR is used).
+Instead of sorting based on whole sublists, \fBlsort\fR will extract
+the \fIindexList\fR'th element from each sublist (as if the overall
+element and the \fIindexList\fR were passed to \fBlindex\fR) and sort
+based on the given element.
 For example,
 .RS
 .CS
@@ -115,6 +115,30 @@
 to achieve the same effect.
 .RE
 .TP 20
+\fB\-group\0\fIgrpSize\fR
+.
+If this option is specified, the list is treated as consisting of
+groups of \fIgrpSize\fR elements and the groups are sorted by
+either their first element or, if the \fB-index\fR option is used,
+by the element within each group given by the first index passed to
+\fB-index\fR (which is then ignored by \fB-index\fR). Elements
+always remain in the same position within their group.
+.
+The list length must be an integer multiple of \fIgrpSize\fR, which
+in turn must be at least 2.
+.
+For example,
+.RS
+.CS
+lsort -group 2 {carrot 10 apple 50 banana 25}
+.CE
+returns {apple 50 banana 25 carrot 10}, and
+.CS
+lsort -group 2 -index 1 -integer {carrot 10 apple 50 banana 25}
+.CE
+returns {carrot 10 banana 25 apple 50}.
+.RE
+.TP 20
 \fB\-nocase\fR
 .
 Causes comparisons to be handled in a case-insensitive manner.  Has no
@@ -180,6 +204,14 @@
 {e 1} {d 2} { c 3} {b 4} {a 5}
 .CE
 .PP
+Sorting using grouping and multiple indices:
+.CS
+% # Note the first index value is relative to the group
+% \fBlsort\fR -group 3 -index {0 1} \e
+     {{Bob Smith} 25 Audi {Jane Doe} 40 Ford}
+{{Jane Doe} 40 Ford {Bob Smith} 25 Audi}
+.CE
+.PP
 Stripping duplicate values using sorting:
 .CS
 % \fBlsort\fR -unique {a b c a b c a b c}
Index: generic/tclCmdIL.c
===================================================================
RCS file: /cvsroot/tcl/tcl/generic/tclCmdIL.c,v
retrieving revision 1.151
diff --unified=3 -r1.151 tclCmdIL.c
--- generic/tclCmdIL.c	21 Aug 2008 23:57:43 -0000	1.151
+++ generic/tclCmdIL.c	1 Sep 2008 15:35:56 -0000
@@ -3481,18 +3481,20 @@
     Tcl_Obj *const objv[])	/* Argument values. */
 {
     int i, j, index, unique, indices, length, nocase = 0, sortMode, indexc;
+    int group, groupsize, groupoffset, idx, *new_indexv;
     Tcl_Obj *resultPtr, *cmdPtr, **listObjPtrs, *listObj, *indexPtr;
     SortElement *elementArray, *elementPtr;
     SortInfo sortInfo;		/* Information about this sort that needs to
 				 * be passed to the comparison function. */
     static const char *switches[] = {
 	"-ascii", "-command", "-decreasing", "-dictionary", "-increasing",
-	"-index", "-indices", "-integer", "-nocase", "-real", "-unique", NULL
+	"-index", "-indices", "-integer", "-nocase", "-real", "-unique",
+	"-group", NULL
     };
     enum Lsort_Switches {
 	LSORT_ASCII, LSORT_COMMAND, LSORT_DECREASING, LSORT_DICTIONARY,
 	LSORT_INCREASING, LSORT_INDEX, LSORT_INDICES, LSORT_INTEGER,
-	LSORT_NOCASE, LSORT_REAL, LSORT_UNIQUE
+	LSORT_NOCASE, LSORT_REAL, LSORT_UNIQUE, LSORT_GROUP
     };
 
     /*
@@ -3521,6 +3523,9 @@
     cmdPtr = NULL;
     unique = 0;
     indices = 0;
+    group = 0;
+    groupsize = 1;
+    groupoffset = 0;
     for (i = 1; i < objc-1; i++) {
 	if (Tcl_GetIndexFromObj(interp, objv[i], switches, "option", 0,
 		&index) != TCL_OK) {
@@ -3621,6 +3626,33 @@
 	case LSORT_INDICES:
 	    indices = 1;
 	    break;
+	case LSORT_GROUP:
+		if (i == (objc-2)) {
+			if (sortInfo.indexc > 1) {
+				ckfree((char *) sortInfo.indexv);
+			}
+			Tcl_AppendResult(interp,
+				"\"-group\" option must be followed "
+				"by group size", NULL);
+			return TCL_ERROR;
+		}
+		if (Tcl_GetIntFromObj(interp, objv[i+1], &groupsize) != TCL_OK) {
+			if (sortInfo.indexc > 1) {
+				ckfree((char *) sortInfo.indexv);
+			}
+			Tcl_AppendResult(interp, " to \"-group\" option", NULL);
+			return TCL_ERROR;
+		}
+		if (groupsize < 2) {
+			if (sortInfo.indexc > 1) {
+				ckfree((char *) sortInfo.indexv);
+			}
+			Tcl_AppendResult(interp, "group size must be at least 2", NULL);
+			return TCL_ERROR;
+		}
+		group = 1;
+		i++;
+		break;
 	}
     }
     if (nocase && (sortInfo.sortMode == SORTMODE_ASCII)) {
@@ -3675,6 +3707,46 @@
     if (sortInfo.resultCode != TCL_OK || length <= 0) {
 	goto done;
     }
+
+	if (group) {
+		if (length % groupsize) {
+			Tcl_AppendResult(interp,
+			  "list size must be a multiple of the group size", NULL);
+			sortInfo.resultCode = TCL_ERROR;
+			goto done;
+		}
+		length = length / groupsize;
+		if (sortInfo.indexc > 0) {
+			/*
+			* Use the first value in the list supplied to -index as the offset
+			* of the element within each group by which to sort.
+			*/
+			groupoffset = sortInfo.indexv[0];
+			if (groupoffset <= SORTIDX_END) {
+				groupoffset = (groupoffset - SORTIDX_END) + groupsize - 1;
+			}
+			if (groupoffset < 0 || groupoffset >= groupsize) {
+				Tcl_AppendResult(interp,
+				  "when used with \"-group\", the leading \"-index\" value ",
+				  " must be within the group", NULL);
+				sortInfo.resultCode = TCL_ERROR;
+				goto done;
+			}
+			if (sortInfo.indexc == 1) {
+				sortInfo.indexc = 0;
+				sortInfo.indexv = NULL;
+			} else {
+				sortInfo.indexc--;
+				new_indexv = (int *) ckalloc(sizeof(int) * sortInfo.indexc);
+				for (i = 0; i < sortInfo.indexc; i++) {
+					new_indexv[i] = sortInfo.indexv[i+1];
+				}
+				ckfree((char *) sortInfo.indexv);
+				sortInfo.indexv = new_indexv;
+			}
+		}
+	}
+
     sortInfo.numElements = length;
 
     indexc = sortInfo.indexc;
@@ -3706,16 +3778,17 @@
     elementArray = (SortElement *) ckalloc( length * sizeof(SortElement));
 
     for (i=0; i < length; i++){
+	idx = groupsize * i + groupoffset;
 	if (indexc) {
 	    /*
 	     * If this is an indexed sort, retrieve the corresponding element
 	     */
-	    indexPtr = SelectObjFromSublist(listObjPtrs[i], &sortInfo);
+	    indexPtr = SelectObjFromSublist(listObjPtrs[idx], &sortInfo);
 	    if (sortInfo.resultCode != TCL_OK) {
 		goto done1;
 	    }
 	} else {
-	    indexPtr = listObjPtrs[i];
+	    indexPtr = listObjPtrs[idx];
 	}
 
 	/*
@@ -3747,7 +3820,11 @@
 	 * the objPtr itself, or its index in the original list.
 	 */
 
-	elementArray[i].objPtr = (indices ? INT2PTR(i) : listObjPtrs[i]);
+	if (indices || group) {
+		elementArray[i].objPtr = INT2PTR(idx);
+	} else {
+		elementArray[i].objPtr = listObjPtrs[idx];
+	}
 
 	/*
 	 * Merge this element in the pre-existing sublists (and merge together
@@ -3785,10 +3862,25 @@
 	Tcl_Obj **newArray, *objPtr;
 	int i;
 
-	resultPtr = Tcl_NewListObj(sortInfo.numElements, NULL);
+	resultPtr = Tcl_NewListObj(sortInfo.numElements * groupsize, NULL);
 	listRepPtr = (List *) resultPtr->internalRep.twoPtrValue.ptr1;
 	newArray = &listRepPtr->elements;
-	if (indices) {
+	if (group) {
+		for (i = 0; elementPtr != NULL ; elementPtr = elementPtr->nextPtr) {
+			idx = PTR2INT(elementPtr->objPtr);
+			for (j = 0; j < groupsize; j++) {
+				if (indices) {
+					objPtr = Tcl_NewIntObj(idx + j - groupoffset);
+					newArray[i++] = objPtr;
+					Tcl_IncrRefCount(objPtr);
+				} else {
+					objPtr = listObjPtrs[idx + j - groupoffset];
+					newArray[i++] = objPtr;
+					Tcl_IncrRefCount(objPtr);
+				}
+			}
+		}
+	} else if (indices) {
 	    for (i = 0; elementPtr != NULL ; elementPtr = elementPtr->nextPtr){
 		objPtr = Tcl_NewIntObj(PTR2INT(elementPtr->objPtr));
 		newArray[i++] = objPtr;