Tcl Source Code

Artifact [1b1aa6dc2e]
Login

Artifact 1b1aa6dc2e6e08994b20dcf56a2e17ce787cfca7ac583420edaa1ddffe2f40ea:

Attachment "0007-tclHash-Fix-strict-aliasing-violations-breaking-CHER.patch" to ticket [37108037b9] added by jrtc27 2022-08-12 23:21:45. (unpublished)
From d9adfffdcabe5b37800abd97063f72e8818d890e Mon Sep 17 00:00:00 2001
From: Jessica Clarke <[email protected]>
Date: Fri, 12 Aug 2022 23:47:57 +0100
Subject: [PATCH 7/8] tclHash: Fix strict aliasing violations breaking CHERI
 support

This is needed to support CHERI, and thus Arm's experimental Morello
prototype, where pointers are implemented using unforgeable capabilities
that include bounds and permissions metadata to provide fine-grained
spatial and referential memory safety, as well as revocation by sweeping
memory to provide heap temporal memory safety.

The current tclHash implementation used int[] as the underlying storage
for arbitrary objects. However, this violates strict aliasing, as int[]
can only be used to store integers, and int * cannot alias non-integers.
On most architectures this ends up being harmless (though with LTO may
be less harmless as the translation unit boundary no longer exists to
hide this violation from the compiler), but on CHERI copying pointers as
an array of integers loses the set tag bit marking the capability as
valid/not forged (unless using a non-inlined memcpy) since the
capabilities used to represent those pointers must be copied as a single
unit. This strict aliasing violation is perhaps also why Tcl_GetHashKey
did not distinguish the string key case from the array key case, always
using the string member; using the words member would have made the
strict aliasing violation visible outside the translation unit and been
at risk of breaking due to compiler optimisations.

Fix this by using a char[] as the underlying storage instead, with
memcpy and memcmp used to implement copying and comparing respectively.
Since this is the same type as is already used for string we can unify
the two union members into a single bytes member. This also simplifies
the implementation.

Note that for API compatibility keyType continues to be in units of
sizeof(int) for that case rather than bytes.
---
 generic/tcl.h     |  7 ++-----
 generic/tclHash.c | 28 +++++++---------------------
 2 files changed, 9 insertions(+), 26 deletions(-)

diff --git a/generic/tcl.h b/generic/tcl.h
index 683df500e..87e03f0cc 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -1010,10 +1010,7 @@ struct Tcl_HashEntry {
     union {			/* Key has one of these forms: */
 	char *oneWordValue;	/* One-word value for key. */
 	Tcl_Obj *objPtr;	/* Tcl_Obj * key value. */
-	int words[1];		/* Multiple integer words for key. The actual
-				 * size will be as large as necessary for this
-				 * table's keys. */
-	char string[1];		/* String for key. The actual size will be as
+	char bytes[1];		/* Bytes for key. The actual size will be as
 				 * large as needed to hold the key. */
     } key;			/* MUST BE LAST FIELD IN RECORD!! */
 };
@@ -2443,7 +2440,7 @@ EXTERN const char *TclZipfs_AppHook(int *argc, char ***argv);
 	((void *) (((tablePtr)->keyType == TCL_ONE_WORD_KEYS || \
 		    (tablePtr)->keyType == TCL_CUSTOM_PTR_KEYS) \
 		   ? (h)->key.oneWordValue \
-		   : (h)->key.string))
+		   : (h)->key.bytes))
 
 /*
  * Macros to use for clients to use to invoke find and create functions for
diff --git a/generic/tclHash.c b/generic/tclHash.c
index 3ca269cc4..2f69d6085 100644
--- a/generic/tclHash.c
+++ b/generic/tclHash.c
@@ -659,21 +659,16 @@ AllocArrayEntry(
     Tcl_HashTable *tablePtr,	/* Hash table. */
     void *keyPtr)			/* Key to store in the hash table entry. */
 {
-    int *array = (int *) keyPtr;
-    int *iPtr1, *iPtr2;
     Tcl_HashEntry *hPtr;
-    int count = tablePtr->keyType;
-    TCL_HASH_TYPE size = sizeof(Tcl_HashEntry) + (count*sizeof(int)) - sizeof(hPtr->key);
+    int count = tablePtr->keyType * sizeof(int);
+    TCL_HASH_TYPE size = sizeof(Tcl_HashEntry) + count - sizeof(hPtr->key);
 
     if (size < sizeof(Tcl_HashEntry)) {
 	size = sizeof(Tcl_HashEntry);
     }
     hPtr = (Tcl_HashEntry *)Tcl_Alloc(size);
 
-    for (iPtr1 = array, iPtr2 = hPtr->key.words;
-	    count > 0; count--, iPtr1++, iPtr2++) {
-	*iPtr2 = *iPtr1;
-    }
+    memcpy(hPtr->key.bytes, keyPtr, count);
     Tcl_SetHashValue(hPtr, NULL);
 
     return hPtr;
@@ -701,20 +696,11 @@ CompareArrayKeys(
     void *keyPtr,			/* New key to compare. */
     Tcl_HashEntry *hPtr)	/* Existing key to compare. */
 {
-    const int *iPtr1 = (const int *)keyPtr;
-    const int *iPtr2 = hPtr->key.words;
     Tcl_HashTable *tablePtr = hPtr->tablePtr;
     int count;
 
-    for (count = tablePtr->keyType; ; count--, iPtr1++, iPtr2++) {
-	if (count == 0) {
-	    return 1;
-	}
-	if (*iPtr1 != *iPtr2) {
-	    break;
-	}
-    }
-    return 0;
+    count = tablePtr->keyType * sizeof(int);
+    return !memcmp((char *)keyPtr, hPtr->key.bytes, count);
 }
 
 /*
@@ -782,7 +768,7 @@ AllocStringEntry(
     }
     hPtr = (Tcl_HashEntry *)Tcl_Alloc(offsetof(Tcl_HashEntry, key) + allocsize);
     memset(hPtr, 0, sizeof(Tcl_HashEntry) + allocsize - sizeof(hPtr->key));
-    memcpy(hPtr->key.string, string, size);
+    memcpy(hPtr->key.bytes, string, size);
     Tcl_SetHashValue(hPtr, NULL);
     return hPtr;
 }
@@ -809,7 +795,7 @@ CompareStringKeys(
     void *keyPtr,			/* New key to compare. */
     Tcl_HashEntry *hPtr)	/* Existing key to compare. */
 {
-    return !strcmp((char *)keyPtr, hPtr->key.string);
+    return !strcmp((char *)keyPtr, hPtr->key.bytes);
 }
 
 /*
-- 
2.34.GIT