Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Work toward dgp's suggestion that the underlying engine handle both [string insert] and [string replace]. Ref: https://sourceforge.net/p/tcl/mailman/message/36002472/ |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | amg-string-insert |
Files: | files | file ages | folders |
SHA1: |
0cfeecb7408bf011b0da5670cb98a1b7 |
User & Date: | andy 2017-08-20 02:44:56 |
Context
2017-08-20
| ||
03:20 | Correct NULL dereference, and optimize short-circuit logical operation check-in: fa3278ec35 user: andy tags: amg-string-insert | |
02:44 | Work toward dgp's suggestion that the underlying engine handle both [string insert] and [string repl... check-in: 0cfeecb740 user: andy tags: amg-string-insert | |
2017-08-17
| ||
18:32 | Correct bug in getting second Unicode character sequence check-in: 87dadea232 user: andy tags: amg-string-insert | |
Changes
Changes to generic/tclCmdMZ.c.
︙ | ︙ | |||
1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 | static int StringInsertCmd( ClientData dummy, /* Not used */ Tcl_Interp *interp, /* Current interpreter */ int objc, /* Number of arguments */ Tcl_Obj *const objv[]) /* Argument objects */ { Tcl_Obj *outObj; /* Output object */ if (objc != 4) { Tcl_WrongNumArgs(interp, 1, objv, "string index insertString"); return TCL_ERROR; } | > > > > > > > | | 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 | static int StringInsertCmd( ClientData dummy, /* Not used */ Tcl_Interp *interp, /* Current interpreter */ int objc, /* Number of arguments */ Tcl_Obj *const objv[]) /* Argument objects */ { int length; /* String length */ int index; /* Insert index */ Tcl_Obj *outObj; /* Output object */ if (objc != 4) { Tcl_WrongNumArgs(interp, 1, objv, "string index insertString"); return TCL_ERROR; } length = Tcl_GetCharLength(objv[1]); if (TclGetIntForIndexM(interp, objv[2], length, &index) != TCL_OK) { return TCL_ERROR; } if (!(outObj = TclStringReplace(interp, objv[1], index, 0, objv[3]))) { return TCL_ERROR; } Tcl_SetObjResult(interp, outObj); return TCL_OK; } |
︙ | ︙ |
Changes to generic/tclInt.h.
︙ | ︙ | |||
3152 3153 3154 3155 3156 3157 3158 | MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, int numBytes); MODULE_SCOPE int TclStringCatObjv(Tcl_Interp *interp, int inPlace, int objc, Tcl_Obj *const objv[], Tcl_Obj **objPtrPtr); MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack, int start); | | | | 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 | MODULE_SCOPE void * TclStackRealloc(Tcl_Interp *interp, void *ptr, int numBytes); MODULE_SCOPE int TclStringCatObjv(Tcl_Interp *interp, int inPlace, int objc, Tcl_Obj *const objv[], Tcl_Obj **objPtrPtr); MODULE_SCOPE int TclStringFind(Tcl_Obj *needle, Tcl_Obj *haystack, int start); MODULE_SCOPE Tcl_Obj * TclStringReplace(Tcl_Interp *interp, Tcl_Obj *strObj, int startIndex, int removeCount, Tcl_Obj *insObj); MODULE_SCOPE int TclStringLast(Tcl_Obj *needle, Tcl_Obj *haystack, int last); MODULE_SCOPE int TclStringMatch(const char *str, int strLen, const char *pattern, int ptnLen, int flags); MODULE_SCOPE int TclStringMatchObj(Tcl_Obj *stringObj, Tcl_Obj *patternObj, int flags); MODULE_SCOPE Tcl_Obj * TclStringObjReverse(Tcl_Obj *objPtr); |
︙ | ︙ |
Changes to generic/tclStringObj.c.
︙ | ︙ | |||
3278 3279 3280 3281 3282 3283 3284 | return -1; } } /* *--------------------------------------------------------------------------- * | | > | > > > | > > | | | | > > > > > | | | | > | > > | | > | > > > | | < > > | | | | | > | > > | > > > > > | > > > > | > | > | < | < | | < < > | | > > | > | | | | > > | < | > > | > > > | | > > > > | | > | > > > > > | | > > > > | > > > > > > > > > > | | | > | > | > | > > > | > > | < | > | > > > > > > > | > > > > > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > | > | > | | | | | | > > > > > > > | | > | | > > > > | > > > > | > > > > > > | > | | | > > > > > | | | | > > > > > | | | | | | | | < | | | | | > | 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 | return -1; } } /* *--------------------------------------------------------------------------- * * TclStringReplace -- * * Inserts, replaces, or removes characters in a string. Implements the * [string insert] and [string replace] operations. Observe that inserting * a string is the same as replacing an empty substring with a non-empty * substring. This function can also be used to implement removing a * substring by replacing a non-empty substring with an empty substring. * * Results: * Removes removeCount characters from strObj starting at startIndex and * inserts insObj at that same location. removeCount may be 0 to insert * without removing, and insObj may be NULL or empty string to remove * without inserting. On memory allocation failure, returns NULL and * places error information in the interpreter result. * * Side effects: * strObj and insObj may have their Tcl_ObjType changed to tclStringType or * properByteArrayType, and either one's value may be modified if unshared. * * TODO: * Memory allocation failure is only checked when concatenating shared, * non-pure byte array, non-pure Unicode character array strings. Need to * commit to a consistent memory allocation failure handling policy. * *--------------------------------------------------------------------------- */ Tcl_Obj * TclStringReplace( Tcl_Interp *interp, /* Interpreter for error logging, may be NULL */ Tcl_Obj *strObj, /* String being modified */ int startIndex, /* Index at which to insert/remove/replace */ int removeCount, /* Number of characters to remove or replace */ Tcl_Obj *insObj) /* Substring to insert/replace, may be NULL */ { int idx; /* Effective start index */ int del; /* Effective number of characters to remove */ int pureBin; /* 1 if byte array with no string rep */ int pureUni; /* 1 if Unicode with no string representation */ /* Representations of strObj (string being modified) */ unsigned char *strBin; /* String being modified, byte array */ Tcl_UniChar *strUni; /* String being modified, Unicode char array */ int strLen; /* String being modified, byte or char count */ /* Representations of insObj (substring to insert or replace) */ unsigned char *insBin; /* String to insert, byte array */ Tcl_UniChar *insUni; /* String to insert, Unicode character array */ int insLen; /* String to insert, byte or character count */ /* Representations of output string */ unsigned char *outBin; /* Output string, byte array */ Tcl_UniChar *outUni; /* Output string, Unicode character array */ String *outStr; /* Output string, Tcl string buffer */ Tcl_Obj *outObj; /* Output string, Tcl object */ /* * Check if strObj is a pure byte array or pure Unicode character array. * Get its length and byte array or Unicode character array representation. */ if ((pureBin = TclIsPureByteArray(strObj))) { strBin = Tcl_GetByteArrayFromObj(strObj, &strLen); } else { pureUni = !strObj->bytes; strUni = Tcl_GetUnicodeFromObj(strObj, &strLen); } /* * Do the same for insObj, and update pureBin or pureUni to indicate if both * strObj and insObj are pure byte arrays or pure Unicode character arrays. * If insObj is NULL, treat it as empty string, which is effectively both a * pure byte array and a pure Unicode character array. */ if (!insObj) { insLen = 0; } else if (pureBin && (pureBin &= TclIsPureByteArray(insObj))) { insBin = Tcl_GetByteArrayFromObj(insObj, &insLen); } else { pureUni &= !insObj->bytes; insUni = Tcl_GetUnicodeFromObj(insObj, &insLen); } /* * Clip start index and removal count to lie within string length limits. */ if (startIndex < 0) { idx = 0; } else if (startIndex > strLen) { idx = strLen; } else { idx = startIndex; } if (removeCount < 0) { del = 0; } else if (idx + removeCount > strLen) { del = strLen - idx; } else { del = removeCount; } /* * Setup is complete. Now perform the actual string replacement. */ if (!insLen && !del) { /* * If insObj is empty/NULL and no removal is being done, return strObj. */ outObj = strObj; } else if (!strLen) { /* * If strObj is empty, return insObj, which is guaranteed non-NULL. */ outObj = insObj; } else if (pureBin) { /* * Optimize the pure byte array case to avoid shimmering. If either * byte array argument is unshared, modify it in place. If both * arguments are shared, create a new, unshared byte array result. * ASCII strings are frequently represented as pure byte arrays. */ if (Tcl_IsShared(strObj) && (!insObj || Tcl_IsShared(insObj))) { /* * Both arguments are shared, so fill a new byte array with bytes * from strObj and insObj. */ outObj = Tcl_NewByteArrayObj(NULL, strLen + insLen - del); outBin = Tcl_GetByteArrayFromObj(outObj, NULL); if (idx) { memcpy(outBin, strBin, idx); } if (insLen) { memcpy(outBin + idx, insBin, insLen); } if (strLen != idx + del) { memcpy(outBin + idx + insLen, strBin + idx + del, strLen - idx - del); } } else { /* * An argument is unshared and can be modified in place. Note: At * this point, if insObj is NULL, strObj is necessarily unshared. */ if (!Tcl_IsShared(strObj)) { outObj = strObj; } else { outObj = insObj; } /* * If more bytes are about to be inserted than deleted, increase the * allocation now. Defer decreasing allocation until later to avoid * accessing memory after deallocating it. */ if (insLen > del) { outBin = Tcl_SetByteArrayLength(outObj, strLen + insLen - del); } else { outBin = Tcl_GetByteArrayFromObj(outObj, NULL); } if (!Tcl_IsShared(strObj)) { /* * Handle modifying strObj in place. The bytes before idx are * already where they need to be. Adjust the position of the * bytes after the end of the insert/replace/delete region, then * insert the substring. */ if (insLen != del && strLen != idx + del) { memmove(outBin + idx + insLen, outBin + idx + del, strLen - idx - del); } if (insLen) { memcpy(outBin + idx, insBin, insLen); } } else { /* * Handle modifying insObj in place. Move the insObj bytes to * idx, then copy strObj in two parts before and after insObj. */ if (idx && insLen) { memmove(outBin + idx, outBin, insLen); } if (idx) { memcpy(outBin, strBin, idx); } if (insLen != del && strLen != idx + del) { memcpy(outBin + idx + insLen, strBin + idx + del, strLen - idx - del); } } /* * Now that all bytes are in place, decrease allocation if more * bytes were deleted than were inserted. */ if (del > insLen) { Tcl_SetByteArrayLength(outObj, strLen + insLen - del); } } } else if (pureUni || !Tcl_IsShared(strObj) || (insObj && !Tcl_IsShared(insObj))) { /* * Same as above, but for pure Unicode character arrays and for unshared * arguments. Modify the unshared argument in place, or create a new * output object if the arguments are shared. */ if (!Tcl_IsShared(strObj)) { outObj = strObj; } else if (insObj && !Tcl_IsShared(insObj)) { outObj = insObj; } else { outObj = Tcl_NewUnicodeObj(strUni, idx); } /* * Increase the string allocation as necessary, and get access to the * Unicode character array internals. */ outStr = GET_STRING(outObj); if (strLen + insLen - del > outStr->maxChars) { GrowUnicodeBuffer(outObj, strLen + insLen - del); outStr = GET_STRING(outObj); } outUni = outStr->unicode; outStr->numChars = strLen + insLen - del; /* * Convert output to a pure Unicode character array if not already so. */ if (!pureUni) { TclInvalidateStringRep(outObj); outStr->allocated = 0; } /* * Rearrange the character array to accomplish the replace operation. */ if (!Tcl_IsShared(strObj)) { /* * Handle modifying strObj in place. The characters before idx are * already where they need to be. Adjust the position of the * characters after the end of the insert/replace/delete region, * then insert the substring. */ memmove(outUni + idx + insLen, outUni + idx + del, (strLen - idx - del) * sizeof(Tcl_UniChar)); memcpy(outUni + idx, insUni, insLen * sizeof(Tcl_UniChar)); } else if (insObj && !Tcl_IsShared(insObj)) { /* * Handle modifying insObj in place. Move the insObj characters to * idx, then copy strObj in two parts before and after insObj. */ memmove(outUni + idx, outUni, insLen * sizeof(Tcl_UniChar)); memcpy(outUni, strUni, idx * sizeof(Tcl_UniChar)); memcpy(outUni + idx + insLen, strUni + idx + del, (strLen - idx - del) * sizeof(Tcl_UniChar)); } else { /* * Both arguments are shared, so fill the new Unicode character * array with characters from strObj and insObj. */ memcpy(outUni + idx, insUni, insLen * sizeof(Tcl_UniChar)); memcpy(outUni + idx + insLen, strUni + idx + del, (strLen - idx - del) * sizeof(Tcl_UniChar)); } } else if ((!idx || idx == strLen) && !del) { /* * Inserting at the beginning or end of the string is nothing more than * concatenating the two strings in either order. */ Tcl_Obj *parts[2]; if (idx) { parts[0] = strObj; parts[1] = insObj; } else { parts[0] = insObj; parts[1] = strObj; } if (TclStringCatObjv(interp, 1, 2, parts, &outObj) != TCL_OK) { return NULL; } } else { /* * Non-byte array, non-Unicode, non-prepend, non-append, non-empty, * non-unshared case. Build a new string by concatenating the parts. */ outObj = Tcl_NewUnicodeObj(strUni, idx); Tcl_AppendObjToObj(outObj, insObj); if (strLen != idx + del) { Tcl_AppendUnicodeToObj(outObj, strUni + idx + del, strLen - idx - del); } } return outObj; } /* |
︙ | ︙ |