编辑
characterSet time: 1.36803us 12.5 / 1.00 memory: 64 bytes (via Nikolai Ruhe)
original RKL time: 1.20686us 11.0 / 0.88 memory: 16 bytes (via Dave DeLong)
modified RKL time: 1.07631us 9.9 / 0.78 memory: 16 bytes (me, changed regex to \d+)
scannerScanInt time: 0.49951us 4.6 / 0.36 memory: 32 bytes (via Nikolai Ruhe)
intValue time: 0.16739us 1.5 / 0.12 memory: 0 bytes (via zpasternack)
rklIntValue time: 0.10925us 1.0 / 0.08 memory: 0 bytes (me, modified RKL example)
正如我在这条消息的其他地方指出的那样,我最初将其放入我用于RegexKitLite的单元测试工具中。然而,既然是单元测试工具,就意味着我是在用自己私有的RegexKitLite副本进行测试……而该副本恰好因为跟踪某位用户的错误报告而加入了一堆调试内容。上述计时结果大致相当于调用:
[valueString flushCachedRegexData];
characterSet time: 1.36803us 12.5 / 1.00 memory: 64 bytes (via Nikolai Ruhe)
original RKL time: 0.58446us 5.3 / 0.43 memory: 16 bytes (via Dave DeLong)
modified RKL time: 0.54628us 5.0 / 0.40 memory: 16 bytes (me, changed regex to \d+)
scannerScanInt time: 0.49951us 4.6 / 0.36 memory: 32 bytes (via Nikolai Ruhe)
intValue time: 0.16739us 1.5 / 0.12 memory: 0 bytes (via zpasternack)
rklIntValue time: 0.10925us 1.0 / 0.08 memory: 0 bytes (me, modified RKL example)
编译时选项 -DRKL_FAST_MUTABLE_CHECK:
original RKL time: 0.51188us 4.7 / 0.37 memory: 16 bytes using intValue
modified RKL time: 0.47665us 4.4 / 0.35 memory: 16 bytes using intValue
original RKL time: 0.44337us 4.1 / 0.32 memory: 16 bytes using rklIntValue
modified RKL time: 0.42128us 3.9 / 0.31 memory: 16 bytes using rklIntValue
RegexKitLite Fast Hex Conversion
“scannerScanInt”和“intValue”都存在一个问题,即要提取的数字必须在字符串的开头。我认为两者都会跳过任何前导空格。
这里基本上有两类不同的功能:一类可以容忍额外的“东西”,仍然能取出数字(characterSet、RegexKitLite匹配器和rklIntValue);另一类基本上要求数字位于字符串的最开头,最多只能容忍开头有一些空白填充(scannerScanInt和intValue)。
(请记住,RegexKitLite是我写的,所以对以下内容请酌情持保留态度)。
根据OP的要求,这是我用于执行测试的代码的修剪和缩小版本。有一件事需要注意:在整理这些代码时,我注意到Dave DeLong的原始正则表达式并不能正常工作。问题出在否定字符集上——字符集内部的元字符序列(即[^\d]+)会被当作字面字符,而不具有它们在字符集之外的特殊含义。将其替换为[^\p{DecimalNumber}]*后即可得到预期效果。
shell% gcc -DNS_BLOCK_ASSERTIONS -mdynamic-no-pic -std=gnu99 -O -o stackOverflow stackOverflow.m RegexKitLite.m -framework Foundation -licucore -lauto
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <objc/objc-auto.h>
#include <malloc/malloc.h>
#import <Foundation/Foundation.h>
#import "RegexKitLite.h"
/**
 * Returns the total CPU time (user + system) this process has consumed so far,
 * expressed in seconds.
 *
 * The figures come from getrusage(RUSAGE_SELF, ...). Each timeval is first
 * converted to microseconds and then scaled back to seconds.
 *
 * @return CPU seconds used by the process, or 0.0 if getrusage() reports failure.
 */
static double cpuTimeUsed(void);
static double cpuTimeUsed(void) {
  struct rusage currentRusage;

  // getrusage() fills the struct through the pointer; on failure the struct
  // contents are not meaningful, so do not compute with them.
  if(getrusage(RUSAGE_SELF, &currentRusage) != 0) { return(0.0); }

  double userCPUTime   = ((((double)currentRusage.ru_utime.tv_sec) * 1000000.0) + ((double)currentRusage.ru_utime.tv_usec)) / 1000000.0;
  double systemCPUTime = ((((double)currentRusage.ru_stime.tv_sec) * 1000000.0) + ((double)currentRusage.ru_stime.tv_usec)) / 1000000.0;
  double CPUTime       = userCPUTime + systemCPUTime;

  return(CPUTime);
}
/**
 * NSString category providing an integer extraction method that tolerates
 * leading non-numeric text.
 */
@interface NSString (IntConversion)
/**
 * Extracts an integer from the receiver, skipping any leading characters that
 * are not a decimal digit, '-' or '+'.
 *
 * Only the first 60 bytes of the string are searched for the start of the
 * number. The number itself is parsed with strtoimax() using base 0, so the
 * usual C prefixes ("0x" for hex, a leading "0" for octal) are honoured.
 *
 * @return The parsed value truncated to int, or 0 when no number is found.
 */
-(int)rklIntValue;
@end

@implementation NSString (IntConversion)

-(int)rklIntValue
{
  CFStringRef cfSelf = (CFStringRef)self;
  UInt8 buffer[64];
  const char *cptr, *optr;
  char c;

  // Try to borrow the string's internal C buffer directly. When that is not
  // available, copy at most 60 bytes of UTF-8 into the stack buffer and
  // NUL-terminate it ourselves.
  if((cptr = optr = CFStringGetCStringPtr(cfSelf, kCFStringEncodingMacRoman)) == NULL) {
    CFRange range = CFRangeMake(0L, CFStringGetLength(cfSelf));
    CFIndex usedBytes = 0L;
    CFStringGetBytes(cfSelf, range, kCFStringEncodingUTF8, '?', false, buffer, 60L, &usedBytes);
    buffer[usedBytes] = 0U;
    cptr = optr = (const char *)buffer;
  }

  // Advance to the first digit or sign character. The scan must also stop at
  // the NUL terminator: without that check a short string containing no
  // digit/sign would be read past its end (or into the uninitialised tail of
  // the stack buffer).
  while(((cptr - optr) < 60) && ((c = *cptr) != '\0') && (!(((c >= '0') && (c <= '9')) || (c == '-') || (c == '+')))) { cptr++; }

  // strtoimax() yields 0 when cptr does not point at a parsable number.
  return((int)strtoimax(cptr, NULL, 0));
}

@end
/**
 * Benchmark driver.
 *
 * Logs the garbage-collection state and pointer width, prints one sanity-check
 * result for every integer-extraction technique, then times each technique
 * over 100000 iterations and logs the average cost per call in microseconds.
 */
int main(int argc __attribute__((unused)), char *argv[] __attribute__((unused))) {
  NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

#ifdef __OBJC_GC__
  objc_start_collector_thread();
  objc_clear_stack(OBJC_CLEAR_RESIDENT_STACK);
  objc_collect(OBJC_EXHAUSTIVE_COLLECTION | OBJC_WAIT_UNTIL_DONE);
#endif

  // Environment report.
  BOOL gcEnabled = NO;
  if([objc_getClass("NSGarbageCollector") defaultCollector] != NULL) { gcEnabled = YES; }
  NSLog(@"Garbage Collection is: %@", gcEnabled ? @"ON" : @"OFF");
  NSLog(@"Architecture: %@", (sizeof(void *) == 4UL) ? @"32-bit" : @"64-bit");

  // Inputs: one string with leading junk, one that starts with the number.
  NSString *valueString  = @"foo 2020hello";
  NSString *value2String = @"2020hello";
  NSString *reRegex      = @"[^\\p{DecimalNumber}]*(\\d+)";
  NSString *re2Regex     = @"\\d+";

  int        value   = 0;
  NSUInteger pass    = 0UL;
  double     startAt = 0.0;

  // One-off run of every technique so the results and the sizes of the
  // objects involved can be inspected in the log.
  {
    NSCharacterSet *digits      = [NSCharacterSet decimalDigitCharacterSet];
    NSCharacterSet *nonDigits   = [digits invertedSet];
    NSScanner      *scanner     = [NSScanner scannerWithString:value2String];
    NSString       *csIntString = [valueString stringByTrimmingCharactersInSet:nonDigits];
    NSString       *reString    = [valueString stringByMatching:reRegex capture:1L];
    NSString       *re2String   = [valueString stringByMatching:re2Regex];

    [scanner scanInt:&value];

    NSLog(@"digits : %p, size: %lu", digits, malloc_size(digits));
    NSLog(@"nonDigits : %p, size: %lu", nonDigits, malloc_size(nonDigits));
    NSLog(@"scanner : %p, size: %lu, int: %d", scanner, malloc_size(scanner), value);
    NSLog(@"csIntString : %p, size: %lu, '%@' int: %d", csIntString, malloc_size(csIntString), csIntString, [csIntString intValue]);
    NSLog(@"reString : %p, size: %lu, '%@' int: %d", reString, malloc_size(reString), reString, [reString intValue]);
    NSLog(@"re2String : %p, size: %lu, '%@' int: %d", re2String, malloc_size(re2String), re2String, [re2String intValue]);
    NSLog(@"intValue : %d", [value2String intValue]);
    NSLog(@"rklIntValue : %d", [valueString rklIntValue]);
  }

  // Character-set trimming followed by intValue.
  pass = 0UL;
  startAt = cpuTimeUsed();
  while(pass < 100000UL) {
    NSCharacterSet *inverted = [[NSCharacterSet decimalDigitCharacterSet] invertedSet];
    value = [[valueString stringByTrimmingCharactersInSet:inverted] intValue];
    pass++;
  }
  double csTime = (cpuTimeUsed() - startAt) / (double)pass;

  // Regex with a capture group.
  pass = 0UL;
  startAt = cpuTimeUsed();
  while(pass < 100000UL) {
    value = [[valueString stringByMatching:reRegex capture:1L] intValue];
    pass++;
  }
  double reTime = (cpuTimeUsed() - startAt) / (double)pass;

  // Plain \d+ regex.
  pass = 0UL;
  startAt = cpuTimeUsed();
  while(pass < 100000UL) {
    value = [[valueString stringByMatching:re2Regex] intValue];
    pass++;
  }
  double re2Time = (cpuTimeUsed() - startAt) / (double)pass;

  // rklIntValue category method.
  pass = 0UL;
  startAt = cpuTimeUsed();
  while(pass < 100000UL) {
    value = [valueString rklIntValue];
    pass++;
  }
  double rklTime = (cpuTimeUsed() - startAt) / (double)pass;

  // Foundation's intValue on the string that starts with the number.
  pass = 0UL;
  startAt = cpuTimeUsed();
  while(pass < 100000UL) {
    value = [value2String intValue];
    pass++;
  }
  double ivTime = (cpuTimeUsed() - startAt) / (double)pass;

  // A fresh NSScanner per iteration.
  pass = 0UL;
  startAt = cpuTimeUsed();
  while(pass < 100000UL) {
    [[NSScanner scannerWithString:value2String] scanInt:&value];
    pass++;
  }
  double scTime = (cpuTimeUsed() - startAt) / (double)pass;

  // Per-call averages, converted from seconds to microseconds.
  NSLog(@"csTime : %.5lfus", csTime * 1000000.0);
  NSLog(@"reTime : %.5lfus", reTime * 1000000.0);
  NSLog(@"re2Time: %.5lfus", re2Time * 1000000.0);
  NSLog(@"scTime : %.5lfus", scTime * 1000000.0);
  NSLog(@"ivTime : %.5lfus", ivTime * 1000000.0);
  NSLog(@"rklTime: %.5lfus", rklTime * 1000000.0);

  [NSString clearStringCache];

  [pool release];
  pool = NULL;

  return(0);
}