Fun with mb strlen/code

From MediaWiki.org
Jump to: navigation, search
<?php
 
// Make UTF-8 the default mbstring encoding: the benchmark below calls
// mb_strlen() with the string as its only argument, so it relies on this default.
mb_internal_encoding('UTF-8');
 
/**
 * Regex-based character count.
 *
 * Captures every character of the string individually (the "u" modifier makes
 * PCRE treat the subject as UTF-8, the "s" modifier lets "." match newlines
 * too) and returns how many captures were collected.
 *
 * @param string $str Text to measure
 * @return int Number of captured characters
 */
function old_mb_strlen( $str ) {
        $everyCharacter = '/(.)/us';
        preg_match_all( $everyCharacter, $str, $captures );

        // Index 1 holds the list of group-1 captures, one entry per character.
        return count( $captures[1] );
}
 
/**
 * Byte-histogram character count.
 *
 * Builds a per-byte-value frequency table with count_chars() and adds up the
 * frequencies of the bytes that start a character: ASCII bytes (0x00-0x7f)
 * and multibyte sequence heads (0xc0-0xfe). Bytes in 0x80-0xbf and the byte
 * 0xff are not counted.
 *
 * @param string $str Text to measure
 * @return int Sum of the frequencies of the counted byte values
 */
function new_mb_strlen( $str ) {
        // Frequency of each byte value, keyed 0..255 in ascending order, so an
        // offset into the array is the same as the byte value.
        $byteTally = count_chars( $str );

        // ASCII bytes: 0x00 through 0x7f.
        $asciiCount = array_sum( array_slice( $byteTally, 0x00, 0x80 ) );

        // Multibyte sequence heads: 0xc0 through 0xfe.
        $headCount = array_sum( array_slice( $byteTally, 0xc0, 0xff - 0xc0 ) );

        return $asciiCount + $headCount;
}
 
// Functions to compare. Each one is called by name with the file contents as
// its only argument.
$benchme = array(
        'strlen',
        'mb_strlen',
        'old_mb_strlen',
        'new_mb_strlen' );

// Sample texts to measure; the paths are passed to file_get_contents() as-is.
$testfiles = array(
        'washington.txt',
        'berlin.txt',
        'bulgakov.txt',
        'tokyo.txt',
        'young.txt' );

// Number of timed calls per function and file; the printed time is the mean.
$rounds = 5;

foreach( $testfiles as $filename ) {
        $data = file_get_contents( $filename );
        if( $data === false ) {
                // file_get_contents() returns false on failure. Benchmarking
                // that value would print meaningless rows, so skip the file.
                print "Skipping $filename: unable to read file\n\n";
                continue;
        }

        print "Testing $filename:\n";
        foreach( $benchme as $function ) {
                $start = microtime( true );
                for( $i = 0; $i < $rounds; $i++ ) {
                        $result = $function( $data );
                }
                // Mean wall-clock time per call, converted from seconds to milliseconds.
                $delta = ((microtime( true ) - $start) / $rounds) * 1000.0;
                printf( "%20s %10d chars %8.3fms\n", $function, $result, $delta );
        }
        print "\n";
}
?>
Personal tools
Namespaces
Variants
Actions
Site
Support
Download
Development
Communication
Print/export
Toolbox