| Index: trunk/phase3/includes/DefaultSettings.php |
| — | — | @@ -341,6 +341,11 @@ |
| 342 | 342 | # code, so that if it breaks, only zh will be affected |
| 343 | 343 | $wgDisableLangConversion = true; |
| 344 | 344 | |
| | 345 | +# Whether to use zhdaemon to perform Chinese text processing |
| | 346 | +$wgUseZhdaemon = false; |
| | 347 | +$wgZhdaemonHost="localhost"; |
| | 348 | +$wgZhdaemonPort=2004; |
| | 349 | + |
| 345 | 350 | # Miscellaneous configuration settings |
| 346 | 351 | # |
| 347 | 352 | |
| Index: trunk/phase3/languages/LanguageZh.php |
| — | — | @@ -1,20 +1,9 @@ |
| 2 | 2 | <?php |
| | 3 | +require_once( "includes/ZhClient.php" ); |
| 3 | 4 | require_once( "LanguageZh_cn.php"); |
| 4 | 5 | require_once( "LanguageZh_tw.php"); |
| 5 | 6 | require_once( "LanguageZh_sg.php"); |
| 6 | 7 | require_once( "LanguageZh_hk.php"); |
| 7 | | -/* caching the conversion tables */ |
| 8 | | -$zh2TW = $wgMemc->get($key1 = "$wgDBname:zhConvert:tw"); |
| 9 | | -$zh2CN = $wgMemc->get($key2 = "$wgDBname:zhConvert:cn"); |
| 10 | | -$zh2SG = $wgMemc->get($key3 = "$wgDBname:zhConvert:sg"); |
| 11 | | -$zh2HK = $wgMemc->get($key4 = "$wgDBname:zhConvert:hk"); |
| 12 | | -if(empty($zh2TW) || empty($zh2CN) || empty($zh2SG) || empty($zh2HK)) { |
| 13 | | - require_once("includes/ZhConversion.php"); |
| 14 | | - $wgMemc->set($key1, $zh2TW); |
| 15 | | - $wgMemc->set($key2, $zh2CN); |
| 16 | | - $wgMemc->set($key3, $zh2SG); |
| 17 | | - $wgMemc->set($key4, $zh2HK); |
| 18 | | -} |
| 19 | 8 | |
| 20 | 9 | /* class that handles both Traditional and Simplified Chinese |
| 21 | 10 | right now it only distinguishes zh_cn and zh_tw (actually, zh_cn and |
| — | — | @@ -23,9 +12,20 @@ |
| 24 | 13 | class LanguageZh extends LanguageZh_cn { |
| 25 | 14 | |
| 26 | 15 | var $mZhLanguageCode=false; |
| 27 | | - |
| | 16 | + var $mZhClient=false; |
| 28 | 17 | function LanguageZh() { |
| | 18 | + global $wgUseZhdaemon, $wgZhdaemonHost, $wgZhdaemonPort; |
| | 19 | + global $wgDisableLangConversion; |
| | 20 | + |
| 29 | 21 | $this->mZhLanguageCode = $this->getPreferredVariant(); |
| | 22 | + if($wgUseZhdaemon) { |
| | 23 | + $this->mZhClient=new ZhClient($wgZhdaemonHost, $wgZhdaemonPort); |
| | 24 | + if(!$this->mZhClient->isconnected()) |
| | 25 | + $this->mZhClient = false; |
| | 26 | + } |
| | 27 | + // fall back to the fake client when zhdaemon is disabled or not connected |
| | 28 | + if($this->mZhClient == false) |
| | 29 | + $this->mZhClient=new ZhClientFake(); |
| 30 | 30 | } |
| 31 | 31 | |
| 32 | 32 | /* |
| — | — | @@ -56,48 +56,13 @@ |
| 57 | 57 | } |
| 58 | 58 | |
| 59 | 59 | |
| 60 | | - /* the Simplified/Traditional conversion stuff */ |
| 61 | | - |
| 62 | | - function zh2tw($text) { |
| 63 | | - global $zh2TW; |
| 64 | | - return strtr($text, $zh2TW); |
| 65 | | - } |
| 66 | | - |
| 67 | | - function zh2cn($text) { |
| 68 | | - global $zh2CN; |
| 69 | | - return strtr($text, $zh2CN); |
| 70 | | - } |
| 71 | | - |
| 72 | | - function zh2sg($text) { |
| 73 | | - global $zh2SG, $zh2CN; |
| 74 | | - return strtr(strtr($text, $zh2CN), $zh2SG); |
| 75 | | - } |
| 76 | | - |
| 77 | | - function zh2hk($text) { |
| 78 | | - global $zh2HK, $zh2TW; |
| 79 | | - return strtr(strtr($text, $zh2TW), $zh2HK); |
| 80 | | - } |
| 81 | 60 | |
| 82 | 61 | function autoConvert($text, $toVariant=false) { |
| 83 | 62 | if(!$toVariant) |
| 84 | 63 | $toVariant = $this->getPreferredVariant(); |
| 85 | 64 | $fname="zhconvert"; |
| 86 | 65 | wfProfileIn( $fname ); |
| 87 | | - $t = $text; |
| 88 | | - switch($toVariant) { |
| 89 | | - case 'zh-cn': |
| 90 | | - $t = $this->zh2cn($text); |
| 91 | | - break; |
| 92 | | - case 'zh-tw': |
| 93 | | - $t = $this->zh2tw($text); |
| 94 | | - break; |
| 95 | | - case 'zh-sg': |
| 96 | | - $t = $this->zh2sg($text); |
| 97 | | - break; |
| 98 | | - case 'zh-hk': |
| 99 | | - $t = $this->zh2hk($text); |
| 100 | | - break; |
| 101 | | - } |
| | 66 | + $t = $this->mZhClient->convert($text, $toVariant); |
| 102 | 67 | wfProfileOut( $fname ); |
| 103 | 68 | return $t; |
| 104 | 69 | } |
| — | — | @@ -127,5 +92,15 @@ |
| 128 | 93 | } |
| 129 | 94 | return false; |
| 130 | 95 | } |
| | 96 | + |
| | 97 | + // word segmentation through ZhClient |
| | 98 | + function stripForSearch( $string ) { |
| | 99 | + $fname="zhsegment"; |
| | 100 | + wfProfileIn( $fname ); |
| | 101 | + $t = $this->mZhClient->segment($string); |
| | 102 | + wfProfileOut( $fname ); |
| | 103 | + return $t; |
| | 104 | + |
| | 105 | + } |
| 131 | 106 | } |
| 132 | 107 | ?> |