Toolserver:Django

From mediawiki.org

This page was moved from the Toolserver wiki.
Toolserver has been replaced by Toolforge. As such, the instructions here may no longer work, but may still be of historical interest.
Please help by updating examples, links, template links, etc. If a page is still relevant, move it to a normal title and leave a redirect.

I tried to use Django on the Toolserver, but ran into a lot of trouble: the MediaWiki schema stores text as binary UTF-8 strings, and the MySQLdb module has no equivalent of the useOldUTF8Behavior option found in the MySQL JDBC driver.

I wrote a hack for Django to work around this problem; see the patch below. To use it, add the line:
'OPTIONS': { 'x_use_binary_utf8_hack': True }
to the database settings.
Index: django/db/backends/mysql/base.py
===================================================================
--- django/db/backends/mysql/base.py	(revision 13359)
+++ django/db/backends/mysql/base.py	(working copy)
@@ -78,10 +78,31 @@
     """
     codes_for_integrityerror = (1048,)
 
-    def __init__(self, cursor):
+    def __init__(self, cursor, x_use_binary_utf8_hack):
         self.cursor = cursor
+        self.x_use_binary_utf8_hack = x_use_binary_utf8_hack
 
+    def _x_clean_unicode(self, uni):
+        if self.x_use_binary_utf8_hack and isinstance(uni, unicode):
+            uni = uni.encode('utf8')
+        return uni
+
+    def _x_clean_query_args(self, query, args):
+        query = self._x_clean_unicode(query)
+        if args:
+            if hasattr(args, 'keys'): # mapping
+                newargs = {}
+                for k in args:
+                    newargs[k] = self._x_clean_unicode(args[k])
+            else:
+                newargs = []
+                for v in args:
+                    newargs.append(self._x_clean_unicode(v))
+            args = newargs
+        return (query, args)
+
     def execute(self, query, args=None):
+        query, args = self._x_clean_query_args(query, args)
         try:
             return self.cursor.execute(query, args)
         except Database.IntegrityError, e:
@@ -96,6 +117,7 @@
             raise utils.DatabaseError, utils.DatabaseError(*tuple(e)), sys.exc_info()[2]
 
     def executemany(self, query, args):
+        query, args = self._x_clean_query_args(query, args)
         try:
             return self.cursor.executemany(query, args)
         except Database.IntegrityError, e:
@@ -109,6 +131,33 @@
         except Database.DatabaseError, e:
             raise utils.DatabaseError, utils.DatabaseError(*tuple(e)), sys.exc_info()[2]
 
+    def _x_clean_row(self, row):
+        if self.x_use_binary_utf8_hack and row:
+            newrow = []
+            for col in row:
+                if isinstance(col, str):
+                    # Django does not support blob. So there's no expected str.
+                    col = col.decode('utf8', 'replace')
+                newrow.append(col)
+            row = type(row)(newrow)
+        return row
+
+    def _x_clean_rows(self, rows):
+        newrows = []
+        for row in rows:
+            newrows.append(self._x_clean_row(row))
+        # I don't check whether it's modified. Just build a group of new rows.
+        return type(rows)(newrows)
+
+    def fetchone(self):
+        return self._x_clean_row(self.cursor.fetchone())
+
+    def fetchmany(self, size=None):
+        return self._x_clean_rows(self.cursor.fetchmany(size))
+
+    def fetchall(self):
+        return self._x_clean_rows(self.cursor.fetchall())
+
     def __getattr__(self, attr):
         if attr in self.__dict__:
             return self.__dict__[attr]
@@ -116,7 +165,11 @@
             return getattr(self.cursor, attr)
 
     def __iter__(self):
-        return iter(self.cursor)
+        # itertools.starmap only works correctly on Python 2.6+
+        def it():
+            for row in self.cursor:
+                yield self._x_clean_row(row)
+        return iter(it())
 
 class DatabaseFeatures(BaseDatabaseFeatures):
     empty_fetchmany_value = ()
@@ -293,12 +346,21 @@
             # We need the number of potentially affected rows after an
             # "UPDATE", not the number of changed rows.
             kwargs['client_flag'] = CLIENT.FOUND_ROWS
-            kwargs.update(settings_dict['OPTIONS'])
+            if 'x_use_binary_utf8_hack' in settings_dict['OPTIONS']:
+                self._x_use_binary_utf8_hack = settings_dict['OPTIONS']['x_use_binary_utf8_hack']
+                connect_options = settings_dict['OPTIONS'].copy()
+                del connect_options['x_use_binary_utf8_hack']
+                kwargs['use_unicode'] = False
+                del kwargs['charset']
+            else:
+                self._x_use_binary_utf8_hack = False
+                connect_options = settings_dict['OPTIONS']
+            kwargs.update(connect_options)
             self.connection = Database.connect(**kwargs)
             self.connection.encoders[SafeUnicode] = self.connection.encoders[unicode]
             self.connection.encoders[SafeString] = self.connection.encoders[str]
             connection_created.send(sender=self.__class__)
-        cursor = CursorWrapper(self.connection.cursor())
+        cursor = CursorWrapper(self.connection.cursor(), self._x_use_binary_utf8_hack)
         return cursor
 
     def _rollback(self):