Flow/Database

See for the up-to-date SQL code, though without these example queries.

flow_workflow
CREATE TABLE `flow_workflow` ( `workflow_id` binary(16) NOT NULL,  `workflow_wiki` varchar(64) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,  `workflow_namespace` int(11) NOT NULL,  `workflow_page_id` int(10) unsigned NOT NULL,  `workflow_title_text` varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,  `workflow_name` varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,  `workflow_last_update_timestamp` binary(14) NOT NULL,  `workflow_lock_state` int(10) unsigned NOT NULL,  `workflow_type` binary(16) NOT NULL,  PRIMARY KEY (`workflow_id`),  KEY `flow_workflow_lookup` (`workflow_wiki`,`workflow_namespace`,`workflow_title_text`),  KEY `flow_workflow_update_timestamp` (`workflow_last_update_timestamp`); ) ENGINE=InnoDB DEFAULT CHARSET=utf8 mysql> explain select * from flow_workflow where workflow_wiki = "mediawikiwiki" and workflow_namespace = "103" and workflow_title_text = "LDAP_Authentication" order by workflow_id DESC limit 1; +--+-+---+--+--+--+-+---+--++ +--+-+---+--+--+--+-+---+--++ +--+-+---+--+--+--+-+---+--++ 1 row in set (0.00 sec)
 * defines a flow instance, which is based on workflow_namespace and workflow_title_text (corresponding to a MediaWiki page title/ns).
 * index `flow_workflow_lookup` is used to look up a particular workflow on a page based on workflow namespace and title, example query:
 * id  | select_type | table         | type | possible_keys        | key                  | key_len | ref               | rows | Extra                                              |
 * 1 | SIMPLE     | flow_workflow | ref  | flow_workflow_lookup | flow_workflow_lookup | 327     | const,const,const |  365 | Using index condition; Using where; Using filesort |

Better to use page id if you have it:

mysql> explain select * from flow_workflow where workflow_wiki = "mediawikiwiki" and workflow_page_id = 461183 order by workflow_id DESC limit 1; +--+-+---+---+--+-+-+--+--+-+ +--+-+---+---+--+-+-+--+--+-+ +--+-+---+---+--+-+-+--+--+-+ 1 row in set (0.00 sec)
 * id  | select_type | table         | type  | possible_keys        | key     | key_len | ref  | rows | Extra       |
 * 1 | SIMPLE     | flow_workflow | index | flow_workflow_lookup | PRIMARY | 11      | NULL |    2 | Using where |

flow_topic_list
CREATE TABLE `flow_topic_list` ( `topic_list_id` binary(16) NOT NULL,  `topic_id` binary(16) DEFAULT NULL,  UNIQUE KEY `flow_topic_list_pk` (`topic_list_id`,`topic_id`),  KEY `flow_topic_list_topic_id` (`topic_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 mysql> explain SELECT * FROM `flow_topic_list`   WHERE topic_list_id = unhex('050B8FE7DE931041240478')  ORDER BY topic_id DESC LIMIT 500; +--+-+-+--+---+-+-+---+--+--+ +--+-+-+--+---+-+-+---+--+--+ +--+-+-+--+---+-+-+---+--+--+ 1 row in set (0.01 sec) mysql> explain SELECT * FROM `flow_topic_list`,`flow_tree_revision`,`flow_revision`  WHERE (tree_rev_id = rev_id) AND (tree_rev_descendant_id = topic_id) AND topic_list_id = unhex('050B8FE7DE931041240478') ORDER BY rev_id DESC LIMIT 500; +--+-+++-+-+-+---+--+---+ +--+-+++-+-+-+---+--+---+ +--+-+++-+-+-+---+--+---+ 3 rows in set (0.00 sec) EWWW temporary AND filesort. Anyways...
 * discussion flow to topic flow association so we can pull a list of topics for a particular discussion
 * index `flow_topic_list_topic_id` used to pull a list of topic_id for a topic_list_id, example query:
 * id  | select_type | table           | type | possible_keys | key     | key_len | ref   | rows | Extra                    |
 * 1 | SIMPLE     | flow_topic_list | ref  | PRIMARY       | PRIMARY | 11      | const |  650 | Using where; Using index |
 * id  | select_type | table              | type   | possible_keys                       | key                         | key_len | ref                                   | rows | Extra                                                     |
 * 1 | SIMPLE     | flow_topic_list    | ref    | PRIMARY,flow_topic_list_topic_id    | PRIMARY                     | 11      | const                                 |  650 | Using where; Using index; Using temporary; Using filesort |
 * 1 | SIMPLE     | flow_tree_revision | ref    | PRIMARY,flow_tree_descendant_rev_id | flow_tree_descendant_rev_id | 11      | flowdb.flow_topic_list.topic_id       |    1 |                                                           |
 * 1 | SIMPLE     | flow_revision      | eq_ref | PRIMARY                             | PRIMARY                     | 11      | flowdb.flow_tree_revision.tree_rev_id |    1 |                                                           |

mysql > explain SELECT * FROM `flow_topic_list` WHERE topic_id = unhex('050B8FE7DEB71041240478') LIMIT 1; +--+-+-+--+--+--+-+---+--+--+ +--+-+-+--+--+--+-+---+--+--+ +--+-+-+--+--+--+-+---+--+--+ 1 row in set (0.00 sec)
 * index `flow_topic_list_topic_id` is used to look up the topic_list_id for a topic_id, example query:
 * id  | select_type | table           | type | possible_keys            | key                      | key_len | ref   | rows | Extra                    |
 * 1 | SIMPLE     | flow_topic_list | ref  | flow_topic_list_topic_id | flow_topic_list_topic_id | 11      | const |    1 | Using where; Using index |

flow_tree_revision
flow_tree_revision | CREATE TABLE `flow_tree_revision` ( `tree_rev_descendant_id` binary(16) NOT NULL,  `tree_rev_id` binary(16) NOT NULL,  `tree_orig_user_id` bigint(20) unsigned NOT NULL,  `tree_orig_user_ip` varbinary(39) DEFAULT NULL,  `tree_orig_user_wiki` varbinary(64) NOT NULL,  `tree_parent_id` binary(16) DEFAULT NULL,  PRIMARY KEY (`tree_rev_id`),  UNIQUE KEY `flow_tree_descendant_rev_id` (`tree_rev_descendant_id`,`tree_rev_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8
 * topic/post content revisions, it has one to many relation to flow_revision table
 * index `flow_tree_descendant_rev_id` - mainly joins with flow_revision table and accessed by either `tree_rev_descendant_id` or `tree_rev_id`

example query: mysql> explain SELECT * FROM `flow_tree_revision` JOIN `flow_revision` `rev` ON ((tree_rev_id = rev_id))  WHERE tree_rev_descendant_id = unhex('04934C2C5C049D5BF77CEA')  ORDER BY rev_id DESC LIMIT 1; +--+-+++-+-+-+---+--++ +--+-+++-+-+-+---+--++ +--+-+++-+-+-+---+--++ 2 rows in set (0.00 sec)
 * id  | select_type | table              | type   | possible_keys                       | key                         | key_len | ref                                   | rows | Extra                                                  |
 * 1 | SIMPLE     | flow_tree_revision | ref    | PRIMARY,flow_tree_descendant_rev_id | flow_tree_descendant_rev_id | 11      | const                                 |    3 | Using index condition; Using temporary; Using filesort |
 * 1 | SIMPLE     | rev                | eq_ref | PRIMARY                             | PRIMARY                     | 11      | flowdb.flow_tree_revision.tree_rev_id |    1 |                                                        |

flow_revision
CREATE TABLE `flow_revision` ( `rev_id` binary(16) NOT NULL,  `rev_type` varchar(16) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,  `rev_user_id` bigint unsigned NOT NULL,  `rev_user_ip` varbinary(39) DEFAULT NULL,  `rev_user_wiki` varbinary(64) NOT NULL,  `rev_parent_id` binary(16) DEFAULT NULL,  `rev_flags` tinyblob NOT NULL,  `rev_content` mediumblob NOT NULL,  `rev_change_type` varbinary(255) DEFAULT NULL,  `rev_mod_state` varchar(32) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,  `rev_mod_user_id` bigint(20) unsigned DEFAULT NULL,  `rev_mod_user_ip` varbinary(39) DEFAULT NULL,  `rev_mod_user_wiki` varbinary(64) DEFAULT NULL,  `rev_mod_timestamp` varchar(14) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL,  `rev_mod_reason` varchar(255) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL,  `rev_last_edit_id` binary(16) DEFAULT NULL,  `rev_edit_user_id` bigint(20) unsigned DEFAULT NULL, `rev_edit_user_ip` varbinary(39) DEFAULT NULL, `rev_edit_user_wiki` varbinary(64) DEFAULT NULL, `rev_content_length` int(11) NOT NULL DEFAULT '0', `rev_previous_content_length` int(11) NOT NULL DEFAULT '0', PRIMARY KEY (`rev_id`), UNIQUE KEY `flow_revision_unique_parent` (`rev_parent_id`), KEY `flow_revision_user` (`rev_user_id`,`rev_user_ip`,`rev_user_wiki`), KEY `flow_revision_type_id` (`rev_type`,`rev_type_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8
 * individual revision storage for header/topic/post
 * This table is primarily accessed by the primary key rev_id

flow_tree_node
CREATE TABLE `flow_tree_node` ( `tree_ancestor_id` binary(16) NOT NULL,  `tree_descendant_id` binary(16) NOT NULL,  `tree_depth` smallint(6) NOT NULL,  PRIMARY KEY (`tree_ancestor_id`,`tree_descendant_id`),  UNIQUE KEY `flow_tree_constraint` (`tree_descendant_id`,`tree_depth`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 mysql> explain SELECT tree_ancestor_id,tree_descendant_id FROM `flow_tree_node`  WHERE tree_ancestor_id IN (unhex('04934C2C5B0CA0771C21C2'), unhex('04934C2C5C049D5BF77CEA'));+--+-++---+---+-+-+--+--+--+ +--+-++---+---+-+-+--+--+--+ +--+-++---+---+-+-+--+--+--+ 1 row in set (0.00 sec) mysql> explain SELECT tree_ancestor_id, tree_depth FROM `flow_tree_node` WHERE tree_descendant_id = unhex('04C0E28D6C1CB60D82B91A'); +--+-++--+--+--+-+---+--+--+ +--+-++--+--+--+-+---+--+--+ +--+-++--+--+--+-+---+--+--+ 1 row in set (0.00 sec)
 * closure table implementation of tree storage in sql
 * example queries:
 * id  | select_type | table          | type  | possible_keys | key     | key_len | ref  | rows | Extra                    |
 * 1 | SIMPLE     | flow_tree_node | range | PRIMARY       | PRIMARY | 11      | NULL |   83 | Using where; Using index |
 * id  | select_type | table          | type | possible_keys        | key                  | key_len | ref   | rows | Extra                    |
 * 1 | SIMPLE     | flow_tree_node | ref  | flow_tree_constraint | flow_tree_constraint | 11      | const |    7 | Using where; Using index |

flow_wiki_ref
CREATE TABLE `flow_wiki_ref` ( `ref_src_object_id` binary(11) NOT NULL,  `ref_src_object_type` varbinary(32) NOT NULL,  `ref_src_workflow_id` binary(11) NOT NULL,  `ref_src_namespace` int(11) NOT NULL,  `ref_src_title` varbinary(255) NOT NULL,  `ref_target_namespace` int(11) NOT NULL,  `ref_target_title` varbinary(255) NOT NULL,  `ref_type` varbinary(16) NOT NULL,  `ref_src_wiki` varbinary(16) NOT NULL,  `ref_id` binary(11) NOT NULL,  PRIMARY KEY (`ref_id`),  KEY `flow_wiki_ref_idx_v2` (`ref_src_wiki`,`ref_src_namespace`,`ref_src_title`,`ref_type`,`ref_target_namespace`,`ref_target_title`,`ref_src_object_type`,`ref_src_ KEY `flow_wiki_ref_revision_v2` (`ref_src_wiki`,`ref_src_namespace`,`ref_src_title`,`ref_src_object_type`,`ref_src_object_id`,`ref_type`,`ref_target_namespace`,`re ) ENGINE=InnoDB DEFAULT CHARSET=binary

flow_ext_ref
CREATE TABLE `flow_ext_ref` ( `ref_src_object_id` binary(11) NOT NULL,  `ref_src_object_type` varbinary(32) NOT NULL,  `ref_src_workflow_id` binary(11) NOT NULL,  `ref_src_namespace` int(11) NOT NULL,  `ref_src_title` varbinary(255) NOT NULL,  `ref_target` blob NOT NULL,  `ref_type` varbinary(16) NOT NULL,  `ref_src_wiki` varbinary(16) NOT NULL,  `ref_id` binary(11) NOT NULL,  PRIMARY KEY (`ref_id`),  KEY `flow_ext_ref_idx_v2` (`ref_src_wiki`,`ref_src_namespace`,`ref_src_title`,`ref_type`,`ref_target`(255),`ref_src_object_type`,`ref_src_object_id`),  KEY `flow_ext_ref_revision_v2` (`ref_src_wiki`,`ref_src_namespace`,`ref_src_title`,`ref_src_object_type`,`ref_src_object_id`,`ref_type`,`ref_target`(255)) ) ENGINE=InnoDB DEFAULT CHARSET=binary

Sample queries
For non-private wikis on the WMF cluster, these should be run on analytics-store. From stat1003, run:

If you are not using a separate Flow cluster, you can connect to the right database (mywiki), remove the DB prefixes below (flowdb., mediawikiwiki.).