Template:SchemaDiagram/sql/wikibase-repo-1.39.txt

-- MySQL version of the database schema for the Wikibase extension. -- Licence: GNU GPL v2+

-- TODO: figure out which lengths to use for some of the varchar fields.

-- Unique ID generator. CREATE TABLE IF NOT EXISTS /*_*/wb_id_counters ( id_value                   INT unsigned        NOT NULL,  id_type                    VARBINARY(32)       NOT NULL ) /*$wgDBTableOptions*/;

CREATE UNIQUE INDEX /*i*/wb_id_counters_type ON /*_*/wb_id_counters (id_type);

-- Derived storage. -- Links site+title pairs to item ids. CREATE TABLE IF NOT EXISTS /*_*/wb_items_per_site ( ips_row_id                 BIGINT unsigned     NOT NULL PRIMARY KEY AUTO_INCREMENT, -- row ID  ips_item_id                INT unsigned        NOT NULL, -- Id of the item  ips_site_id                VARBINARY(32)       NOT NULL, -- Site identifier (global)  ips_site_page              VARCHAR(310)        NOT NULL -- Prefixed title of the page ) /*$wgDBTableOptions*/;

CREATE UNIQUE INDEX /*i*/wb_ips_item_site_page ON /*_*/wb_items_per_site (ips_site_id, ips_site_page); CREATE INDEX /*i*/wb_ips_item_id ON /*_*/wb_items_per_site (ips_item_id);

-- Source: extensions/Wikibase/repo/sql/abstract/wb_property_info.json CREATE TABLE IF NOT EXISTS /*_*/wb_property_info ( pi_property_id             INT unsigned        NOT NULL PRIMARY KEY,  pi_type                    VARBINARY(32)       NOT NULL,  pi_info                    BLOB                NOT NULL  INDEX pi_type (pi_type) ) /*$wgDBTableOptions*/;

CREATE INDEX /*i*/pi_type ON /*_*/wb_property_info (pi_type);

-- Lookup table for entity terms (ie labels, aliases, descriptions). -- NOTE: keep the Wikimedia specific terms.wmf.sql in sync with this! CREATE TABLE IF NOT EXISTS /*_*/wb_terms ( term_row_id                BIGINT unsigned     NOT NULL PRIMARY KEY AUTO_INCREMENT, -- row ID  term_entity_id             INT unsigned        NOT NULL, -- Id of the entity  term_full_entity_id        VARBINARY(32)       DEFAULT NULL, -- Full id of the entity (not only numeric part)  term_entity_type           VARBINARY(32)       NOT NULL, -- Type of the entity  term_language              VARBINARY(32)       NOT NULL, -- Language code  term_type                  VARBINARY(32)       NOT NULL, -- Term type  term_text                  VARCHAR(255) binary NOT NULL, -- The term text  term_search_key            VARCHAR(255) binary NOT NULL, -- The term text, lowercase for case-insensitive lookups  term_weight                FLOAT UNSIGNED     NOT NULL DEFAULT 0.0 -- weight for ranking ) /*$wgDBTableOptions*/;

-- Indexes and comments below adopted from the suggestions Sean Pringle made -- at https://phabricator.wikimedia.org/T47529#518941 based on a -- live analysis of queries on wikidata.org in January 2014. -- NOTE: keep these in sync with UpdateTermIndexes.sql

-- Some wb_terms queries use term_full_entity_id_s=X which is good selectivity. CREATE INDEX /*i*/term_full_entity ON /*_*/wb_terms (term_full_entity_id);

-- T204837 CREATE INDEX /*i*/wb_terms_text ON /*_*/wb_terms (term_text);

-- T204838 CREATE INDEX /*i*/wb_terms_search_key ON /*_*/wb_terms (term_search_key);

-- This index has good selectivity while still allowing ICP for short string values. CREATE INDEX /*i*/term_search_full ON /*_*/wb_terms (term_language, term_full_entity_id, term_type, term_search_key(16));

-- T202265 CREATE INDEX /*i*/tmp1 ON /*_*/wb_terms (`term_language`, `term_type`, `term_entity_type`, `term_search_key`);

-- T204836 CREATE INDEX /*i*/wb_terms_entity_id ON /*_*/wb_terms (`term_entity_id`);

-- General info and links: -- --- -- -- These tables represent a normalized form of the wb_terms table. -- -- The wb_terms table history leading to this point can be all followed starting -- from this epic https://phabricator.wikimedia.org/T208425 -- -- The current solution is part of that epic. work on it can be followed starting -- from this ticket https://phabricator.wikimedia.org/T219175 -- -- This design is normalized for the sake of reducing duplication and redundancy -- that accounted for ~70% of the disk space occupied by wb_terms table as of -- March 2019. It requires few joins for queries, and impact on performance -- will be addressed as the need arise. -- -- Note on splitting Items and Properties: -- --- -- wb_item_terms an wb_property_terms are two tables with similar structure. -- original draft had them combined in one table, but further discussion lead -- to the decision of splitting them for the following reasons: -- - items are lot more and will grow faster than properties, which means --  when querying only for properties we need not to look at a gigantic --  index that includes items as well, but rather a very small one. -- - conceptually, items and properties are two different entity types, and --  since different entity types might have different constraints and maybe --  even structure, splitting them into corresponding tables saves us from --  ending up with a wide polymorphic table again (lesson learned from --   wb_terms table itself) -- -- Note on Entity IDs: -- -- -- Since Wikidata entity ids are fixed and known to always be Q (like in Q123), -- entity ids are stored as integers after dropping that prefix, in both -- wb_item_terms and wb_property_terms tables. -- e.g. entity id Q123 has integer id of 123. -- -- when a new entity type gets introduced into the instance then one can either: -- - store it in wb_item_terms/wb_property_items if it has the same prefix (Q), or -- - create a separate table similar to wb_item_terms in structure for those --  entities if the prefix is different.

-- stores a record per term per item per language. this table is expected to be the longest one -- in this group of tables. -- -- term text, type and language are normalized further through wb_term_in_lang -- table. CREATE TABLE IF NOT EXISTS /*_*/wbt_item_terms ( wbit_id                                BIGINT unsigned       NOT NULL PRIMARY KEY AUTO_INCREMENT,  wbit_item_id                           INT    unsigned       NOT NULL,  wbit_term_in_lang_id                   INT    unsigned       NOT NULL ) /*$wgDBTableOptions*/;

CREATE INDEX /*i*/wbt_item_terms_item_id ON /*_*/wbt_item_terms (wbit_item_id); CREATE UNIQUE INDEX /*i*/wbt_item_terms_term_in_lang_id_item_id ON /*_*/wbt_item_terms (wbit_term_in_lang_id, wbit_item_id);

-- stores a record per term per property per language. -- -- term text, type and language are normalized further through wb_term_in_lang -- table. CREATE TABLE IF NOT EXISTS /*_*/wbt_property_terms ( wbpt_id                                INT unsigned       NOT NULL PRIMARY KEY AUTO_INCREMENT,  wbpt_property_id                       INT unsigned       NOT NULL,  wbpt_term_in_lang_id                   INT unsigned       NOT NULL ) /*$wgDBTableOptions*/;

CREATE INDEX /*i*/wbt_property_terms_property_id ON /*_*/wbt_property_terms (wbpt_property_id); CREATE UNIQUE INDEX /*i*/wbt_property_terms_term_in_lang_id_property_id ON /*_*/wbt_property_terms (wbpt_term_in_lang_id, wbpt_property_id);

-- stores a record per term per text per language. -- -- term text and language are normalized further through wb_text_in_lang table. CREATE TABLE IF NOT EXISTS /*_*/wbt_term_in_lang ( wbtl_id                          INT unsigned       NOT NULL PRIMARY KEY AUTO_INCREMENT,  wbtl_type_id                     INT unsigned       NOT NULL,  wbtl_text_in_lang_id             INT unsigned       NOT NULL ) /*$wgDBTableOptions*/;

CREATE INDEX /*i*/wbt_term_in_lang_type_id_text_in ON /*_*/wbt_term_in_lang (wbtl_type_id); CREATE UNIQUE INDEX /*i*/wbt_term_in_lang_text_in_lang_id_lang_id ON /*_*/wbt_term_in_lang (wbtl_text_in_lang_id, wbtl_type_id);

-- stores a record per term text per language. -- -- text is normalized through wb_term_text table. CREATE TABLE IF NOT EXISTS /*_*/wbt_text_in_lang ( wbxl_id                         INT unsigned       NOT NULL PRIMARY KEY AUTO_INCREMENT,  wbxl_language                   VARBINARY(20)      NOT NULL,  wbxl_text_id                    INT unsigned       NOT NULL ) /*$wgDBTableOptions*/;

CREATE INDEX /*i*/wbt_text_in_lang_language ON /*_*/wbt_text_in_lang (wbxl_language); CREATE UNIQUE INDEX /*i*/wbt_text_in_lang_text_id_text_id ON /*_*/wbt_text_in_lang (wbxl_text_id, wbxl_language);

-- stores a record per text value that are used in different terms -- in different languages. CREATE TABLE IF NOT EXISTS /*_*/wbt_text ( wbx_id                             INT unsigned         NOT NULL PRIMARY KEY AUTO_INCREMENT,  wbx_text                           VARBINARY(255)       NOT NULL ) /*$wgDBTableOptions*/;

CREATE UNIQUE INDEX /*i*/wbt_text_text ON /*_*/wbt_text (wbx_text);

-- normalized term type names CREATE TABLE IF NOT EXISTS /*_*/wbt_type ( wby_id                                INT unsigned         NOT NULL PRIMARY KEY AUTO_INCREMENT,  wby_name                              VARBINARY(45)        NOT NULL ) /*$wgDBTableOptions*/;

CREATE UNIQUE INDEX /*i*/wbt_type_name ON /*_*/wbt_type (wby_name);