SQLCode

Loading A Key Space Table

How to add a new key space to the database.
  1. Set the table name, description, and URL
  2. Set path/name of local file that holds the data for the key space
  3. Select and run the (now filled in) Code. The Code does these things:
    1. Create the Key Space Table
    2. Load the data from the file
    3. Pack the data, making the table smaller, and therefore faster to load and use
    4. Rebuild the indexes for the compacted table. Since all the data is there (a 'packed' tree is read-only), can make best possible (i.e. smallest and fastest) B-Tree indexes.
  4. Add links (see next item)

SET @tableName = '';
SET @tableDesc = '';
SET @tableURL = '';
SET @dataFileName = '';
INSERT INTO KeySpaces (name, description, url) VALUES (@tableName, @tableDesc, @tableURL);

SET @createStatement = CONCAT(
   "CREATE TABLE `",
   @tableName,
   "` (",
   "id INT UNSIGNED NOT NULL AUTO_INCREMENT DEFAULT NULL, ",
   "name VARCHAR(30) NOT NULL UNIQUE, ",
   "description VARCHAR(255) NOT NULL DEFAULT '', ",
   "PRIMARY KEY (id)) ENGINE = MyISAM;");
PREPARE createKSTable FROM @createStatement;

SET @loadStatement = CONCAT(
   "LOAD DATA LOCAL INFILE '",
   @dataFileName,
   "' INTO TABLE `",
   @tableName,
   "` FIELDS TERMINATED BY '\\t' LINES TERMINATED BY '\\n'",
   " (name, description)");
#PREPARE loadKSTable FROM @loadStatement;   # Not a legal Prepared Statement
SELECT @loadStatement;   # Print out the command, copy, paste, execute.

EXECUTE createKSTable;
#EXECUTE loadKSTable;   # Example Load command
LOAD DATA LOCAL INFILE 'Locus-Desc.tab' INTO TABLE `SGD_LOCUS` FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' (name, description);
DEALLOCATE PREPARE createKSTable;
#DEALLOCATE PREPARE loadKSTable;

shell> myisampack '<DB Table file name and path>'
shell> myisamchk -rq --sort-index --analyze '<DB Table file name and path>'.MYI

Building Its Link Table

How to add a new link table to the database.
  1. (First time only) Create the tempLinks table, where we load in the name::name links to make id::id links
  2. Set the link table name
  3. Run the link table creation code.
  4. Disable the indexes for the link table, since rebuilding them each time is a waste of time
  5. For each key space that this key space has links to:
    1. Set path/name of local file that holds the data for the key space links
    2. Run the code to load the link data from that file into Table tempLinks
    3. Run BuildLinkTable to map all the names to ids (since it's faster to look up an integer than it is to look up a string)
  6. Enable indexes, so can build them now that we have all the data
  7. Pack the data, making the table smaller, and therefore faster to load and use
  8. Rebuild the indexes for the compacted table. Since all the data is there (a 'packed' tree is read-only), can make best possible (i.e. smallest and fastest) B-Tree indexes.

Create Temp Link Table

The table that holds the name::name mappings. Only created once, ever.
CREATE TABLE tempLinks (
   geneName VARCHAR(30) NOT NULL,               # Starting gene
   targetName VARCHAR(30) NOT NULL,            # Ending gene
   mutiplicity ENUM('1-1', '1-M', 'M-1', 'M-M') NOT NULL DEFAULT '1-1',
   biDirectional BOOLEAN NOT NULL DEFAULT TRUE)   # Does link go both ways?
      ENGINE = MyISAM;

Create Link Table

#SET @tableName = '';   # From above
SET @linkTableName = CONCAT(@tableName, "Links");
SET @createStatement = CONCAT(
   "CREATE TABLE `",
   @linkTableName,
   "` (",
   "geneID INT UNSIGNED NOT NULL, ",
   "targetID INT UNSIGNED NOT NULL, ",
   "targetName VARCHAR(30) NOT NULL, ",
   "mutiplicity Enum('1-1', '1-M', 'M-1', 'M-M') NOT NULL DEFAULT '1-1', ",
   "biDirectional Boolean NOT NULL DEFAULT TRUE, ",
   "INDEX (geneID), ",
   "INDEX (targetName(4))) ENGINE = MyISAM;");
PREPARE createLinkTable FROM @createStatement;
EXECUTE createLinkTable;
DEALLOCATE PREPARE createLinkTable;

Stop Indexes

#SET @linkTableName = CONCAT(@tableName, "Links");   # From above code
SET @stopIndexesStatement = CONCAT("ALTER TABLE ", @linkTableName, " DISABLE KEYS;");
PREPARE stopIndexes FROM @stopIndexesStatement;
EXECUTE stopIndexes;
DEALLOCATE PREPARE stopIndexes;

Load Links

Repeat for each link file

Load Name::Name Links

TRUNCATE TABLE tempLinks;   # Get rid of old data
SET @linkFileName = '';
SET @linkStatement = CONCAT(
   "LOAD DATA LOCAL INFILE '",
   @linkFileName,
   "' INTO TABLE tempLinks FIELDS TERMINATED BY '\\t' LINES TERMINATED BY '\\n'");
#PREPARE loadLinkTable FROM @linkStatement;   # Not a legal Prepared Statement
#EXECUTE loadLinkTable;
#DEALLOCATE PREPARE loadLinkTable;
SELECT @linkStatement;   # Print out the command, copy, paste, execute.
# Example Load command
LOAD DATA LOCAL INFILE 'Locus-ID Links.tab' INTO TABLE tempLinks FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';

Populate ID::ID Link Table

SET @targetTable = '';
call BuildLinkTable (@tableName, @targetTable, @linkTableName);

Start Indexes

#SET @linkTableName = CONCAT(@tableName, "Links");   # From above code
SET @startIndexesStatement = CONCAT("ALTER TABLE ", @linkTableName, " ENABLE KEYS;");
PREPARE startIndexes FROM @startIndexesStatement;
EXECUTE startIndexes;
DEALLOCATE PREPARE startIndexes;

Pack Table and Create Indexes

shell> myisampack '<Link Table file name and path>'
shell> myisamchk -rq --sort-index --analyze '<Link Table file name and path>'.MYI

-- Main.gregd - 21 Jun 2007