1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
| 'use strict';
/**
* Creates a menu in the UI.
*/
function onOpen() {
SpreadsheetApp.getUi()
.createMenu('Sheets to BigQuery')
.addItem('Upload', 'runFromUI')
.addToUi();
}
/**
* Function to run from the UI menu.
*
* Uploads the sheets defined in the active sheet into BigQuery.
*/
function runFromUI() {
// Column indices.
const SHEET_URL = 0;
const PROJECT_ID = 1;
const DATASET_ID = 2;
const TABLE_ID = 3;
const APPEND = 4;
const STATUS = 5;
// Get the data range rows, skipping the header (first) row.
let sheet = SpreadsheetApp.getActiveSheet();
let rows = sheet.getDataRange().getValues().slice(1);
// Run the sheetToBigQuery function for every row and write the status.
rows.forEach((row, i) => {
let status = sheetToBigQuery(
row[SHEET_URL],
row[PROJECT_ID],
row[DATASET_ID],
row[TABLE_ID],
row[APPEND],
);
sheet.getRange(i+2, STATUS+1).setValue(status);
});
}
/**
* Uploads a single sheet to BigQuery.
*
* @param {string} sheetUrl - The Google Sheet Url containing the data to upload.
* @param {string} projectId - Google Cloud Project ID.
* @param {string} datasetId - BigQuery Dataset ID.
* @param {string} tableId - BigQuery Table ID.
* @param {bool} append - Appends to BigQuery table if true, otherwise replaces the content.
*
* @return {string} status - Returns the status of the job.
*/
function sheetToBigQuery(sheetUrl, projectId, datasetId, tableId, append) {
try {
createDatasetIfDoesntExist(projectId, datasetId);
} catch (e) {
return `${e}: Please verify your "Project ID" exists and you have permission to edit BigQuery`;
}
let sheet;
try {
sheet = openSheetByUrl(sheetUrl);
} catch (e) {
return `${e}: Please verify the "Sheet URL" is pasted correctly`;
}
// Get the values from the sheet's data range as a matrix of values.
let rows = sheet.getDataRange().getValues();
// Normalize the headers (first row) to valid BigQuery column names.
// https://cloud.google.com/bigquery/docs/schemas#column_names
rows[0] = rows[0].map((header) => {
header = header.toLowerCase().replace(/[^\w]+/g, '_');
if (header.match(/^\d/))
header = '_' + header;
return header;
});
// Create the BigQuery load job config. For more information, see:
// https://developers.google.com/apps-script/advanced/bigquery
let loadJob = {
configuration: {
load: {
destinationTable: {
projectId: projectId,
datasetId: datasetId,
tableId: tableId
},
autodetect: true, // Infer schema from contents.
writeDisposition: append ? 'WRITE_APPEND' : 'WRITE_TRUNCATE',
}
}
};
// BigQuery load jobs can only load files, so we need to transform our
// rows (matrix of values) into a blob (file contents as string).
// For convenience, we convert the rows into a CSV data string.
// https://cloud.google.com/bigquery/docs/loading-data-local
let csvRows = rows.map(values =>
// We use JSON.stringify() to add "quotes to strings",
// but leave numbers and booleans without quotes.
// If a string itself contains quotes ("), JSON escapes them with
// a backslash as \" but the CSV format expects them to be
// escaped as "", so we replace all the \" with "".
values.map(value => JSON.stringify(value).replace(/\\"/g, '""'))
);
let csvData = csvRows.map(values => values.join(',')).join('\n');
let blob = Utilities.newBlob(csvData, 'application/octet-stream');
// Run the BigQuery load job.
try {
BigQuery.Jobs.insert(loadJob, projectId, blob);
} catch (e) {
return e;
}
Logger.log(
'Load job started. Click here to check your jobs: ' +
`https://console.cloud.google.com/bigquery?project=${projectId}&page=jobs`
);
// The status of a successful run contains the timestamp.
return `Last run: ${new Date()}`;
}
/**
* Creates a dataset if it doesn't exist, otherwise does nothing.
*
* @param {string} projectId - Google Cloud Project ID.
* @param {string} datasetId - BigQuery Dataset ID.
*/
function createDatasetIfDoesntExist(projectId, datasetId) {
try {
BigQuery.Datasets.get(projectId, datasetId);
} catch (err) {
let dataset = {
datasetReference: {
projectId: projectId,
datasetId: datasetId,
},
};
BigQuery.Datasets.insert(dataset, projectId);
Logger.log(`Created dataset: ${projectId}:${datasetId}`);
}
}
/**
* Opens the spreadsheet sheet (tab) with the given URL.
*
* @param {string} sheetUrl - Google Sheet Url.
*
* @returns {Sheet} - The sheet corresponding to the URL.
*
* @throws Throws an error if the sheet doesn't exist.
*/
function openSheetByUrl(sheetUrl) {
// Extract the sheet (tab) ID from the Url.
let sheetIdMatch = sheetUrl.match(/gid=(\d+)/);
let sheetId = sheetIdMatch ? sheetIdMatch[1] : null;
// From the open spreadsheet, get the sheet (tab) that matches the sheetId.
let spreadsheet = SpreadsheetApp.openByUrl(sheetUrl);
let sheet = spreadsheet.getSheets().filter(sheet => sheet.getSheetId() == sheetId)[0];
if (!sheet)
throw 'Sheet tab ID does not exist';
return sheet;
} |
Partager