User:Statsrick/BIGQUERY udf
Appearance
Note All these UDF's use Standard SQL Dialect ... in "Show Options" in the console, uncheck the "Use Legacy SQL" box before running
Code that does this - bq cli example |
---|
Standard sql:
bq --allow_large_results=True --use_legacy_sql=False --replace=True --destination_table=omniture.hmg_agg "sql script”
Legacy sql:
bq query --udf_resource=<file_path_or_URI> <sql_query>
|
Code that does this - using bigquery python example |
---|
Standard sql:
client.write_to_table(
"""sql script""",
'dfp_dataset',
'dfp_ad_unit_rollups',
allow_large_results=True,
use_legacy_sql=flase)
Legacy sql:
client.write_to_table(
"""sql script""",
'dfp_dataset',
'dfp_ad_unit_rollups',
allow_large_results=True,
external_udf_uris=[])
|
UDFs
Classy...classify web traffic based on referrer and url into categories |
---|
CREATE TEMPORARY FUNCTION classy(r STRING,u string,type string)
RETURNS STRING
LANGUAGE js AS """
if (type == 'ref_type') {
return ref_url(r, u);
};
if (type == 'domain' || type == 'site_id' || type == 'bu' || type == 'country') {
return url(u,type);
};
function ref_url(r, u) {
if (r == 'direct' || r == 'undefined' || r == null || r == undefined) {
return 'direct';
};
if (r || u) {
try {
var unicode = {
escape: function(s) {
return s.replace(/^[-~]|/g, function(m) {
var code = m.charCodeAt(0);
return 'u' + ((code < 0x10) ? '000' : ((code < 0x100) ? '00' : ((code < 0x1000) ? '0' : ''))) + code.toString(16);
});
},
unescape: function(s) {
return s.replace(/\\u([a-fA-F0-9]{4})/g, function(matched, g1) {
return String.fromCharCode(parseInt(g1, 16))
})
}
}
var myRegexp = /^(?:https?:\\/\\/)?(?:www\\.)?((?:(?!www\\.|\\.).)+\\.[a-zA-Z0-9.]+)/g;
var rdom = myRegexp.exec(unescape(r))[1];
var rdom = rdom.toLowerCase();
var u = u.toLowerCase();
if ((u.indexOf('src=ai') > -1 || u.indexOf('src=sem') > -1 || u.indexOf('src=myli') > -1 || u.indexOf('src=myli') > -1 || u.indexOf('src=rb') > -1 || u.indexOf('src=arb') > -1 ) {
return 'paid';
} else if ((rdom.indexOf('t.co') > -1 || rdom.indexOf('twit') > -1 || rdom.indexOf('facebook.') > -1) {
return 'social';
} else if (rdom.indexOf('bing.') > -1 || rdom.indexOf('baidu') > -1 || rdom.indexOf('yahoo') > -1 || rdom.indexOf('ww.goog') > -1) {
return 'search';
};
var internal = url(r,'');
var obj = JSON.parse(internal);
if (obj.domain != 'XXXXX') {
return "internal:" + obj.domain;
} else {
return 'websites:other';
};
} catch (err) {
return 'direct';
}
}
}
function url(s,t) {
if (s) {
try {
var unicode = {
escape: function(s) {
return s.replace(/^[-~]|/g, function(m) {
var code = m.charCodeAt(0);
return 'u' + ((code < 0x10) ? '000' : ((code < 0x100) ? '00' : ((code < 0x1000) ? '0' : ''))) + code.toString(16);
});
},
unescape: function(s) {
return s.replace(/\\u([a-fA-F0-9]{4})/g, function(matched, g1) {
return String.fromCharCode(parseInt(g1, 16))
})
}
}
var myRegexp = /^(?:https?:\\/\\/)?(?:www\\.)?((?:(?!www\\.|\\.).)+\\.[a-zA-Z0-9.]+)/g;
var udom = myRegexp.exec(unescape(s))[1];
var udom = udom.toLowerCase();
var s = s.toLowerCase();
if (udom.indexOf('answer.') > -1 && udom.indexOf('.com') > -1) {
var d = 'US~MG~~202~answer.com';
} else if (udom.indexOf('hjjh.') > -1 && udom.indexOf('.com') > -1) {
var d = 'US~TV~104~hjjh.com';
} else if (udom.indexOf('aboutsoap.') > -1 && udom.indexOf('.uk') > -1) {
var d = 'UK~MI~191~aboutsoap.co.uk';
} else {
var d = 'XX~XX~unknown~XXXXX~unknown';
};
} catch (err) {
var d = 'XX~XX~unknown~XXXXX~unknown';
}
var myArr = d.split('~');
var output = {
country:myArr[0],
bu:myArr[1],
site_id:myArr[2],
domain:myArr[3]
};
if (t == 'domain') {
return myArr[3];
} else if (t == 'site_id') {
return myArr[2];
} else if (t == 'bu') {
return myArr[1];
} else if (t == 'country') {
return myArr[0];
} else {
return JSON.stringify(output);
};
}
}""";
SELECT
page_url
,classy('', page_url, 'domain') AS domain
,classy(referrer,page_url,'ref_type') AS ref_type
FROM omniture.hmg_web_20160723
limit 100;
|
Back to Rick McFarland's Library