User:Statsrick/REDSHIFT code
Appearance
Rick's Redshift Tips
[edit]How to "create table as if not exists" |
---|
CREATE TABLE IF NOT EXISTS new_tbl (LIKE old_tbl);
INSERT INTO new_tbl
SELECT * FROM old_tbl
WHERE NOT EXISTS ( SELECT * FROM new_tbl );
|
Performing a Deep Copy can be faster than a vacuum |
---|
/*http://docs.aws.amazon.com/redshift/latest/dg/performing-a-deep-copy.html*/
create table likeihits (like i_HITS);
insert into likeihits (select * from newiHITSdata);
drop table i_HITS;
alter table likeihits rename to i_HITS;
|
Loading tables from S3 into REDSHIFT |
---|
drop table O_ICROSS_REF;
create table O_ICROSS_REF(
Client char(5),
BU int,
Description varchar(30),
CountryCode char(2),
site char(40),
title char(30),
dt datatime
);
copy O_ICROSS_REF from 's3://hearsticrossing/icrossing_ref_table.csv' CREDENTIALS 'aws_access_key_id=[YOUR_AWS_KEY];aws_secret_access_key=[YOUR_AWS_SECRET_KEY]'
delimiter '\t' gzip ignoreblanklines truncatecolumns removequotes emptyasnull blanksasnull ACCEPTINVCHARS maxerror 1 dateformat 'auto' timeformat 'auto' COMPUPDATE ON;
drop table O_ICROSS_REF;
create table O_ICROSS_REF(
client char(5) ENCODE LZO,
bu int ENCODE RAW,
description varchar(30) ENCODE LZO,
countrycode char(2) ENCODE LZO,
site char(40) ENCODE LZO,
title char(30) ENCODE LZO,
dt datatime ENCODE RAW,
);
/* json read in with json auto...note that keys need to be in all lower case */
copy O_ICROSS_REF from 's3://hearsticrossing/icrossing_ref_table.csv' CREDENTIALS 'aws_access_key_id=[YOUR_AWS_KEY];aws_secret_access_key=[YOUR_AWS_SECRET_KEY]'
json 'auto' ACCEPTINVCHARS COMPUPDATE OFF;
/* json read in with json pathfile...keys can be mixed case */
copy O_ICROSS_REF from 's3://hearsticrossing/icrossing_ref_table.csv' CREDENTIALS 'aws_access_key_id=[YOUR_AWS_KEY];aws_secret_access_key=[YOUR_AWS_SECRET_KEY]'
json 's3://hearsticrossing/pathfiles/pathfile.json' ACCEPTINVCHARS COMPUPDATE OFF;
|
Dump data from Redshift to S3 |
---|
''NOTE: the bucket must exist already in the same region as the Redshift cluster''
UNLOAD ('select * from tmp_peyman') TO 's3://hearstcontent/hmg_article_dump2/'
CREDENTIALS 'aws_access_key_id=[YOUR_AWS_KEY];aws_secret_access_key=[YOUR_AWS_SECRET_KEY]'
DELIMITER '\t' ESCAPE ALLOWOVERWRITE;
|
How to change test table to production table |
---|
CREATE TABLE new_table([new table columns]);
INSERT INTO new_table (SELECT * FROM prod_table);
ALTER TABLE prod_table RENAME TO old_table;
ALTER TABLE new_table RENAME TO prod_table;
DROP TABLE old_table;
|
How to determine the value of another variable in a row that corresponds to the max/min of a numeric variable |
---|
,trim(SUBSTRING(MIN(TO_CHAR(hit_time_gmt,'YYYYMMDDHH24MISS')||page_url_clean) from 15 for 100)) as entryurl
,trim(SUBSTRING(MAX(TO_CHAR(hit_time_gmt,'YYYYMMDDHH24MISS')||page_url_clean) from 15 for 100)) as exiturl
|
Convert to and from Unix time |
---|
select extract (epoch from timestamp '2011-08-08 11:11:58') as unix_time from tbl_name;
select TIMESTAMP 'epoch' + unix_time * INTERVAL '1 second' as datetime from tbl_name;
|
Add seconds/days to datetime |
---|
dateadd(h,-5,entrydatetime) as dateminus
dateadd(m,37,entrydatetime) as dateplus
|
Decode URLs that have been encoded...or use a UDF! |
---|
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
encoded_url,'%3A',':'),'%2F','/'),'%28','('),'%29',')'),'%3B',';'),'%21','!'),'%40','@'),'%26','&'),'%3D','='),'%2B','+'),'%24','$'),'%2C',','),'%2A','*'),'%3F','?'),'%
23','#'),'%5B','['),'%5D',']'),'%25','%'),'%3A',':'),'%2F','/'),'%20',' ') as decoded_url
|
REGEXP_SUBSTR |
---|
split_part(url,'/',3)||'/'||regexp_substr(url, 'a\\d+|g\\d+') as url_id
|
Parse strings with split_part |
---|
,split_part(trim(url),'/',3) as domain
,split_part(url,'/',4) as section
|
Diagnostic queries |
---|
/* disk space used */ SELECT to_char(SUM(used-tossed),'999,999,999,999') used, to_char(SUM(capacity),'999,999,999,999') capacity
,to_char(100*float4(SUM(used-tossed))/float4(SUM(capacity)),'999.999') perc FROM stv_partitions
/* last load errors */ SELECT * FROM stl_load_errors ORDER BY starttime DESC LIMIT 100;
/* last udf errors */ SELECT * FROM svl_udf_log ORDER BY created DESC LIMIT 100;
/* table list with size */ SELECT b.name,to_char(b.nr,'999,999,999,999') nrows,to_char(a.sz_gb,'9,999.99') sz_gb
FROM (SELECT tbl, ROUND(CONVERT(REAL,COUNT(*))/1024,2) sz_gb FROM stv_blocklist GROUP BY 1) a,(SELECT id tbl,name, SUM(ROWS) nr FROM STV_TBL_PERM GROUP BY 1,2) b
WHERE a.tbl=b.tbl ORDER BY 3 DESC;
/* column list */ SELECT * FROM pg_table_def WHERE tablename = 'tmp';
/* list sessions */ SELECT * FROM STV_SESSIONS;
/* kill sessions */ SELECT pg_terminate_backend(pid);
/* kill all sessions */ SELECT pg_terminate_backend(process) FROM STV_SESSIONS where user_name='admin' and process != pg_backend_pid();
/* column encoding */ select "column", type, encoding from pg_table_def where tablename = 'omniture_hmgweb';
|
Back to Rick's Library