User:Statsrick/PHP code
Appearance
Get range of IP addresses used by AWS EC2 |
---|
<?php
/**
 * Convert an IPv4 CIDR block into its first and last address.
 *
 * The last address is derived from the network address, so a block written
 * with host bits set (e.g. "10.0.0.5/24") still yields 10.0.0.0 - 10.0.0.255.
 *
 * @param string $cidr CIDR notation, e.g. "10.0.0.0/24".
 * @return array Two dotted-quad strings: [0] is the first (network) address,
 *               [1] is the last (broadcast) address of the block.
 * @throws InvalidArgumentException If $cidr is not "<IPv4 address>/<0-32>".
 */
function cidrToRange($cidr) {
    $parts = explode('/', trim((string) $cidr));
    if (count($parts) !== 2 || !preg_match('/^\d{1,2}$/', $parts[1]) || (int) $parts[1] > 32) {
        throw new InvalidArgumentException("Invalid CIDR block: $cidr");
    }
    $address = ip2long($parts[0]);
    if ($address === false) {
        throw new InvalidArgumentException("Invalid IPv4 address in CIDR block: $cidr");
    }
    // Bits to the right of the prefix identify the host inside the block.
    // NOTE(review): the shifts below assume 64-bit PHP integers - confirm if 32-bit builds matter.
    $hostBits = 32 - (int) $parts[1];
    $hostMask = (1 << $hostBits) - 1;
    $first = ($address & 0xFFFFFFFF) & ~$hostMask; // clear the host bits
    $last = $first | $hostMask;                    // set every host bit
    return array(long2ip($first), long2ip($last));
}
// Download the published AWS address ranges and print one tab-separated row
// per entry of the "prefixes" list: CIDR block, first address, last address.
$json = file_get_contents('https://ip-ranges.amazonaws.com/ip-ranges.json');
if ($json === false) {
    error_log('Could not download https://ip-ranges.amazonaws.com/ip-ranges.json');
    exit(1);
}
$data = json_decode($json, TRUE);
if (!is_array($data) || !isset($data['prefixes']) || !is_array($data['prefixes'])) {
    // json_decode() returns null on malformed input; iterating that would only emit warnings.
    error_log('ip-ranges.json did not decode to the expected structure');
    exit(1);
}
print "aws_ip_cidr\tipstart\tipstop\n";
foreach ($data['prefixes'] as $item) {
    if (!isset($item['ip_prefix'])) {
        continue; // entry without a CIDR block: nothing to convert
    }
    $range = cidrToRange($item['ip_prefix']);
    print $item['ip_prefix']."\t".$range[0]."\t".$range[1]."\n";
}
?>
|
Increase memory allocation |
---|
// Raise the memory_limit setting to 5000 megabytes for the remainder of this script run.
ini_set('memory_limit', '5000M');
|
Quick way to output an array to a file |
---|
// Collect the fourth tab-separated field of every row in $data, then write
// the collected array to a file in print_r() format.
$output = array();
$x = 0; // number of rows collected so far
foreach ($data as $row) {
    $c = explode("\t", $row);
    $output[] = $c[3]; // appending keeps the keys at 0, 1, 2, ... in step with $x
    ++$x;
}
$dump = print_r($output, true);
file_put_contents('icross_merged2.tsv', $dump);
|
Read a file into memory and scan through it |
---|
<?php
// Lift the memory ceiling first: the whole file is held in memory at once.
ini_set('memory_limit', '5000M');
// Load the file as a single string, then split it into one element per line.
$data = file_get_contents("input.txt");
$convert = explode("\n", $data);
// A similar result in one step: $convert=file("input.txt");
foreach ($convert as $line) {
    // Emit every line followed by a comma and a space.
    echo $line.', ';
}
?>
|
Program to execute a query in Redshift and map the results to a csv file and push it to S3 |
---|
#!/usr/bin/php -q
<?php
// Runs a fixed report query against Redshift through psql, converts the
// pipe-delimited result lines into a fully quoted CSV file under
// /home/ec2-user/REDSHIFT/, and optionally uploads that file with s3cmd.
//
// Usage:
//   php query2.php outfilename [s3-destination]
//   php query2.php videoplay.csv s3://hearstdashboards/hadoop/grafly_data/1321/1321.csv

if (empty($argv[1])) {
    fwrite(STDERR, "Usage: php query2.php outfilename [s3-destination]\n");
    exit(1);
}
$outPath = '/home/ec2-user/REDSHIFT/' . $argv[1];

$command = '/usr/bin/psql -d hdw -h hearst-dw-instance.chekqimmelgb.us-west-2.redshift.amazonaws.com -p 5439 -U admin -c"
select
to_char(client_datetime,\'MM/DD/YYYY\') as snapshot_day
,video_title
,video_trainer
,to_char(sum(case when event_type=\'Video_Play_Start\' then 1 else 0 end),\'9,999,999\') as video_starts
,to_char(float4(avg(float4(videoplaypercentage))),\'99.99%\') as avg_videoplaypercentage
from o_cosmobody_icross group by 1,2,3
having sum(case when event_type=\'Video_Play_Start\' then 1 else 0 end)>0
order by 1,4;
"';
$rows = array();
exec($command, $rows, $status);
if ($status !== 0) {
    // Do not write (and later upload) a file built from a failed query.
    fwrite(STDERR, "psql exited with status $status\n");
    exit(1);
}
//print_r($rows);

$fout = fopen($outPath, 'w');
if ($fout === false) {
    fwrite(STDERR, "Cannot open $outPath for writing\n");
    exit(1);
}
foreach ($rows as $row) {
    // Only lines containing a pipe past the first character are converted;
    // every other line of the psql output is dropped.
    if (strrpos($row, '|') > 0) {
        $cells = array();
        foreach (explode('|', $row) as $c) {
            // Quote every cell and double any embedded quote so the CSV stays parseable.
            $cells[] = '"' . str_replace('"', '""', trim($c)) . '"';
        }
        fwrite($fout, implode(',', $cells) . "\n");
    }
}
fclose($fout);

if (!empty($argv[2]) and strrpos(strtolower($argv[2]), 's3:') !== FALSE) {
    // Both values come from the command line: escape them before they reach the shell.
    system('/usr/bin/s3cmd put ' . escapeshellarg($outPath) . ' ' . escapeshellarg($argv[2]));
}
?>
|
Function to show array mapping of complex, multi-level arrays (up to 12 levels deep) |
---|
/**
 * Print every non-array leaf of a nested array as "[k1][k2]... = value",
 * one leaf per line.
 *
 * Output format is kept from the original implementation: a top-level leaf
 * prints as "[key]= value" (no space before "="), deeper leaves print as
 * "[k1][k2] = value". Anything nested deeper than $max_depth keys is skipped
 * without a message.
 *
 * Implemented with foreach because each() is deprecated since PHP 7.2 and
 * removed in PHP 8.0.
 *
 * @param array $farray    The array to map.
 * @param int   $max_depth Deepest key level that is still printed (default 12).
 * @return void
 */
function rm_arraymap12($farray, $max_depth = 12) {
    if (!is_array($farray)) {
        return;
    }
    _rm_arraymap_walk($farray, '', 1, $max_depth);
}

/**
 * Recursive helper for rm_arraymap12(): prints the leaves of $node, whose
 * keys sit at level $depth, prefixing each with the key path $path.
 */
function _rm_arraymap_walk($node, $path, $depth, $max_depth) {
    foreach ($node as $key => $value) {
        $label = $path . "[$key]";
        if (!is_array($value)) {
            print $label . ($depth === 1 ? '= ' : ' = ') . $value . "\n";
        } elseif ($depth < $max_depth) {
            _rm_arraymap_walk($value, $label, $depth + 1, $max_depth);
        }
    }
}
|
Yesterday |
---|
// Pin the default timezone to GMT, then format the date exactly one day
// before the current moment as YYYY-MM-DD.
date_default_timezone_set('GMT');
$yesterday = date('Y-m-d', strtotime('-1 day'));
|
Scrape source code for something |
---|
<?php
/**
 * Return every piece of $string found between $start and $end.
 *
 * Both delimiters are matched literally and case-insensitively; each capture
 * is the shortest possible run of characters between them.
 *
 * @param string $start  Opening delimiter.
 * @param string $end    Closing delimiter.
 * @param string $string Text to search.
 * @return array Captured substrings in order of appearance (empty if none).
 */
function findinside($start, $end, $string) {
    $pattern = sprintf('/%s(.*?)%s/i', preg_quote($start, '/'), preg_quote($end, '/'));
    $matches = array();
    preg_match_all($pattern, $string, $matches);
    return $matches[1];
}
// Fetch a page, strip all whitespace and single quotes from its source, and
// print the first content ID found after one of the known markers.
//$url=urldecode('http%3A%2F%2Fwww.wcvb.com%2Fweather%2Fhourbyhour-predicted-snowfall-totals%2F-%2F9850416%2F17313882%2F-%2Ffn6b8x%2F-%2Findex.html');
//$url=urldecode('http%3A%2F%2Fwww.mysanantonio.com%2Fdefault%2Farticle%2FWhen-Air-Force-left-Brooks-city-arrived-with-a-3854278.php');
//$url=urldecode('http%3A%2F%2Fwww.esquire.com%2Ffeatures%2Fdrinking%2Fgood-tequila-0809');
$url=urldecode('http%3A%2F%2Fwww.esquire.com%2Fblogs%2Fnews%2Fcarla-bruni-apocalypse');
//$url=urldecode('http%3A%2F%2Fblog.mysanantonio.com%2Fartbeat%2F2013%2F12%2Froad-trip-lopez-watercolors-on-display-at-brownsville-historical-association%2F');
//echo $url;
$contentID = ''; // stays empty when the page cannot be fetched or carries no known marker
$c = array();
$src = file_get_contents($url);
if ($src !== false) {
    $src = preg_replace('/\s+/', '', $src); //get rid of spaces
    $src = str_replace("'", '', $src);      //get rid of single quotes ... str_replace is faster than preg_replace
    // Marker => terminator pairs, tried in this order; the first marker present in the page wins.
    $markers = array(
        array('omni_bizObjectId="', '"'), //NP
        array('omni_blogID=', ';'),       //NP
        array('"contentId":', ','),       //TV
        array('articleId:"', '"'),        //MG
        array('articleID:', ','),         //MG
    );
    foreach ($markers as $marker) {
        if (strpos($src, $marker[0]) !== false) {
            $c = findinside($marker[0], $marker[1], $src);
            break;
        }
    }
    if (!empty($c)) {
        $c = array_unique($c);
        $contentID = $c[0];
    }
}
echo $contentID;
?>
|
Scrape keywords |
---|
<?php
// Read the social data file and collect its URL column (the fifth
// pipe-delimited field, URL-decoded and trimmed) into $urls.
$urls = array();
$file = fopen('/home/ec2-user/PHP_PROGRAMS/newsocial', 'r');
if ($file === false) {
    error_log('Cannot open /home/ec2-user/PHP_PROGRAMS/newsocial');
    exit(1);
}
// Loop on the fgets() result itself: fgets() returns false at end of file,
// and a feof()-driven loop can hand that false to explode().
while (($line = fgets($file)) !== false) {
    $bits = explode('|', $line);
    if (!empty($bits[4])) {
        $urls[] = trim(urldecode($bits[4]));
    }
}
fclose($file);
$arr = array_values(array_unique($urls)); // Create unique list of urls = arr
// Fetch the meta tags of every URL in $arr and store one "url<TAB>keyword"
// row per keyword in res_scrape_stack; $arr2 collects the same keywords.
$fout = fopen('/home/ec2-user/PHP_PROGRAMS/res_scrape_stack', 'w');
if ($fout === false) {
    error_log('Cannot open /home/ec2-user/PHP_PROGRAMS/res_scrape_stack for writing');
    exit(1);
}
$arr2 = array(); // arr2 has multiple rows per url for all the keywords
// foreach visits exactly the existing elements; the old "$i<=count($arr)"
// loop also read one element past the end of the list.
foreach ($arr as $pageUrl) {
    // @ silences the warning raised for pages that cannot be fetched; the
    // false returned in that case is handled by the is_array() check.
    $out = @get_meta_tags($pageUrl);
    if (!is_array($out) || !array_key_exists('keywords', $out)) {
        continue;
    }
    foreach (explode(',', $out['keywords']) as $keyword) {
        $keyword = strtolower(trim($keyword));
        fwrite($fout, $pageUrl."\t".$keyword."\n");
        $arr2[] = $keyword;
    }
}
fclose($fout);
// Count occurrences of keywords and keep only those with 10 or more.
// $arr2 is only assigned when at least one page yielded keywords, so fall
// back to an empty list instead of feeding an undefined value to the counter.
$allKeywords = (isset($arr2) && is_array($arr2)) ? $arr2 : array();
// Strip everything except letters, digits and whitespace before counting.
$arr3 = preg_replace('/[^\da-z\s]/i', '', $allKeywords);
$top_kwds = array();             // reduced set of keywords => their counts
$ar = array_count_values($arr3); // keyword => number of occurrences
arsort($ar);                     // highest counts first
foreach ($ar as $a => $v) {
    if ($v >= 10) {
        $top_kwds[trim($a)] = $v;
    }
}
// Re-read the "url<TAB>keyword" file and remember, per URL, a label for a
// keyword that is in $top_kwds. A later row for the same URL overwrites an
// earlier one, so each URL keeps the last of its top keywords in file order.
$file = fopen('/home/ec2-user/PHP_PROGRAMS/res_scrape_stack', 'r');
if ($file === false) {
    error_log('Cannot open /home/ec2-user/PHP_PROGRAMS/res_scrape_stack');
    exit(1);
}
$kwds = array();
while (($line = fgets($file)) !== false) {
    $bits = explode("\t", $line);
    if (count($bits) < 2) {
        continue; // no keyword column on this line
    }
    $pageUrl = trim($bits[0]);
    $keyword = trim($bits[1]);
    if (array_key_exists($keyword, $top_kwds)) {
        $num = trim($top_kwds[$keyword]);
        // Label is "keyword (count)" while the count has fewer than 10 digits.
        if (strlen($num) < 10) { $kwds[$pageUrl] = $keyword.' ('.$num.')'; }
        else { $kwds[$pageUrl] = $bits[1]; }
    }
}
fclose($file);
// Read the original file again and copy every row whose URL has a keyword
// label in $kwds to the scored file: the first twelve pipe-delimited fields
// (trimmed, URL decoded) followed by the label, all tab-separated.
$file2 = fopen('/home/ec2-user/PHP_PROGRAMS/newsocial', 'r');
$fout = fopen('/home/ec2-user/PHP_PROGRAMS/newsocial_scored_w_kwds.csv', 'w');
if ($file2 === false || $fout === false) {
    error_log('Cannot open newsocial for reading or newsocial_scored_w_kwds.csv for writing');
    exit(1);
}
while (($line = fgets($file2)) !== false) {
    $bits = explode('|', $line);
    $url = isset($bits[4]) ? trim(urldecode($bits[4])) : '';
    // $kwds is keyed by trimmed URL, so a direct lookup replaces the former
    // scan over every entry for each input row.
    if (empty($url) || !isset($kwds[$url]) || $kwds[$url] == '') {
        continue;
    }
    $columns = array();
    for ($n = 0; $n <= 11; $n++) {
        if ($n === 4) {
            $columns[] = $url;
        } else {
            // Short rows get empty fields instead of undefined-offset notices.
            $columns[] = isset($bits[$n]) ? trim($bits[$n]) : '';
        }
    }
    $columns[] = trim($kwds[$url]);
    fwrite($fout, implode("\t", $columns)."\n");
}
fclose($file2);
fclose($fout);
?>
|
Read a delimited (csv) file with header row to an array |
---|
<?php
/**
 * Read a delimited text file whose first line is a header row and return its
 * data rows as records keyed by the header names.
 *
 * Fields are split on the delimiter only; enclosures and escape characters
 * are not interpreted.
 *
 * @param string     $file    Path of the file to read.
 * @param array|null $options Optional settings:
 *                            'delimiter' - field separator, default ",";
 *                            'to_object' - when truthy, each record is a
 *                            stdClass object instead of an associative array.
 * @return array List of records; empty when the file has no data rows or
 *               cannot be read. A field missing from a short row is null.
 */
function parse_csv($file, $options = null) {
    $delimiter = empty($options['delimiter']) ? "," : $options['delimiter'];
    $to_object = !empty($options['to_object']);
    $res = array();
    $str = file_get_contents($file);
    if ($str === false) {
        return $res;
    }
    // Accept \n, \r\n and \r line endings so no stray "\r" ends up in the last field.
    $lines = preg_split('/\r\n|\r|\n/', $str);
    $field_names = explode($delimiter, array_shift($lines));
    foreach ($lines as $line) {
        // Skip empty lines only; empty() would also drop a row that is just "0".
        if ($line === '') continue;
        $fields = explode($delimiter, $line);
        $_res = $to_object ? new stdClass : array();
        foreach ($field_names as $key => $f) {
            $value = isset($fields[$key]) ? $fields[$key] : null;
            if ($to_object) {
                $_res->{$f} = $value;
            } else {
                $_res[$f] = $value;
            }
        }
        $res[] = $_res;
    }
    return $res;
}
?>
NOTE:
The first line of the CSV file is treated as the header row (field names).
TODO:
- Enclosure handling
- Escape character handling
- Other features/enhancements as you need
EXAMPLE USE:
Content of /path/to/file.csv:
CODE,COUNTRY
AD,Andorra
AE,United Arab Emirates
AF,Afghanistan
AG,Antigua and Barbuda
<?php
// No options given: fields are split on "," and every data row comes back as an associative array.
$arr_csv = parse_csv("/path/to/file.csv");
print_r($arr_csv);
?>
// Output:
Array
(
[0] => Array
(
[CODE] => AD
[COUNTRY] => Andorra
)
[1] => Array
(
[CODE] => AE
[COUNTRY] => United Arab Emirates
)
[2] => Array
(
[CODE] => AF
[COUNTRY] => Afghanistan
)
[3] => Array
(
[CODE] => AG
[COUNTRY] => Antigua and Barbuda
)
)
<?php
// With "to_object" => true every data row comes back as a stdClass object instead of an array.
$obj_csv = parse_csv("/path/to/file.csv", array("to_object" => true));
print_r($obj_csv);
?>
// Output:
Array
(
[0] => stdClass Object
(
[CODE] => AD
[COUNTRY] => Andorra
)
[1] => stdClass Object
(
[CODE] => AE
[COUNTRY] => United Arab Emirates
)
[2] => stdClass Object
(
[CODE] => AF
[COUNTRY] => Afghanistan
)
[3] => stdClass Object
(
[CODE] => AG
[COUNTRY] => Antigua and Barbuda
)
[4] => stdClass Object
(
[CODE] =>
[COUNTRY] =>
)
)
// If you use character | (pipe) as delimiter in your csv file, use:
<?php
// The "delimiter" option replaces the default "," field separator.
$arr_csv = parse_csv("/path/to/file.csv", array("delimiter"=>"|"));
?>
|
Simple way to check if a url exists |
---|
// DNS check: gethostbynamel() expects a bare host name, so take the host
// part out of the URL first.
var_dump(gethostbynamel(parse_url('http://-blue--crush-.tumblr.com', PHP_URL_HOST))); //will return false if doesn't exist
//using curl
$mylinks="http://site.com/page.html";
$handlerr = curl_init($mylinks);
curl_setopt($handlerr, CURLOPT_RETURNTRANSFER, TRUE);
$resp = curl_exec($handlerr);
$ht = curl_getinfo($handlerr, CURLINFO_HTTP_CODE);
curl_close($handlerr);
// The page exists when the request completed and the server did not answer
// 404 (the earlier version printed OK for a 404, i.e. the test was inverted).
if ($resp !== false && $ht != 404) { echo 'OK';} else { echo 'NO';}
/**
 * Check whether a URL answers with a successful or redirect HTTP status.
 *
 * curl_init() alone only creates a handle and never contacts the server, so
 * the request is actually executed here (body-less, with time limits).
 *
 * @param string $url URL to probe.
 * @return bool True when the request completes with a 2xx or 3xx status,
 *              false otherwise (including empty input and transfer errors).
 */
function url_exists($url) {
    if (!is_string($url) || trim($url) === '') return false;
    $fp = curl_init($url);
    if ($fp === false) return false;
    curl_setopt($fp, CURLOPT_NOBODY, true);         // headers are enough to learn the status
    curl_setopt($fp, CURLOPT_RETURNTRANSFER, true); // keep the response out of the output
    curl_setopt($fp, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($fp, CURLOPT_TIMEOUT, 15);
    $completed = curl_exec($fp) !== false;
    $status = (int) curl_getinfo($fp, CURLINFO_HTTP_CODE);
    curl_close($fp);
    return $completed && $status >= 200 && $status < 400;
}
|
Simplest way to create a two-way hash (reversible encryption of a value) |
---|
//originally adapted from: http://aesencryption.net/#PHP-aes-encryption-example
// Turn an e-mail address into a reversible token and back again.
// The mcrypt extension used before was removed from PHP in 7.2, so this uses
// OpenSSL AES-256-CBC instead.
// NOTE(review): tokens produced here are NOT interchangeable with tokens from
// the former MCRYPT_RIJNDAEL_256 code - re-encode any stored values.
$em='user@gmail.com';
echo $em."\n";
$key ='8675309';
$cipherKey = hash('sha256', $key, true);                      // 32 raw bytes for AES-256
$cipherIv = substr(hash('sha256', 'iv'.$key, true), 0, 16);   // 16-byte IV derived from the key
// The IV is fixed on purpose: the same input must always give the same token.
// The price is that equal inputs are recognisable as equal from their tokens.
$encrypted = openssl_encrypt(strtoupper( trim( $em ) ), 'aes-256-cbc', $cipherKey, 0, $cipherIv); // base64 text
$decrypted = openssl_decrypt($encrypted, 'aes-256-cbc', $cipherKey, 0, $cipherIv);
echo $encrypted."\n";
echo $decrypted."\n";
|
Read and write gzipped data without uncompressing the files on disk |
---|
<?php
// Stream a gzipped tab-separated file line by line and write a gzipped,
// pipe-separated copy of its first two fields, cleaned and length-limited.
$filename='/home/ec2-user/CSV/stack.gz';
$suffix = isset($argv[1]) ? $argv[1] : '';
$zd = gzopen($filename, "r");
$fout = gzopen("stack$suffix.gz", "w9");
if ($zd === false || $fout === false) {
    error_log("Cannot open $filename for reading or stack$suffix.gz for writing");
    exit(1);
}
// Every character outside this set is stripped from the output fields.
$strip = "/[^a-zA-Z0-9\-=+\!@#$%^&*()\[\]{};:,<.>\/?]/";
// gzgets() returns false at end of file, which ends the loop without
// writing a bogus empty row.
while (($line = gzgets($zd)) !== false) {
    $bits = explode("\t", $line);
    $second = isset($bits[1]) ? $bits[1] : ''; // lines without a tab have no second field
    $var01 = substr(preg_replace($strip, "", $bits[0]), 0, 36); // at most 36 characters
    $var02 = substr(preg_replace($strip, "", $second), 0, 8);   // at most 8 characters
    gzwrite($fout, $var01.'|'.$var02."\n");
}
gzclose($fout);
gzclose($zd);
?>
|
Back to Rick McFarland's Library