Overview
  • Namespace
  • Class

Namespaces

  • Hug
    • Http

Classes

  • Hug\Http\Http
  1 <?php
  2 
  3 namespace Hug\Http;
  4 
  5 use LayerShifter\TLDExtract\Extract;
  6 
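/**
 * Static helper methods for HTTP and URL handling: cURL status probes,
 * URL clean-up, domain extraction, response splitting and redirect resolution.
 */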
 10 class Http
 11 {
 12     /**
 13      * Execute shell nslookup command
 14      *
 15      * This function is used to accelerate Domain Name Availability : if a domain name responds to nslookup command then it's not available for purchase !
 16      *
 17      * @param string $url
 18      *
 19      * @return string|null Url corresponding IP address or null
 20      *
 21      */
 22     public static function nslookup($url)
 23     {
 24         $ip = null;
 25         $ret = shell_exec('nslookup '.$url);
 26         $ret = explode("\n", $ret);
 27         $ret = array_reverse($ret);
 28         foreach ($ret as $r)
 29         {
 30             if(substr($ret[2], 0, 9)=='Address: ')
 31             {
 32                 $ip = substr($ret[2], 9); break;
 33             } 
 34         }
 35         return $ip;
 36     }
 37 
 38     /**
 39      * Check if an url is accessible (means not a 404)
 40      * 
 41      * @param string $url
 42      * @return bool is_url_accessible
 43      * 
 44      * @todo check speed against is_url_accessible2
 45      */
 46     public static function is_url_accessible($url)
 47     {
 48         $ch = curl_init($url);
 49         # SET NOBODY TO SPEED
 50         curl_setopt($ch, CURLOPT_NOBODY, true);
 51         curl_exec($ch);
 52         $retcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
 53         # 400 means not found, 200 means found.
 54         curl_close($ch);
 55         if($retcode===200 || $retcode===301 || $retcode===302 || $retcode===307)
 56         {
 57             return true;
 58         }
 59         else
 60         {
 61             return false;
 62         }
 63     }
 64 
 65     /**
 66      * Returns HTTP code for given URL
 67      *
 68      * @param string $utl
 69      * @return int HTTP code
 70      */
 71     public static function get_http_code($url)
 72     {
 73         $ch = curl_init($url);
 74         # SET NOBODY TO SPEED
 75         curl_setopt($ch, CURLOPT_NOBODY, true);
 76         curl_exec($ch);
 77         $retcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
 78         # 400 means not found, 200 means found.
 79         curl_close($ch);
 80         return $retcode;
 81     }
 82 
 83 
 84     /**
 85      * Cleans an url from its query parameters
 86      *
 87      * Example : 
 88      * input : http://www.monsite.fr/fr/coucou/index.php?id=3&num=50
 89      * output : http://www.monsite.fr/fr/coucou/index.php
 90      *
 91      * @param string $url
 92      * @return string $clean_url
 93      */
 94     public static function url_remove_query($url)
 95     {
 96         $url_pieces = parse_url($url);
 97         $clean_url = (isset($url_pieces['scheme']) ? $url_pieces['scheme'] : 'http') . '://' . (isset($url_pieces['host']) ? $url_pieces['host'] : '');
 98         if(isset($url_pieces['path']))
 99         {
100             $clean_url .= $url_pieces['path'];
101         }
102         return $clean_url;
103     }
104 
105     /**
106      * Cleans an url from its query parameters and path
107      *
108      * Example : 
109      * input : http://www.monsite.fr/fr/coucou/index.php?id=3&num=50
110      * output : http://www.monsite.fr
111      *
112      * @param string $url
113      * @return string $clean_url
114      */
115     public static function url_remove_query_and_path($url)
116     {
117         $url_pieces = parse_url($url);
118         //$clean_url = $url_pieces['scheme'] . '://' . $url_pieces['host'];
119         $clean_url = (isset($url_pieces['scheme']) ? $url_pieces['scheme'] : 'http') . '://' . (isset($url_pieces['host']) ? $url_pieces['host'] : '');
120         if($clean_url==='http://')
121         {
122             $clean_url = '';
123         }
124         return $clean_url;
125     }
126 
127     /**
128      * Quick and dirty function to save an image (or any binary data transfered by url) from the internet
129      *
130      * http://stackoverflow.com/questions/6476212/save-image-from-url-with-curl-php
131      *
132      * @param string $url
133      * @param string $save_to
134      * @return bool 
135      */
136     public static function grab_image($url, $save_to)
137     {
138         $grab = false;
139 
140         try
141         {
142             # Basic Curl Request to download image
143             $ch = curl_init ($url);
144             curl_setopt($ch, CURLOPT_HEADER, 0);
145             curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
146             curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
147             # Set User-Agent because lots of servers deny request with empty UA
148             curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
149             $raw = curl_exec($ch);
150             $retcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
151             curl_close ($ch);
152 
153             // error_log('retcode : ' . $retcode);
154             // error_log('raw : ' . $raw);
155 
156             if($retcode===200)
157             {
158                 # Remove existing file
159                 if(file_exists($save_to))
160                 {
161                     unlink($save_to);
162                 }
163 
164                 # Save 
165                 $fp = fopen($save_to, 'x');
166                 fwrite($fp, $raw);
167                 fclose($fp);
168 
169                 $grab = true;
170             }
171             else
172             {
173                 // error_log('grab_image '. $url .' returns ' . $retcode);
174                 # Throw warning
175                 trigger_error('InvalidImageHttpCode', E_USER_NOTICE);
176             }
177         }
178         catch(Exception $e)
179         {
180             error_log($e->getMessage());
181         }
182         return $grab;
183     }
184 
185     /**
186      * Returns basic HTTP headers for a CURL request
187      *
188      * @param strnig $host (ex: www.google.fr)
189      * @return array $headers (Connection, Accept, Accept-Charset, Keep-Alive, Accept-Language, Host)
190      */
191     public static function get_default_headers($host)
192     {
193         $headers = [
194             'Connection' => 'keep-alive',
195             'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
196             'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
197             'Keep-Alive' => '115',
198             'Accept-Language' => 'fr;q=0.8,en-us;q=0.5,en;q=0.3',
199             'Host' => $host,
200         ];
201         
202         $compiledHeaders = [];
203         foreach($headers as $k=>$v)
204         {
205             $compiledHeaders[] = $k.': '.$v;
206         }
207 
208         return $compiledHeaders;
209     }
210 
211 
212     
213 
214     /**
215      * Extracts a TLD (Top Level Domain) from an URL
216      *
217      * @param string $url
218      * @return string $tld 
219      */
220     public static function extract_tld_from_url($url)
221     {
222         $tld = '';
223         
224         $components = tld_extract($url);
225         if($components->hostname!=='')
226         {
227             $tld .= $components->hostname . '.';
228         }
229         $tld .= $components->suffix;
230 
231         return $tld;
232     }
233 
234     /**
235      * In PHP : http://w-shadow.com/blog/2012/08/28/tldextract/
236      * In nodejs : https://github.com/oncletom/tld.js
237      *
238      * @param string $url
239      * @return string $domain
240      */
241     public static function extract_domain_from_url($url)
242     {
243         $domain = '';
244 
245         $components = tld_extract($url);
246         if($components->subdomain!=='')
247         {
248             $domain = $components->subdomain.'.';
249         }
250         if($components->hostname!=='')
251         {
252             $domain .= $components->hostname.'.';
253         }
254         $domain .= $components->suffix;
255 
256         return $domain;
257     }
258 
259     /**
260      * Extracts a sub-domain from an URL
261      *
262      * @param string $url
263      * @return string $tld 
264      */
265     public static function extract_subdomain_from_url($url)
266     {
267         $subdomain = '';
268 
269         $components = tld_extract($url);
270         if($components->subdomain!=='')
271         {
272             $subdomain = $components->subdomain;
273         }
274 
275         return $subdomain;
276     }
277 
278     /**
279      *
280      */
281     public static function extract_request_headers_body($html_with_headers)
282     {
283         # METHODE 1
284         //list($header, $body) = explode("\r\n\r\n", $html_with_headers, 2);
285         
286         # METHODE 2 : http://stackoverflow.com/questions/9183178/php-curl-retrieving-response-headers-and-body-in-a-single-request
287         /*$header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
288         error_log("header_size : ".$header_size);
289         $header = substr($response, 0, $header_size);
290         saveIntoFile("header1.txt", "/home/backrub.fr/public_html/php", "", $header);
291         $body = substr($html_with_headers, $header_size);*/
292 
293         # METHODE 3 : http://stackoverflow.com/questions/11359276/php-curl-exec-returns-both-http-1-1-100-continue-and-http-1-1-200-ok-separated-b
294         // $header = array();
295         // $body = array();
296         // foreach(explode("\r\n\r\n", $html_with_headers) as $frag)
297         // {
298         //     if(preg_match('/^HTTP\/[0-9\.]+ [0-9]+/', $frag))
299         //     {
300         //        $header[] = $frag;
301         //     }
302         //     else
303         //     {
304         //         $body[] = $frag;
305         //     }
306         // }
307         // $header = implode("\r\n", $header);
308         // $body = implode($body);
309         // $Response = array("HEADER" => http_parse_headers($header), "BODY" => $body);
310         // return $Response;
311 
312         # METHODE 4 : http://stackoverflow.com/questions/9183178/php-curl-retrieving-response-headers-and-body-in-a-single-request
313         $parts = explode("\r\n\r\nHTTP/", $html_with_headers);
314         if(count($parts) > 1)
315         {
316             $first_headers = $parts[0];
317             $last_parts = array_pop($parts);
318             $parts = implode("\r\n\r\n", [$first_headers, $last_parts]);
319         }
320         else
321         {
322             $parts = $parts[0];
323         }
324         
325         list($headers, $body) = explode("\r\n\r\n", $parts, 2);
326         $Response = array("HEADER" => http_parse_headers($headers), "BODY" => $body);
327         return $Response;
328     }
329 
330 
331     /**
332      * Sets a php script desired status code (usefull for API)
333      * 
334      * @link http://stackoverflow.com/questions/4162223/how-to-send-500-internal-server-error-error-from-a-php-script
335      *
336      * @param int $status_code
337      * 
338      * @return bool $response Has header status been set or not
339      */
340     public static function header_status($statusCode)
341     {
342         static $status_codes = null;
343 
344         if ($status_codes === null)
345         {
346             $status_codes = [
347                 100 => 'Continue',
348                 101 => 'Switching Protocols',
349                 102 => 'Processing',
350                 200 => 'OK',
351                 201 => 'Created',
352                 202 => 'Accepted',
353                 203 => 'Non-Authoritative Information',
354                 204 => 'No Content',
355                 205 => 'Reset Content',
356                 206 => 'Partial Content',
357                 207 => 'Multi-Status',
358                 300 => 'Multiple Choices',
359                 301 => 'Moved Permanently',
360                 302 => 'Found',
361                 303 => 'See Other',
362                 304 => 'Not Modified',
363                 305 => 'Use Proxy',
364                 307 => 'Temporary Redirect',
365                 400 => 'Bad Request',
366                 401 => 'Unauthorized',
367                 402 => 'Payment Required',
368                 403 => 'Forbidden',
369                 404 => 'Not Found',
370                 405 => 'Method Not Allowed',
371                 406 => 'Not Acceptable',
372                 407 => 'Proxy Authentication Required',
373                 408 => 'Request Timeout',
374                 409 => 'Conflict',
375                 410 => 'Gone',
376                 411 => 'Length Required',
377                 412 => 'Precondition Failed',
378                 413 => 'Request Entity Too Large',
379                 414 => 'Request-URI Too Long',
380                 415 => 'Unsupported Media Type',
381                 416 => 'Requested Range Not Satisfiable',
382                 417 => 'Expectation Failed',
383                 422 => 'Unprocessable Entity',
384                 423 => 'Locked',
385                 424 => 'Failed Dependency',
386                 426 => 'Upgrade Required',
387                 500 => 'Internal Server Error',
388                 501 => 'Not Implemented',
389                 502 => 'Bad Gateway',
390                 503 => 'Service Unavailable',
391                 504 => 'Gateway Timeout',
392                 505 => 'HTTP Version Not Supported',
393                 506 => 'Variant Also Negotiates',
394                 507 => 'Insufficient Storage',
395                 509 => 'Bandwidth Limit Exceeded',
396                 510 => 'Not Extended'
397             ];
398         }
399 
400         if(isset($_SERVER['SERVER_PROTOCOL']))
401         {
402             if ($status_codes[$statusCode] !== null)
403             {
404                 $status_string = $statusCode . ' ' . $status_codes[$statusCode];
405                 header($_SERVER['SERVER_PROTOCOL'] . ' ' . $status_string, true, $statusCode);
406                 return true;
407             }
408             else
409             {
410                 # Throw warning
411                 trigger_error('StatusCodeNotFound', E_USER_WARNING);
412             }
413         }
414         else
415         {
416             # Throw warning
417             trigger_error('ServerProtocolNotFound', E_USER_WARNING);
418         }
419         return false;
420     }
421 
422 
423     /**
424      * Gets the address that the provided URL redirects to,
425      * or false if there's no redirect. 
426      *
427      * @param string $url
428      * @return string
429      *
430      * @link http://stackoverflow.com/questions/3799134/how-to-get-final-url-after-following-http-redirections-in-pure-php
431      */
432     public static function get_redirect_url($url)
433     {
434         $redirect_url = null; 
435 
436         $url_parts = @parse_url($url);
437         if (!$url_parts) return false;
438         if (!isset($url_parts['host'])) return false; //can't process relative URLs
439         if (!isset($url_parts['path'])) $url_parts['path'] = '/';
440 
441         $sock = fsockopen($url_parts['host'], (isset($url_parts['port']) ? (int)$url_parts['port'] : 80), $errno, $errstr, 30);
442         if (!$sock) return false;
443 
444         $request = "HEAD " . $url_parts['path'] . (isset($url_parts['query']) ? '?'.$url_parts['query'] : '') . " HTTP/1.1\r\n"; 
445         $request .= 'Host: ' . $url_parts['host'] . "\r\n"; 
446         $request .= "User-Agent: Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30\r\n";
447         $request .= "Connection: Close\r\n\r\n"; 
448         fwrite($sock, $request);
449         $response = '';
450 
451         while(!feof($sock)) $response .= fread($sock, 8192);
452         fclose($sock);
453 
454         if (preg_match('/^Location: (.+?)$/m', $response, $matches))
455         {
456             if ( substr($matches[1], 0, 1) == "/" )
457             {
458                 return $url_parts['scheme'] . "://" . $url_parts['host'] . trim($matches[1]);
459             }
460             else
461             {
462                 return trim($matches[1]);
463             }
464 
465         }
466         else
467         {
468             return false;
469         }
470 
471     }
472 
473     /**
474      * get_all_redirects()
475      * Follows and collects all redirects, in order, for the given URL. 
476      *
477      * @param string $url
478      * @return array
479      */
480     public static function get_all_redirects($url)
481     {
482         $redirects = [];
483         while ($newurl = Http::get_redirect_url($url))
484         {
485             if (in_array($newurl, $redirects))
486             {
487                 break;
488             }
489             $redirects[] = $newurl;
490             $url = $newurl;
491         }
492         return $redirects;
493     }
494 
495     /**
496      * get_final_url()
497      * Gets the address that the URL ultimately leads to. 
498      * Returns $url itself if it isn't a redirect.
499      *
500      * @param string $url
501      * @return string
502      */
503     public static function get_final_url($url)
504     {
505         $redirects = Http::get_all_redirects($url);
506         if (count($redirects)>0)
507         {
508             return array_pop($redirects);
509         }
510         else
511         {
512             return $url;
513         }
514     }
515 
516 }
517 
518 
API documentation generated by ApiGen