<?php

// ------------------------------------------------------------------------- //
// Checks the validity of a URL and retrieves its HTTP status                //
// ------------------------------------------------------------------------- //
// Auteur: Pierre Crevoisier <pierre.crevoisier@epfl.ch>                     //
// Web:                                                                      //
// ------------------------------------------------------------------------- //

?>

<?php

// urlCodes.inc.php
// ----------------
// The following code must be saved in a separate file named
// "urlCodes.inc.php":

// Lookup table: HTTP status code => human-readable description.
// Entries are grouped by status class; 999 is a private, non-HTTP value
// used for "request timed out or server does not exist".
$codes = array(
    // 2xx
    "200" => "Client Request Successful: OK",
    "201" => "Client Request Successful: Created",
    "202" => "Client Request Successful: Accepted",
    "203" => "Client Request Successful: Non-Authoritative Information",
    "204" => "Client Request Successful: No Content",
    "205" => "Client Request Successful: Reset Content",
    "206" => "Client Request Successful: Partial Content",

    // 3xx
    "300" => "Redirection: Multiple Choice",
    "301" => "Redirection: Moved Permanently",
    "302" => "Redirection: Moved Temporarily",
    "303" => "Redirection: See Other",
    "304" => "Redirection: Not Modified",
    "305" => "Redirection: Use Proxy",

    // 4xx
    "400" => "Client Request Incomplete: Bad Request",
    "401" => "Client Request Incomplete: Unauthorized",
    "402" => "Client Request Incomplete: Payment Required",
    "403" => "Client Request Incomplete: Forbidden",
    "404" => "Client Request Incomplete: Not Found",
    "405" => "Client Request Incomplete: Method Not Allowed",
    "406" => "Client Request Incomplete: Not Acceptable",
    "407" => "Client Request Incomplete: Proxy Authentication Required",
    "408" => "Client Request Incomplete: Request Time-Out",
    "409" => "Client Request Incomplete: Conflict",
    "410" => "Client Request Incomplete: Gone",
    "411" => "Client Request Incomplete: Length Required",
    "412" => "Client Request Incomplete: Precondition Failed",
    "413" => "Client Request Incomplete: Request Entity Too Large",
    "414" => "Client Request Incomplete: Request-URI Too Long",
    "415" => "Client Request Incomplete: Unsupported Media Type",

    // 5xx
    "500" => "Server Errors: Internal Server Error",
    "501" => "Server Errors: Not Implemented",
    "502" => "Server Errors: Bad Gateway",
    "503" => "Server Errors: Service Unavailable",
    "504" => "Server Errors: Gateway Time-Out",
    "505" => "Server Errors: HTTP Version not supported",

    // Not an HTTP status: placeholder for a failed check
    "999" => "Request timed out or server does not exist.",
);

?>

<?php

/*
# check_url.php
# -------------
# argument: $url
#
# annex: "urlCodes.inc.php"
# this file allows a nice display of the returned http status code
# for more informations, have a look at the W3C site
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6.1.1

# This function performs the url check by:
# 1) opening a socket
# 2) sending a GET request (HEAD didn't work with some servers)
# 3) collecting data
  
# usage:
   
        // call the function
        $http = link_check($url);
        
        // print the result
        echo "<TABLE border='1' cellpadding=3 cellspacing=0>\n";
        echo "<TR align='left' valign='top'><TD>\n";
        echo "Current check: <b>".$url."</b>";
        echo "</TD></TR>\n";
        echo "<TR align='left' valign='top'><TD>\n";
        foreach ($http as $k => $value) {
                if ($k=="code") echo "[".$k."]: ".$codes[$value]."<br>";
                else echo "[".$k."]: ".$value."<br>";
        }
        echo "</TD></TR>\n";
        echo "<TR>\n";
        echo "<TD align='left'>\n";
        echo "More informations:<br><a
href=\"http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6.1.1\"
TARGET='_blank'> <small>Hypertext Transfer Protocol -- HTTP/1.1</a><br>";
        echo "<a href=\"http://www.w3.org/\">World Wide Web Consortium
[W3C]</a></TD>\n";
        echo "</TR>\n";
        echo "</TABLE>\n";  
  
*/

require_once 'urlCodes.inc.php';

/**
 * Check a URL by requesting it over a raw socket and reading the HTTP
 * status line of the response.
 *
 * The function connects to the URL's host (plain TCP for http, TLS via the
 * "ssl://" transport for https), sends a minimal HTTP/1.0 GET request and
 * inspects the response headers only. GET is used instead of HEAD because
 * some servers do not answer HEAD correctly.
 *
 * For a 3xx answer the headers are scanned for a Location (or
 * Content-Location) header so the caller can see the redirect target.
 *
 * @param string $url Absolute URL to check, e.g. "http://www.example.com/".
 *
 * @return array Associative array. On success it holds:
 *               - "version":  HTTP version from the status line, or "" when
 *                             the server sent "HTTP 200 ..." without one;
 *               - "code":     the three-digit status code (string);
 *               - "status":   the reason phrase;
 *               - "location": redirect target for 3xx answers, else $url.
 *               When the URL has no host, the connection cannot be opened,
 *               or no status line is received, the array holds only
 *               "code" => 999 (arbitrary, non-HTTP value) and "location".
 */
function link_check($url)
{
    // The original implementation lifted the script time limit; keep that,
    // but do not fail where the function has been disabled.
    if (function_exists("set_time_limit")) {
        set_time_limit(0);
    }

    // Upper bound on the number of response lines inspected while waiting
    // for the status line / Location header.
    $timeout_limit = 10000;

    // Seconds allowed for connecting and for each read on the socket.
    $socket_timeout = 30;

    // Result used for every failure path; 999 is arbitrary.
    $failure = array("code" => 999, "location" => $url);

    $parts = parse_url($url);
    if (!is_array($parts) || empty($parts["host"])) {
        return $failure;
    }

    $scheme       = isset($parts["scheme"]) ? strtolower($parts["scheme"]) : "http";
    $secure       = ($scheme === "https");
    $default_port = $secure ? 443 : 80;
    $port         = empty($parts["port"]) ? $default_port : (int) $parts["port"];

    // Without a path the request line would be invalid, so addresses like
    // http://www.example.com get a trailing slash.
    $target = (isset($parts["path"]) && $parts["path"] !== "") ? $parts["path"] : "/";
    if (isset($parts["query"]) && $parts["query"] !== "") {
        $target .= "?" . $parts["query"];
    }

    // The Host header only carries the port when it is not the default one.
    $host_header = $parts["host"];
    if ($port !== $default_port) {
        $host_header .= ":" . $port;
    }

    $errno  = 0;
    $errstr = "";
    $remote = ($secure ? "ssl://" : "") . $parts["host"];
    $sockd  = fsockopen($remote, $port, $errno, $errstr, $socket_timeout);
    if (!$sockd) {
        return $failure;
    }

    stream_set_blocking($sockd, true);
    // Make reads give up instead of hanging on a silent server.
    stream_set_timeout($sockd, $socket_timeout);

    // HTTP requires CRLF line endings; the empty line ends the header block.
    fwrite($sockd, "GET " . $target . " HTTP/1.0\r\n" . "Host: " . $host_header . "\r\n\r\n");

    $http       = null;
    $lines_read = 0;
    while ($lines_read < $timeout_limit && !feof($sockd)) {
        $line = fgets($sockd, 8192);
        if ($line === false) {
            // Read error or read timeout.
            break;
        }
        $lines_read++;
        $line = rtrim($line, "\r\n");

        if ($line === "") {
            // End of the header block: nothing of interest follows.
            break;
        }

        if ($http === null) {
            // Accept both "HTTP/1.1 200 OK" and the version-less form
            // "HTTP 200 Document follows" that some servers send.
            if (preg_match('/^HTTP(?:\/(\d+(?:\.\d+)?))?\s+(\d{3})(?:\s+(.*))?$/i', $line, $m)) {
                $http = array(
                    "version" => $m[1],
                    "code"    => $m[2],
                    "status"  => isset($m[3]) ? trim($m[3]) : "",
                );

                // Only a redirection (3xx) is worth reading further for,
                // to pick up its Location header.
                $code = (int) $m[2];
                if ($code < 300 || $code >= 400) {
                    break;
                }
            }
        } elseif (preg_match('/^(?:Content-)?Location:\s*(.*)$/i', $line, $m)) {
            // Redirect target found; no need to read any more.
            $http["location"] = trim($m[1]);
            break;
        }
    }

    fclose($sockd);

    if ($http === null) {
        // No status line was received (timeout, non-HTTP answer, ...).
        return $failure;
    }

    if (empty($http["location"])) {
        $http["location"] = $url;
    }

    return $http;
}

?>