Resolving Relative URLs in PHP

Tags: , December 28, 2006 (0 comments)

This is deprecated, and has known bugs. See here for a replacement.

There are plenty of cases for needing to resolve relative URLs - RFC 3986 (Generic URI Syntax) has a whole section on how to go about it. SimplePie has code for this, written by me in its entirety (although based on the pseudo-code in RFC 3986), used to deal with relative URLs in feeds (which happen to be possible pretty much everywhere). As I am the sole author of it, I've rearranged it slightly into a single function (in SimplePie it's in several methods within a larger class, as most of the methods are also called in other places), and re-licensed it under the 3-clause BSD license, LGPL, and zlib/libpng license (although of course if you redistribute it you must attach the appropriate notice as stated by one of the above licenses).

Without further ado, here's the code:

<?php
/**
 * Resolve a URL reference against a base URL, following the reference
 * resolution algorithm of RFC 3986 section 5.2.
 *
 * Behaviour worth knowing before relying on this function:
 *  - Both arguments are trimmed of surrounding whitespace first.
 *  - An empty reference yields the (trimmed) base unchanged.
 *  - A reference that already carries a scheme, or any reference when the
 *    base is empty, is re-assembled from its parsed components without
 *    dot-segment removal.
 *  - When the reference has no path and the base has no path either, the
 *    resulting path is "/".
 *  - Components are only emitted when non-empty, so an empty authority
 *    (e.g. the "//" in "file:///x") is not preserved.
 *
 * @param string $relative The URL reference to resolve.
 * @param string $base     The base URL the reference is relative to.
 * @return string The resolved URL.
 */
function absolutize_url($relative, $base)
{
    $relative = trim((string) $relative);
    $base = trim((string) $base);

    // Compare against '' rather than using empty(): the string "0" is a
    // perfectly valid reference/path/query/fragment and must not be dropped.
    if ($relative === '')
    {
        return $base;
    }

    $reference = _absolutize_url_parse($relative);

    if ($reference['scheme'] !== '' || $base === '')
    {
        // Already absolute, or there is no base to resolve against:
        // the reference is the target as-is.
        $target = $reference;
    }
    else
    {
        $root = _absolutize_url_parse($base);

        if ($reference['authority'] !== '')
        {
            // Network-path reference ("//host/path"): only the scheme is inherited.
            $target = $reference;
            $target['scheme'] = $root['scheme'];
        }
        else
        {
            $target = array(
                'scheme' => $root['scheme'],
                'authority' => $root['authority'],
                'path' => '',
                'query' => '',
                // The fragment always comes from the reference, never from the base.
                'fragment' => $reference['fragment'],
            );

            if ($reference['path'] !== '')
            {
                if (strpos($reference['path'], '/') === 0)
                {
                    // Absolute-path reference replaces the base path entirely.
                    $merged = $reference['path'];
                }
                elseif ($root['path'] === '' || $root['path'] === '/')
                {
                    $merged = '/' . $reference['path'];
                }
                else
                {
                    // Drop the last segment of the base path (everything from the
                    // final "/"); a base path without any "/" is kept whole.
                    $slash = strrpos($root['path'], '/');
                    $directory = ($slash === false) ? $root['path'] : (string) substr($root['path'], 0, $slash);
                    $merged = $directory . '/' . $reference['path'];
                }

                $target['path'] = _absolutize_url_remove_dot_segments($merged);
                $target['query'] = $reference['query'];
            }
            else
            {
                // No path in the reference: keep the base path, and fall back to
                // the base query only when the reference supplies none.
                $target['path'] = ($root['path'] !== '') ? $root['path'] : '/';
                $target['query'] = ($reference['query'] !== '') ? $reference['query'] : $root['query'];
            }
        }
    }

    // Recompose the URL from whichever components are present.
    $return = '';
    if ($target['scheme'] !== '')
    {
        $return .= $target['scheme'] . ':';
    }
    if ($target['authority'] !== '')
    {
        $return .= '//' . $target['authority'];
    }
    $return .= $target['path'];
    if ($target['query'] !== '')
    {
        $return .= '?' . $target['query'];
    }
    if ($target['fragment'] !== '')
    {
        $return .= '#' . $target['fragment'];
    }
    return $return;
}

/**
 * Split a URL into scheme, authority, path, query and fragment using the
 * regular expression from RFC 3986 appendix B.
 *
 * Components that are absent (or the whole set, if the pattern does not
 * match) are returned as empty strings.
 *
 * @param string $url The URL or URL reference to split.
 * @return array Map with the keys 'scheme', 'authority', 'path', 'query' and 'fragment'.
 */
function _absolutize_url_parse($url)
{
    preg_match('/^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/i', $url, $match);

    // preg_match() leaves trailing unmatched groups out of $match (and leaves
    // it empty on no match), so pad groups 0-9 with empty strings.
    $match = $match + array_fill(0, 10, '');

    return array(
        'scheme' => $match[2],
        'authority' => $match[4],
        'path' => $match[5],
        'query' => $match[7],
        'fragment' => $match[9],
    );
}

/**
 * Remove "." and ".." segments from a path (RFC 3986 section 5.2.4).
 *
 * @param string $path The path to normalise.
 * @return string The path with dot segments resolved.
 */
function _absolutize_url_remove_dot_segments($path)
{
    $input = $path;
    // The output buffer must start empty; segments are moved into it one by one.
    $output = '';

    while ($input !== '')
    {
        // A: strip a leading "../" or "./".
        if (strpos($input, '../') === 0)
        {
            $input = (string) substr($input, 3);
        }
        elseif (strpos($input, './') === 0)
        {
            $input = (string) substr($input, 2);
        }
        // B: a leading "/./" or a lone "/." collapses to "/".
        elseif (strpos($input, '/./') === 0)
        {
            $input = (string) substr($input, 2);
        }
        elseif ($input === '/.')
        {
            $input = '/';
        }
        // C: a leading "/../" or a lone "/.." collapses to "/" and also
        // removes the last segment (with its preceding "/") from the output.
        elseif (strpos($input, '/../') === 0 || $input === '/..')
        {
            $input = ($input === '/..') ? '/' : (string) substr($input, 3);
            $slash = strrpos($output, '/');
            $output = ($slash === false) ? '' : (string) substr($output, 0, $slash);
        }
        // D: a bare "." or ".." is simply discarded.
        elseif ($input === '.' || $input === '..')
        {
            $input = '';
        }
        // E: move the first segment (including its leading "/", if any, up to
        // but excluding the next "/") from the input to the output.
        else
        {
            $next = strpos($input, '/', 1);
            $segment = ($next === false) ? $input : substr($input, 0, $next);
            $output .= $segment;
            $input = (string) substr($input, strlen($segment));
        }
    }

    return $output;
}
?>

Comments

There aren't any comments yet, but feel free to leave one regardless.

Leave a Reply