Strong Server-Side Parameter Validation

Most web applications require the end-user to submit form data to the server, whether through a traditional web form or via AJAX. Any time a server receives a request with parameters, the server-side code should assume that the request is malicious. At the very least, a developer should ensure that SQL and/or JavaScript code is not injected. Beyond that, a developer should know what the acceptable values are for each parameter and be able to validate against those acceptable values. Today we will look at validating parameters using regular expressions to restrict the allowed characters.

Example 1: RequestParameterParser Class

/**
 * Validates request parameters (e.g. $_GET or $_POST) against whitelisting
 * regular expressions and converts them to the requested type.
 *
 * Every public getter looks the parameter up in the array given to the
 * constructor, validates it, and either returns the (converted) value or
 * throws an Exception. A parameter that is absent or an empty string is
 * treated as "null"; it is returned as-is when $allowNull is true and causes
 * an Exception otherwise.
 */
class RequestParameterParser {

	// Validation Regular Expressions

	private $ALL = "/^[\w\s@#%&\$!\^\*\(\)<>,\[\]\{\}\\\?\/\.\-=:\+|;`\"~]+$/";
	// anchored at both ends so trailing non-alpha characters are rejected
	private $ALPHA = "/^[a-z]+$/i" ;
	private $BOOLEAN = "/^(t|f|true|false|on|off)$/i" ;
	private $DATE = "/^\d{2}(\/|\.|-)\d{2}(\/|\.|-)\d{4}$/";
	private $EMAIL = "/^\w(\+?\.?-?\w)*\-?@\w(\.?[\-\w])*\.[a-z]{2,4}$/";
	// "[^>]*" (not "+") so single-letter tags such as <b> or <a> are detected too
	private $HTML = "/<\/?\s*[a-z][^>]*>/i";
	private $NUM = "/^[\-\+]?[\d]+$/";
	private $NUMERIC = "/^\-?[\d]+\.?[\d]*$/";
	private $WORD = "/^[\w]+$/";
	private $WORDS = "/^[\w,\|\s]+$/";
	private $ZIPCODE = "/^(\d{5}|\d{5}\-\d{4})$/";

	// matches every run of characters that is NOT allowed by $ALL; used to
	// scrub rejected values before they are placed into an exception message
	private $NOT_ALL = "/[^\w\s@#%&\$!\^\*\(\)<>,\[\]\{\}\\\?\/\.\-=:\+|;`\"~]+/";

	// the raw request parameters, keyed by parameter name
	private $_data = array();

	/**
	 * @param $data {Array} Required. The request parameters, usually $_GET or $_POST.
	 */
	public function __construct($data) {
		// anything that is not an array is treated as "no parameters"
		$this->_data = is_array($data) ? $data : array();
	}

	////
	// Validation Methods
	////

	// true when the value counts as "not provided" (absent or empty string);
	// "0" is a real value and must NOT be treated as missing
	private function _isMissing($value) {
		return null === $value || '' === $value;
	}

	// convert the parameter to a Boolean; returns null when no value was provided
	private function _convertToBoolean($value) {
		if ($this->_isMissing($value)) {
			return null;
		}

		$value = strtolower($value);
		return "t" === $value || "true" === $value || "on" === $value;
	}

	/**
	 * Fetch a parameter as a boolean ("t", "true", "on" => true; "f", "false", "off" => false).
	 * @return {boolean|null} The boolean value, or null when the parameter was not provided.
	 * @throws Exception
	 */
	public function getBoolean($parameterName, $allowNull=true) {
		$value = $this->_validateParameter($parameterName, $this->BOOLEAN, $allowNull);
		return $this->_convertToBoolean($value);
	}

	/**
	 * Same as getBoolean, but a parameter that was not provided yields false instead of null.
	 * @return {boolean}
	 * @throws Exception
	 */
	public function getBooleanOrFalse($parameterName) {
		return true === $this->getBoolean($parameterName, true);
	}

	// convert the parameter to a Date (a Unix timestamp as produced by strtotime);
	// returns null when no value was provided
	public function _convertToDate($value) {
		return $this->_isMissing($value) ? null : strtotime($value);
	}

	/**
	 * Fetch a parameter formatted like ##/##/####, ##.##.#### or ##-##-#### as a timestamp.
	 * @return {int|false|null} Result of strtotime(), or null when the parameter was not provided.
	 * @throws Exception
	 */
	public function getDate($parameterName, $allowNull=true) {
		$value = $this->_validateParameter($parameterName, $this->DATE, $allowNull);
		return $this->_convertToDate($value);
	}

	// convert the parameter to a Double; a missing value is passed through unchanged
	public function _convertToDouble($value) {
		return $this->_isMissing($value) ? $value : 0 + $value;
	}

	/**
	 * Fetch a parameter as a number that may contain a leading minus sign and a decimal point.
	 * @throws Exception
	 */
	public function getDouble($parameterName, $allowNull=true) {
		$value = $this->_validateParameter($parameterName, $this->NUMERIC, $allowNull);
		return $this->_convertToDouble($value);
	}

	// convert the parameter to an Integer; a missing value is passed through unchanged
	public function _convertToInteger($value) {
		return $this->_isMissing($value) ? $value : 0 + $value;
	}

	/**
	 * Fetch a parameter as a whole number with an optional leading sign.
	 * @throws Exception
	 */
	public function getInteger($parameterName, $allowNull=true) {
		$value = $this->_validateParameter($parameterName, $this->NUM, $allowNull);
		return $this->_convertToInteger($value);
	}

	// convert the parameter to a String containing only alpha characters
	public function getStringAlpha($parameterName, $allowNull=true) {
		return $this->_validateParameter($parameterName, $this->ALPHA, $allowNull);
	}

	// convert the parameter to an email String
	public function getStringEmail($parameterName, $allowNull=true) {
		return $this->_validateParameter($parameterName, $this->EMAIL, $allowNull);
	}

	// convert the parameter to a String containing only digits (optionally signed)
	public function getStringNum($parameterName, $allowNull=true) {
		return $this->_validateParameter($parameterName, $this->NUM, $allowNull);
	}

	// convert the parameter to a String containing digits, an optional minus sign and decimal point
	public function getStringNumeric($parameterName, $allowNull=true) {
		return $this->_validateParameter($parameterName, $this->NUMERIC, $allowNull);
	}

	// convert the parameter to a String containing characters matching the provided regular expression
	public function getStringOther($parameterName, $rx, $allowNull=true) {
		return $this->_validateParameter($parameterName, $rx, $allowNull);
	}

	/**
	 * Fetch a parameter as a String containing any character currently supported ($ALL).
	 * @param $parameterName {String} Required. The name of the parameter.
	 * @param $allowNull {boolean} Optional. Allow the parameter to be NULL.
	 * @param $allowHTML {boolean} Optional. When false, a value containing an HTML tag is rejected.
	 * @param $isPassword {boolean} Optional. This is a password, never put the value into messages.
	 * @return {String} The validated parameter value.
	 * @throws Exception
	 */
	public function getStringUnrestricted($parameterName, $allowNull=true, $allowHTML=false, $isPassword=false) {
		// forward $isPassword so a regex failure does not leak the password either
		$parameterValue = $this->_validateParameter($parameterName, $this->ALL, $allowNull, $isPassword);

		// check for HTML injection
		if (! $allowHTML && ! $this->_isMissing($parameterValue) && preg_match($this->HTML, (string) $parameterValue)) {
			$shownValue = $isPassword ? "PASSWORD_NOT_SHOWN" : $parameterValue;
			throw new Exception("The parameterName=\"$parameterName\" contains HTML, parameterValue=\"$shownValue\".");
		}

		return $parameterValue;
	}

	// fetch a password: any supported character, no HTML, value never shown in exception messages
	public function getStringPassword($parameterName, $allowNull=true) {
		return $this->getStringUnrestricted($parameterName, $allowNull, false, true);
	}

	// convert the parameter to a String containing only word characters (no spaces either)
	public function getStringWord($parameterName, $allowNull=true) {
		return $this->_validateParameter($parameterName, $this->WORD, $allowNull);
	}

	// convert the parameter to a String containing only word, space, comma, and pipe characters
	public function getStringWords($parameterName, $allowNull=true) {
		// the cast keeps the historical "missing => empty string" result without passing null to trim()
		return trim((string) $this->_validateParameter($parameterName, $this->WORDS, $allowNull));
	}

	// convert the parameter to a zipcode String (##### or #####-####)
	public function getStringZipcode($parameterName, $allowNull=true) {
		return $this->_validateParameter($parameterName, $this->ZIPCODE, $allowNull);
	}

	/**
	 * Validate the parameter or throw an exception.
	 * @param $parameterName {String} Required. The name of the parameter.
	 * @param $regex {Pattern} Required. The regex to use for validation.
	 * @param $allowNull {boolean} Required. Allow the parameter to be NULL.
	 * @param $isPassword {boolean} Optional. This is a password, don't log.
	 * @return {String} The validated parameter value (null or "" when missing and allowed).
	 * @throws Exception
	 */
	private function _validateParameter($parameterName, $regex, $allowNull, $isPassword=false) {
		$parameterValue = array_key_exists($parameterName, $this->_data) ? $this->_data[$parameterName] : null;

		if ($this->_isMissing($parameterValue)) {
			if ($allowNull) {
				return $parameterValue;
			}

			// this happens when a form does not contain a required parameter
			throw new Exception("The parameterName=\"$parameterName\" is NULL and should not be.");
		}

		// "name[]=x" style requests produce arrays; they can never match a
		// single-value pattern, so reject them instead of handing them to preg_match
		if (! is_scalar($parameterValue)) {
			throw new Exception("The parameterName=\"$parameterName\" is invalid; a single value was expected.");
		}

		if (! preg_match($regex, (string) $parameterValue)) {
			// strip everything outside the supported character set before the
			// value is echoed into the message; the message is not HTML-escaped
			$parameterValue = $isPassword
				? "PASSWORD_NOT_SHOWN"
				: preg_replace($this->NOT_ALL, "", (string) $parameterValue);
			throw new Exception("The parameterName=\"$parameterName\" is invalid for parameterValue=\"$parameterValue\" and regex=\"$regex\".");
		}

		return $parameterValue;
	}

}

Example 1 is written in PHP, because PHP is easy to write and understand. This pattern can be ported to other languages; I have also written something similar in Java using the HttpServletRequest object.

There are a bunch of public helper methods, each corresponding to one of the common validation regular expressions at the bottom of the class. These methods call _validateParameter, where the actual validation work occurs. _validateParameter first attempts to find the parameter in the request data ($this->_data). If a value is not found, the method will throw an Exception, unless the allowNull value is true. By default allowNull is true and null values are OK. Next, the value is compared against the provided regular expression and returned, or an exception is thrown if the parameter did not match. There is some special logic around passwords, which masks the value, because (for liability/security reasons) we do not want to see passwords in our logs.

To use this class, simply instantiate RequestParameterParser, passing it either $_GET or $_POST, depending on the type of request. Then call the appropriate method on your newly instantiated parameter parser.

Example 2: Using RequestParameterParser

// Parse the POSTed form: each getter validates its parameter and throws an
// Exception when the submitted value does not match the expected pattern.
$rpp = new RequestParameterParser($_POST);
try {
	// parameter names are strings and must be quoted
	$task = $rpp->getStringWord('task');
	$message = $rpp->getStringUnrestricted('message');
	$id = $rpp->getInteger('id');
}
catch (Exception $e) {
	// the message can contain part of the submitted value, so escape it for HTML output
	echo 'Message: ' . htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
}

In Example 2 we are parsing the $_POST parameters and looking for values: task as an alpha-numeric sequence, message as any character on a standard keyboard, and id as an integer. If a parameter does not validate, then an exception is thrown and should be handled by developers; ideally a message will be returned to the end-user.

Most of these validations should be self-explanatory, but let's look at a few that might be a little more difficult to understand. There is a getBoolean method and a getBooleanOrFalse method. The difference is that getBoolean can return null for situations where null matters, while getBooleanOrFalse always returns a boolean value. The getDate method requires dates to be formatted like "##/##/####", "##.##.####", or "##-##-####". The getStringUnrestricted method has the extra options allowHTML and isPassword. allowHTML is false by default, and when it is false an exception is thrown if HTML is found in the value. isPassword should be set to true if the parameter is a password, so that exception logging does not log the password. There is a getStringOther method that allows the developer to specify an arbitrary regular expression to validate against. And lastly, the getStringWords method is used to evaluate a list of comma-, space-, or pipe-separated words.

I hope you use this or a similar parameter parser in your applications. Using strong parameter validation will make your web applications more robust and secure.

If you port this into another language, please leave a comment below linking to your code.