SlideShare a Scribd company logo
@asgrim
Climbing the
Abstract Syntax Tree
James Titcumb
International PHP Conference 2017
$ whoami
James Titcumb
www.jamestitcumb.com
www.roave.com
@asgrim
@asgrim
How PHP works
PHP code
OpCache
Execute (VM)
Lexer + Parser
Compiler
@asgrim
The PHP Lexer
zend_language_scanner.l
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_IN_SCRIPTING>"exit" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
RETURN_TOKEN(T_FUNCTION);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
zend_language_scanner.l
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {
yyless(yyleng - 1);
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
RETURN_TOKEN(T_STRING_VARNAME);
}
@asgrim
The PHP Lexer
zend_language_scanner.l
@asgrim
The PHP Lexer
zend_language_scanner.l
re2c
@asgrim
The PHP Lexer
zend_language_scanner.l
re2c
zend_language_scanner.c
@asgrim
The PHP Parser
zend_language_parser.y
@asgrim
zend_language_parser.y
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
zend_language_parser.y
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
if_stmt_without_else (A)
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
if_stmt_without_else (A)
if_stmt_without_else (B)
@asgrim
if ($a == 1)
{
a();
}
else if ($b == 1)
{
b();
}
else
{
c();
}
Using the rules to parse
if_stmt_without_else (A)
if_stmt_without_else (B)
if_stmt
@asgrim
zend_language_parser.y (PHP 7.0.10)
if_stmt:
if_stmt_without_else %prec T_NOELSE { $$ = $1; }
| if_stmt_without_else T_ELSE statement
{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }
;
if_stmt_without_else:
T_IF '(' expr ')' statement
{ $$ = zend_ast_create_list(1, ZEND_AST_IF,
zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }
| if_stmt_without_else T_ELSEIF '(' expr ')' statement
{ $$ = zend_ast_list_add($1,
zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }
;
@asgrim
zend_language_parser.y (PHP 5.6.26)
T_IF parenthesis_expr { zend_do_if_cond(&$2, &$1 TSRMLS_CC); }
statement { zend_do_if_after_statement(&$1, 1 TSRMLS_CC); }
void zend_do_if_cond(const znode *cond, znode *closing_bracket_token TSRMLS_DC)
{
int if_cond_op_number = get_next_op_number(CG(active_op_array));
zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC);
opline->opcode = ZEND_JMPZ;
SET_NODE(opline->op1, cond);
closing_bracket_token->u.op.opline_num = if_cond_op_number;
SET_UNUSED(opline->op2);
INC_BPC(CG(active_op_array));
}
@asgrim
AST is new in PHP 7+
@asgrim
How PHP works
PHP code
OpCache
Execute (VM)
Lexer + Parser
Compiler
@asgrim
Let’s simplify!
@asgrim
First… WTF is AST?
@asgrim
AST is just a data structure
@asgrim
PHP code
<?php
echo "Hello world";
@asgrim
An AST representation
Echo statement
`-- String, value "Hello world"
@asgrim
PHP code
<?php
echo "Hello " . "world";
@asgrim
An AST representation
Echo statement
`-- Concat
|-- Left
| `-- String, value "Hello "
`-- Right
`-- String, value "world"
@asgrim
PHP code
<?php
$a = 5;
$b = 3;
echo $a + ($b * 2);
@asgrim
An AST representation
Assign statement
|-- Variable $a
`-- Integer, value 5
Assign statement
|-- Variable $b
`-- Integer, value 3
Echo statement
`-- Add operation
|-- Left
| `-- Variable $a
`-- Right
`-- Multiply operation
|-- Left
| `-- Variable $b
`-- Right
`-- Integer, value 2
@asgrim
Why?
@asgrim
Faster!*
@asgrim
AST compilation
Statements
|-- Assign
|   |-- Variable, name: $a
|   `-- Scalar, value: (int)5
|-- Assign
|   |-- Variable, name: $b
|   `-- Scalar, value: (int)3
`-- Echo
    `-- Add op
        |-- Left operand
        |   `-- Variable, name: $a
        `-- Right operand
            `-- Multiply op
                |-- Left operand
                |   `-- Variable, name: $b
                `-- Right operand
                    `-- Scalar, value: (int)2
@asgrim
AST compilation: pre-order traversal
Statements
|-- Assign
|   |-- Variable, name: $a
|   `-- Scalar, value: (int)5
|-- Assign
|   |-- Variable, name: $b
|   `-- Scalar, value: (int)3
`-- Echo
    `-- Add op
        |-- Left operand
        |   `-- Variable, name: $a
        `-- Right operand
            `-- Multiply op
                |-- Left operand
                |   `-- Variable, name: $b
                `-- Right operand
                    `-- Scalar, value: (int)2
@asgrim
Pre-order traversal: Polish notation
Assign(Variable $a, Scalar 5)
Assign(Variable $b, Scalar 3)
Echo (
Add(
Variable $a,
Multiply( $b, 2 )
)
)
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
+ 1 * 2 3
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
+ 1 * 2 3
Operator Left operand
Right operand
@asgrim
Order of precedence
1 + 2 * 3
= 1 + (2 * 3) = 7?
= (1 + 2) * 3 = 9?
+ 1 * 2 3
Operator Left operand Right operand
Operator Left operand
Right operand
@asgrim
Reverse Polish Notation
1 2 3 * +
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
3
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
3
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
2
3
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
6
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
1
6
@asgrim
Reverse Polish Notation
1 2 3 * + The stack
7
@asgrim
Let’s write a compiler (!!!)
In three easy steps…
@asgrim
Warning: do not use in production
@asgrim
View > Source
https://github.com/asgrim/basic-maths-compiler
@asgrim
Define the language
Tokens
● T_ADD (+)
● T_SUBTRACT (-)
● T_MULTIPLY (*)
● T_DIVIDE (/)
● T_INTEGER (\d+)
● T_WHITESPACE (\s+)
@asgrim
Step 1: Writing a simple lexer
@asgrim
Using regular expressions
private static $matches = [
'/^(\+)/' => Token::T_ADD,
'/^(-)/' => Token::T_SUBTRACT,
'/^(\*)/' => Token::T_MULTIPLY,
'/^(\/)/' => Token::T_DIVIDE,
'/^(\d+)/' => Token::T_INTEGER,
'/^(\s+)/' => Token::T_WHITESPACE,
];
@asgrim
Step through the input string
public function __invoke(string $input) : array
{
$tokens = [];
$offset = 0;
while ($offset < strlen($input)) {
$focus = substr($input, $offset);
$result = $this->match($focus);
$tokens[] = $result;
$offset += strlen($result->getLexeme());
}
return $tokens;
}
@asgrim
The matching method
private function match(string $input) : Token
{
foreach (self::$matches as $pattern => $token) {
if (preg_match($pattern, $input, $matches)) {
return new Token($token, $matches[1]);
}
}
throw new RuntimeException(sprintf(
'Unmatched token, next 15 chars were: %s', substr($input, 0, 15)
));
}
@asgrim
Step 2: Parsing the tokens
@asgrim
Order tokens by operator precedence
/**
* Higher number is higher precedence.
* @var int[]
*/
private static $operatorPrecedence = [
Token::T_SUBTRACT => 0,
Token::T_ADD => 1,
Token::T_DIVIDE => 2,
Token::T_MULTIPLY => 3,
];
@asgrim
Order tokens by operator precedence
/** @var Token[] $stack */
$stack = [];
/** @var Token[] $operators */
$operators = [];
while (false !== ($token = current($tokens))) {
if ($token->isOperator()) {
// ...
}
$stack[] = $token;
next($tokens);
}
@asgrim
Order tokens by operator precedence
/** @var Token[] $stack */
$stack = [];
/** @var Token[] $operators */
$operators = [];
while (false !== ($token = current($tokens))) {
if ($token->isOperator()) {
// ...
}
$stack[] = $token;
next($tokens);
}
@asgrim
Order tokens by operator precedence
/** @var Token[] $stack */
$stack = [];
/** @var Token[] $operators */
$operators = [];
while (false !== ($token = current($tokens))) {
if ($token->isOperator()) {
// ...
}
$stack[] = $token;
next($tokens);
}
@asgrim
Order tokens by operator precedence
/** @var Token[] $stack */
$stack = [];
/** @var Token[] $operators */
$operators = [];
while (false !== ($token = current($tokens))) {
if ($token->isOperator()) {
// ...
}
$stack[] = $token;
next($tokens);
}
@asgrim
Order tokens by operator precedence
if ($token->isOperator()) {
$tokenPrecedence = self::$operatorPrecedence[$token->getToken()];
while (
count($operators)
&& self::$operatorPrecedence[$operators[count($operators) - 1]->getToken()]
> $tokenPrecedence
) {
$higherOp = array_pop($operators);
$stack[] = $higherOp;
}
$operators[] = $token;
next($tokens);
continue;
}
@asgrim
Order tokens by operator precedence
if ($token->isOperator()) {
$tokenPrecedence = self::$operatorPrecedence[$token->getToken()];
while (
count($operators)
&& self::$operatorPrecedence[$operators[count($operators) - 1]->getToken()]
> $tokenPrecedence
) {
$higherOp = array_pop($operators);
$stack[] = $higherOp;
}
$operators[] = $token;
next($tokens);
continue;
}
@asgrim
Order tokens by operator precedence
if ($token->isOperator()) {
$tokenPrecedence = self::$operatorPrecedence[$token->getToken()];
while (
count($operators)
&& self::$operatorPrecedence[$operators[count($operators) - 1]->getToken()]
> $tokenPrecedence
) {
$higherOp = array_pop($operators);
$stack[] = $higherOp;
}
$operators[] = $token;
next($tokens);
continue;
}
@asgrim
Order tokens by operator precedence
if ($token->isOperator()) {
$tokenPrecedence = self::$operatorPrecedence[$token->getToken()];
while (
count($operators)
&& self::$operatorPrecedence[$operators[count($operators) - 1]->getToken()]
> $tokenPrecedence
) {
$higherOp = array_pop($operators);
$stack[] = $higherOp;
}
$operators[] = $token;
next($tokens);
continue;
}
@asgrim
Order tokens by operator precedence
// Clean up by moving any remaining operators onto the token stack
while (count($operators)) {
$stack[] = array_pop($operators);
}
return $stack;
@asgrim
Order tokens by operator precedence
1 + 2 * 3
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1
+
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2
+
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2
+ *
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2 3
+ *
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2 3 *
+ *
Output stack
Operator stack
@asgrim
Order tokens by operator precedence
1 + 2 * 3
1 2 3 * +
+
Output stack
Operator stack
@asgrim
Create AST
while ($ip < count($tokenStack)) {
$token = $tokenStack[$ip++];
if ($token->isOperator()) {
// (figure out $nodeType)
$right = array_pop($astStack);
$left = array_pop($astStack);
$astStack[] = new $nodeType($left, $right);
continue;
}
$astStack[] = new Node\Scalar\IntegerValue((int)$token->getLexeme());
}
@asgrim
Create AST
while ($ip < count($tokenStack)) {
$token = $tokenStack[$ip++];
if ($token->isOperator()) {
// (figure out $nodeType)
$right = array_pop($astStack);
$left = array_pop($astStack);
$astStack[] = new $nodeType($left, $right);
continue;
}
$astStack[] = new Node\Scalar\IntegerValue((int)$token->getLexeme());
}
@asgrim
Create AST
while ($ip < count($tokenStack)) {
$token = $tokenStack[$ip++];
if ($token->isOperator()) {
// (figure out $nodeType)
$right = array_pop($astStack);
$left = array_pop($astStack);
$astStack[] = new $nodeType($left, $right);
continue;
}
$astStack[] = new Node\Scalar\IntegerValue((int)$token->getLexeme());
}
@asgrim
Create AST
while ($ip < count($tokenStack)) {
$token = $tokenStack[$ip++];
if ($token->isOperator()) {
// (figure out $nodeType)
$right = array_pop($astStack);
$left = array_pop($astStack);
$astStack[] = new $nodeType($left, $right);
continue;
}
$astStack[] = new Node\Scalar\IntegerValue((int)$token->getLexeme());
}
@asgrim
Create AST
Node\BinaryOp\Add (
Node\Scalar\IntegerValue(1),
Node\BinaryOp\Multiply (
Node\Scalar\IntegerValue(2),
Node\Scalar\IntegerValue(3)
)
)
@asgrim
Step 3: Executing the AST
@asgrim
Compile & execute AST
private function compileNode(NodeInterface $node)
{
if ($node instanceof Node\BinaryOp\AbstractBinaryOp) {
return $this->compileBinaryOp($node);
}
if ($node instanceof Node\Scalar\IntegerValue) {
return $node->getValue();
}
}
@asgrim
Compile & execute AST
private function compileBinaryOp(Node\BinaryOp\AbstractBinaryOp $node)
{
$left = $this->compileNode($node->getLeft());
$right = $this->compileNode($node->getRight());
switch (get_class($node)) {
case Node\BinaryOp\Add::class:
return $left + $right;
case Node\BinaryOp\Subtract::class:
return $left - $right;
case Node\BinaryOp\Multiply::class:
return $left * $right;
case Node\BinaryOp\Divide::class:
return $left / $right;
}
}
@asgrim
What does this mean for me?
@asgrim
AST in userland
@asgrim
php-ast extension
https://github.com/nikic/php-ast
@asgrim
php-ast example usage
<?php
require 'path/to/util.php';
$code = <<<'EOC'
<?php
$var = 42;
EOC;
echo ast_dump(ast\parse_code($code, $version=35)), "\n";
// Output:
AST_STMT_LIST
0: AST_ASSIGN
var: AST_VAR
name: "var"
expr: 42
@asgrim
astkit
https://github.com/sgolemon/astkit
@asgrim
astkit example usage
$if = AstKit::parseString(<<<EOD
if (true) {
echo "This is a triumph.\n";
} else {
echo "The cake is a lie.\n";
}
EOD
);
$if->execute(); // First run, program is as-seen above
$const = $if->getChild(0)->getChild(0);
// Replace the "true" constant in the condition with false
$const->graft(0, false);
// Can also graft other AstKit nodes, instead of constants
$if->execute(); // Second run now takes the else path
@asgrim
PhpParser
https://github.com/nikic/PHP-Parser
@asgrim
PHP Parser
<?php
use PhpParser\ParserFactory;
$parser = (new ParserFactory)
->create(ParserFactory::PREFER_PHP7);
print_r($parser->parse(
file_get_contents('ast-demo-src.php')
));
@asgrim
Better Reflection
https://github.com/Roave/BetterReflection
@asgrim
Better Reflection workflow
Reflector
Source
Locator
PhpParser
Reflection
@asgrim
PHP Reflection
$reflection = new ReflectionClass(
\My\ExampleClass::class
);
$this->assertSame(
'ExampleClass',
$reflection->getShortName()
);
@asgrim
Better Reflection
$reflection = (new BetterReflection())
->classReflector()
->reflect(\My\ExampleClass::class);
$this->assertSame(
'ExampleClass',
$reflection->getShortName()
);
@asgrim
Class BetterReflection
public function sourceLocator() : SourceLocator
{
$astLocator = $this->astLocator();
return $this->sourceLocator
?? $this->sourceLocator = new MemoizingSourceLocator(new AggregateSourceLocator([
new PhpInternalSourceLocator($astLocator),
new EvaledCodeSourceLocator($astLocator),
new AutoloadSourceLocator($astLocator),
]));
}
public function classReflector() : ClassReflector
{
return $this->classReflector
?? $this->classReflector = new ClassReflector($this->sourceLocator());
}
@asgrim
Given a class structure...
<?php
class Foo
{
private $bar;
public function thing()
{
}
}
@asgrim
… we get the AST!
Class, name Foo
|-- Statements
| |-- Property, name bar
| | |-- Type [private]
| | `-- Attributes [start line: 7, end line: 9]
| `-- Method, name thing
| |-- Type [public]
| |-- Parameters [...]
| |-- Statements [...]
| `-- Attributes [start line: 7, end line: 9]
`-- Attributes [start line: 3, end line: 10]
@asgrim
What can I use Better Reflection for?
@asgrim
Monkey patching example
class MyClass
{
public function foo()
{
return 5;
}
}
@asgrim
Monkey patching example
use Roave\BetterReflection\Reflector\ClassReflector;
use Roave\BetterReflection\SourceLocator\Type\SingleFileSourceLocator;
use Roave\BetterReflection\Util\Autoload\ClassLoader;
use Roave\BetterReflection\Util\Autoload\ClassLoaderMethod\FileCacheLoader;
$loader = new ClassLoader(FileCacheLoader::defaultFileCacheLoader(__DIR__));
// Create the reflection first (without loading)
$classInfo = (new ClassReflector(
new SingleFileSourceLocator(
__DIR__ . '/MyClass.php',
(new BetterReflection())->astLocator()
)
))->reflect('MyClass');
$loader->addClass($classInfo);
@asgrim
Monkey patching example
use Roave\BetterReflection\Reflector\ClassReflector;
use Roave\BetterReflection\SourceLocator\Type\SingleFileSourceLocator;
use Roave\BetterReflection\Util\Autoload\ClassLoader;
use Roave\BetterReflection\Util\Autoload\ClassLoaderMethod\FileCacheLoader;
$loader = new ClassLoader(FileCacheLoader::defaultFileCacheLoader(__DIR__));
// Create the reflection first (without loading)
$classInfo = (new ClassReflector(
new SingleFileSourceLocator(
__DIR__ . '/MyClass.php',
(new BetterReflection())->astLocator()
)
))->reflect('MyClass');
$loader->addClass($classInfo);
@asgrim
Monkey patching example
// Override the body...!
$classInfo->getMethod('foo')->setBodyFromClosure(
function () {
return 4;
}
);
$c = new MyClass();
echo $c->foo() . "\n";
@asgrim
Monkey patching example
// Override the body...!
$classInfo->getMethod('foo')->setBodyFromClosure(
function () {
return 4;
}
);
$c = new MyClass();
echo $c->foo() . "\n"; // returns 4, not 5
@asgrim
To summarise
● For PHP engine:
○ AST is an efficient data structure to represent code
○ AST means faster compilation (ignoring opcache)
○ Separation in PHP engine for parser and compiler
○ https://wiki.php.net/rfc/abstract_syntax_tree
● Concepts can be used in userland
○ PHP Parser library - https://github.com/nikic/php-parser
○ Better Reflection - https://github.com/Roave/BetterReflection
■ Reflect on not-yet-loaded files
■ Monkey patching in userland code (!)
○ Static analysis opportunities
■ Better Reflection
■ Exakat static analysis (uses own AST)
■ Phan (uses php-ast extension)
Any questions?
...
James Titcumb
@asgrim

More Related Content

PDF
Climbing the Abstract Syntax Tree (DPC 2017)
PDF
Climbing the Abstract Syntax Tree (CodeiD PHP Odessa 2017)
PDF
Climbing the Abstract Syntax Tree (PHP Developer Days Dresden 2018)
PDF
Climbing the Abstract Syntax Tree (ScotlandPHP 2018)
PDF
Climbing the Abstract Syntax Tree (Bulgaria PHP 2016)
PDF
Climbing the Abstract Syntax Tree (Forum PHP 2017)
PDF
Climbing the Abstract Syntax Tree (PHP UK 2018)
PDF
Climbing the Abstract Syntax Tree (PHP Russia 2019)
Climbing the Abstract Syntax Tree (DPC 2017)
Climbing the Abstract Syntax Tree (CodeiD PHP Odessa 2017)
Climbing the Abstract Syntax Tree (PHP Developer Days Dresden 2018)
Climbing the Abstract Syntax Tree (ScotlandPHP 2018)
Climbing the Abstract Syntax Tree (Bulgaria PHP 2016)
Climbing the Abstract Syntax Tree (Forum PHP 2017)
Climbing the Abstract Syntax Tree (PHP UK 2018)
Climbing the Abstract Syntax Tree (PHP Russia 2019)

What's hot (20)

PDF
Climbing the Abstract Syntax Tree (Southeast PHP 2018)
PDF
Climbing the Abstract Syntax Tree (Midwest PHP 2020)
PDF
Climbing the Abstract Syntax Tree (php[world] 2019)
PDF
Climbing the Abstract Syntax Tree (phpDay 2017)
PDF
Interpret this... (PHPem 2016)
PPT
Class 4 - PHP Arrays
PDF
Sorting arrays in PHP
PDF
Creating a compiler in Perl 6
PDF
What's New in Perl? v5.10 - v5.16
PDF
OSDC.TW - Gutscript for PHP haters
PDF
Arrays in PHP
PDF
Dades i operadors
PPTX
Php 2
PPT
Php Using Arrays
PDF
Php array
KEY
Achieving Parsing Sanity In Erlang
PDF
Perl6 one-liners
PDF
Data Types Master
PPT
PDF
The Magic Of Tie
Climbing the Abstract Syntax Tree (Southeast PHP 2018)
Climbing the Abstract Syntax Tree (Midwest PHP 2020)
Climbing the Abstract Syntax Tree (php[world] 2019)
Climbing the Abstract Syntax Tree (phpDay 2017)
Interpret this... (PHPem 2016)
Class 4 - PHP Arrays
Sorting arrays in PHP
Creating a compiler in Perl 6
What's New in Perl? v5.10 - v5.16
OSDC.TW - Gutscript for PHP haters
Arrays in PHP
Dades i operadors
Php 2
Php Using Arrays
Php array
Achieving Parsing Sanity In Erlang
Perl6 one-liners
Data Types Master
The Magic Of Tie
Ad

Similar to Climbing the Abstract Syntax Tree (IPC Fall 2017) (20)

TXT
Pop3ck sh
TXT
Bouncingballs sh
PPTX
Php functions
ZIP
Round PEG, Round Hole - Parsing Functionally
PDF
Perl6 a whistle stop tour
PPTX
Perl6 a whistle stop tour
TXT
Gta v savegame
KEY
Hidden treasures of Ruby
PDF
... now write an interpreter (PHPem 2016)
PDF
How to write code you won't hate tomorrow
PDF
Top 10 php classic traps
PDF
Hacking Parse.y with ujihisa
PDF
Symfony2 - extending the console component
PDF
The Perl6 Type System
TXT
Shell.php
PDF
PHP for Adults: Clean Code and Object Calisthenics
PDF
Hacking parse.y (RubyKansai38)
PDF
R57shell
PDF
Create Custom Post Type Plugin
PDF
Good Evils In Perl
Pop3ck sh
Bouncingballs sh
Php functions
Round PEG, Round Hole - Parsing Functionally
Perl6 a whistle stop tour
Perl6 a whistle stop tour
Gta v savegame
Hidden treasures of Ruby
... now write an interpreter (PHPem 2016)
How to write code you won't hate tomorrow
Top 10 php classic traps
Hacking Parse.y with ujihisa
Symfony2 - extending the console component
The Perl6 Type System
Shell.php
PHP for Adults: Clean Code and Object Calisthenics
Hacking parse.y (RubyKansai38)
R57shell
Create Custom Post Type Plugin
Good Evils In Perl
Ad

More from James Titcumb (20)

PDF
Living the Best Life on a Legacy Project (phpday 2022).pdf
PDF
Tips for Tackling a Legacy Codebase (ScotlandPHP 2021)
PDF
Best practices for crafting high quality PHP apps (Bulgaria 2019)
PDF
Best practices for crafting high quality PHP apps (php[world] 2019)
PDF
Crafting Quality PHP Applications (PHP Joburg Oct 2019)
PDF
Best practices for crafting high quality PHP apps - PHP UK 2019
PDF
Best practices for crafting high quality PHP apps (ScotlandPHP 2018)
PDF
Kicking off with Zend Expressive and Doctrine ORM (PHP South Africa 2018)
PDF
Best practices for crafting high quality PHP apps (PHP South Africa 2018)
PDF
Crafting Quality PHP Applications (PHPkonf 2018)
PDF
Best practices for crafting high quality PHP apps (PHP Yorkshire 2018)
PDF
Crafting Quality PHP Applications: an overview (PHPSW March 2018)
PDF
Kicking off with Zend Expressive and Doctrine ORM (PHP MiNDS March 2018)
PDF
Crafting Quality PHP Applications (PHP Benelux 2018)
PDF
Crafting Quality PHP Applications (ConFoo YVR 2017)
PDF
Dip Your Toes in the Sea of Security (ConFoo YVR 2017)
PDF
Kicking off with Zend Expressive and Doctrine ORM (ConFoo YVR 2017)
PDF
Dip Your Toes in the Sea of Security (IPC Fall 2017)
PDF
Dip Your Toes in the Sea of Security (PHP South Africa 2017)
PDF
Climbing the Abstract Syntax Tree (PHP South Africa 2017)
Living the Best Life on a Legacy Project (phpday 2022).pdf
Tips for Tackling a Legacy Codebase (ScotlandPHP 2021)
Best practices for crafting high quality PHP apps (Bulgaria 2019)
Best practices for crafting high quality PHP apps (php[world] 2019)
Crafting Quality PHP Applications (PHP Joburg Oct 2019)
Best practices for crafting high quality PHP apps - PHP UK 2019
Best practices for crafting high quality PHP apps (ScotlandPHP 2018)
Kicking off with Zend Expressive and Doctrine ORM (PHP South Africa 2018)
Best practices for crafting high quality PHP apps (PHP South Africa 2018)
Crafting Quality PHP Applications (PHPkonf 2018)
Best practices for crafting high quality PHP apps (PHP Yorkshire 2018)
Crafting Quality PHP Applications: an overview (PHPSW March 2018)
Kicking off with Zend Expressive and Doctrine ORM (PHP MiNDS March 2018)
Crafting Quality PHP Applications (PHP Benelux 2018)
Crafting Quality PHP Applications (ConFoo YVR 2017)
Dip Your Toes in the Sea of Security (ConFoo YVR 2017)
Kicking off with Zend Expressive and Doctrine ORM (ConFoo YVR 2017)
Dip Your Toes in the Sea of Security (IPC Fall 2017)
Dip Your Toes in the Sea of Security (PHP South Africa 2017)
Climbing the Abstract Syntax Tree (PHP South Africa 2017)

Recently uploaded (20)

PPTX
Detection-First SIEM: Rule Types, Dashboards, and Threat-Informed Strategy
PPTX
Cloud computing and distributed systems.
PDF
NewMind AI Monthly Chronicles - July 2025
PDF
Per capita expenditure prediction using model stacking based on satellite ima...
PPTX
Understanding_Digital_Forensics_Presentation.pptx
PDF
KodekX | Application Modernization Development
PDF
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
PPTX
20250228 LYD VKU AI Blended-Learning.pptx
PPTX
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
PDF
cuic standard and advanced reporting.pdf
PDF
Building Integrated photovoltaic BIPV_UPV.pdf
PPTX
Big Data Technologies - Introduction.pptx
PDF
Bridging biosciences and deep learning for revolutionary discoveries: a compr...
PDF
Network Security Unit 5.pdf for BCA BBA.
PDF
CIFDAQ's Market Insight: SEC Turns Pro Crypto
PDF
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
DOCX
The AUB Centre for AI in Media Proposal.docx
PDF
Mobile App Security Testing_ A Comprehensive Guide.pdf
PDF
Empathic Computing: Creating Shared Understanding
PDF
Encapsulation theory and applications.pdf
Detection-First SIEM: Rule Types, Dashboards, and Threat-Informed Strategy
Cloud computing and distributed systems.
NewMind AI Monthly Chronicles - July 2025
Per capita expenditure prediction using model stacking based on satellite ima...
Understanding_Digital_Forensics_Presentation.pptx
KodekX | Application Modernization Development
TokAI - TikTok AI Agent : The First AI Application That Analyzes 10,000+ Vira...
20250228 LYD VKU AI Blended-Learning.pptx
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
cuic standard and advanced reporting.pdf
Building Integrated photovoltaic BIPV_UPV.pdf
Big Data Technologies - Introduction.pptx
Bridging biosciences and deep learning for revolutionary discoveries: a compr...
Network Security Unit 5.pdf for BCA BBA.
CIFDAQ's Market Insight: SEC Turns Pro Crypto
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
The AUB Centre for AI in Media Proposal.docx
Mobile App Security Testing_ A Comprehensive Guide.pdf
Empathic Computing: Creating Shared Understanding
Encapsulation theory and applications.pdf

Climbing the Abstract Syntax Tree (IPC Fall 2017)