SlideShare a Scribd company logo
Buccaneers of the Binary
Plundering Compiler Optimizations for Decompilation Treasure
Zion Leonahenahe Basque
$ whoami
@mahal0z
$ whoami: summary
• I nerd over/study decompilers
• I am an active decompiler user! (CTFer)
• I hate pineapple pizza
Talk Goals
• REBEL and demand better decompilers
• OR
• Give our angr decompiler a shot
Reverse Engineers Fight Binaries
What is a Disassembler?
(disassembling)
What is a Decompiler?
(decompiling)
if (welcome)
puts("hello!");
puts("goodbye");
return 0;
What is a Decompiler? A Simple Code Generator…
(decompiling)
if (welcome)
puts("hello!");
puts("goodbye");
return 0;
What is a Decompiler? A Complex Code Generator...
(decompiling)
while ( 1 )
{
v8 = getopt_long(v4, (char *const *)v5, "0123456789cstuw:p:g:",
long_options, 0LL);
if ( v8 == -1 )
break;
if ( v8 == 112 )
{
v9 = optarg;
prefix_lead_space = 0;
if ( *optarg == 32 )
{
do
v10 = 1 - (_DWORD)optarg + (_DWORD)v9++;
while ( *v9 == 32 );
prefix_lead_space = v10;
}
prefix = v9;
prefix_full_length = strlen(v9);
v11 = &v9[prefix_full_length];
if ( v9 < v11 )
{
do
{
if ( *(v11 - 1) != 32 )
break;
--v11;
}
while ( v9 != v11 );
// ...
if ( v8 != 103 )
goto LABEL_53;
v7 = optarg;
}
}
else if ( v8 == 116 )
{
tagged = 1;
}
else if ( v8 <= 116 )
{
if ( v8 != 115 )
goto LABEL_53;
split = 1;
}
else if ( v8 == 117 )
{
uniform = 1;
}
else
{
if ( v8 != 119 )
goto LABEL_53;
v3 = optarg;
}
if ( !v7 )
{
LABEL_50:
goal_width = 187 * max_width / 200;
goto LABEL_37;
}
v27 = dcgettext(0LL, "invalid width", 5);
goal_width = xdectoumax(v7, 0LL, 0x4BuLL, "", v27, 0);
max_width = goal_width + 10;
LABEL_37:
v15 = optind;
// ...
}
Even Simple Cases Are Hard
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
Even Simple Cases Are Hard
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
Even Simple Cases Are Hard
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
Why the goto?
Is decompilation really
that broken?
A Real-World Case Study
fmt: Your Favorite Built In Text Formatter
fmt: Your Favorite Built In Text Formatter
• A part of Coreutils (comes on nearly every UNIX system)
• Changes text to meet specific formats (like indents)
• Takes flags to do different formats
• Fairly simple (~1000 lines)
Examining fmt: main (source)
Examining fmt: main, arg parser (source)
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
Examining fmt: source vs IDA 9
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
int __cdecl main(int argc, const char **argv, const char
**envp) {
// ...
if ( v8 == 112 )
{
// ...
}
else if ( v8 <= 112 )
{
if ( v8 == -130 )
// ...
if ( v8 <= -130 )
{
if ( v8 == -131 )
{
// ...
}
LABEL_53:
// ...
}
if ( v8 == 99 )
{
// ...
}
else
{
if ( v8 != 103 )
goto LABEL_53;
// ...
}
}
else if ( v8 == 116 )
{
// ...
}
else if ( v8 <= 116 )
{
if ( v8 != 115 )
goto LABEL_53;
// ...
}
else if ( v8 == 117 )
{
// ...
}
else
{
if ( v8 != 119 )
Examining fmt: where did the Switch go?!?
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
int __cdecl main(int argc, const char **argv, const char
**envp) {
// ...
if ( v8 == 112 )
{
// ...
}
else if ( v8 <= 112 )
{
if ( v8 == -130 )
// ...
if ( v8 <= -130 )
{
if ( v8 == -131 )
{
// ...
}
LABEL_53:
// ...
}
if ( v8 == 99 )
{
// ...
}
else
{
if ( v8 != 103 )
goto LABEL_53;
// ...
}
}
else if ( v8 == 116 )
{
// ...
}
else if ( v8 <= 116 )
{
if ( v8 != 115 )
goto LABEL_53;
// ...
}
else if ( v8 == 117 )
{
// ...
}
else
{
if ( v8 != 119 )
Examining fmt: more gotos…
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
int __cdecl main(int argc, const char **argv, const char
**envp) {
// ...
if ( v8 == 112 )
{
// ...
}
else if ( v8 <= 112 )
{
if ( v8 == -130 )
// ...
if ( v8 <= -130 )
{
if ( v8 == -131 )
{
// ...
}
LABEL_53:
// ...
}
if ( v8 == 99 )
{
// ...
}
else
{
if ( v8 != 103 )
goto LABEL_53;
// ...
}
}
else if ( v8 == 116 )
{
// ...
}
else if ( v8 <= 116 )
{
if ( v8 != 115 )
goto LABEL_53;
// ...
}
else if ( v8 == 117 )
{
// ...
}
else
{
if ( v8 != 119 )
Examining fmt: main, width sanitizer (source)
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
Examining fmt: source vs IDA 9
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
int __cdecl main(int argc, const char
**argv) {
// ...
if ( v3 )
{
v12 = dcgettext(...);
v13 = xdectoumax(..., v12);
max_width = v13;
if ( v7 )
{
v14 = dcgettext(...);
goal_width = xdectoumax(..., v14);
goto LABEL_37;
}
goto LABEL_50;
}
if ( !v7 )
{
LABEL_50:
goal_width = 187 * max_width / 200;
goto LABEL_37;
}
v27 = dcgettext(...);
goal_width = xdectoumax(..., v27);
max_width = goal_width + 10;
LABEL_37:
Examining fmt: source vs IDA 9
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
int __cdecl main(int argc, const char
**argv) {
// ...
if ( v3 )
{
v12 = dcgettext(...);
v13 = xdectoumax(..., v12);
max_width = v13;
if ( v7 )
{
v14 = dcgettext(...);
goal_width = xdectoumax(..., v14);
goto LABEL_37;
}
goto LABEL_50;
}
if ( !v7 )
{
LABEL_50:
goal_width = 187 * max_width / 200;
goto LABEL_37;
}
v27 = dcgettext(...);
goal_width = xdectoumax(..., v27);
max_width = goal_width + 10;
LABEL_37:
Examining fmt: source vs IDA 9
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
int __cdecl main(int argc, const char
**argv) {
// ...
if ( v3 )
{
v12 = dcgettext(...);
v13 = xdectoumax(..., v12);
max_width = v13;
if ( v7 )
{
v14 = dcgettext(...);
goal_width = xdectoumax(..., v14);
goto LABEL_37;
}
goto LABEL_50;
}
if ( !v7 )
{
LABEL_50:
goal_width = 187 * max_width / 200;
goto LABEL_37;
}
v27 = dcgettext(...);
goal_width = xdectoumax(..., v27);
max_width = goal_width + 10;
LABEL_37:
What is going on here?
How Decompilers Make Linear Code
if (v1)
puts("hello!");
puts("goodbye");
return 0;
int v1;
Control Flow Structuring: Graph Pattern Matching
if (a)
puts("1");
puts("2");
if (a)
puts("1");
else
puts("2");
puts("3")
do {
puts("1");
} while(a);
puts("2");
switch(a) {
case 1:
break;
case 2:
break;
default:
break;
}
puts("1");
Decompilation
Structuring Example
Structuring Example
Start bottom up
Structuring Example
if-then-else
Structuring Example
if-then-else
Structuring Example
if-then-else
Structuring Example
if-then-else
if-then
Structuring Example
if-then-else
if-then
Structuring Example
if-then-else
if-then
Structuring Example
if-then-else
if-then
We’re out of structures to
match! This causes a failure.
Structuring Example
if-then-else
Pretend this edge does
not exist (“virtualization”)
Structuring Example
if-then-else
Pretend this edge does
not exist (“virtualization”)
sequence
Structuring Example
if-then-else
Pretend this edge does
not exist (“virtualization”)
sequence
Structuring Example
if-then-else
Pretend this edge does
not exist (“virtualization”)
sequence
Structuring Example
if (a)
goto B;
// ...
if (b) {
B:
// ...
}
else {
// ...
}
Failed Structuring Results in Gotos
if (a)
goto B;
// ...
if (b) {
B:
// ...
}
else {
// ...
}
What Causes Failures?
switch(a) {
case 1:
break;
case 2:
break;
default:
break;
}
puts("1");
switch
Expected
What Causes Failures? Unknown Patterns!
switch(a) {
case 1:
break;
case 2:
break;
default:
break;
}
puts("1");
switch
≠
Reality
What Causes Unknown Patterns? Compiler Optimizations!
switch(a) {
case 1:
break;
case 2:
break;
default:
break;
}
puts("1");
Optimization
How To Know An Optimization (Likely) Occurred
if (a)
goto B;
// ...
if (b) {
B:
// ...
}
else {
// ...
}
int __cdecl main(int argc, const char
**argv, const char **envp) {
if ( v8 != 103 )
goto LABEL_53;
// ...
}
}
else if ( v8 == 116 )
{
// ...
}
else if ( v8 <= 116 )
{
if ( v8 != 115 )
goto LABEL_53;
// ...
}
else if ( v8 == 117 )
{
// ...
}
else
{
if ( v8 != 119 )
goto LABEL_53;
// ...
}
int __cdecl main(int argc, const char
**argv) {
// ...
if ( v3 )
{
v12 = dcgettext(...);
v13 = xdectoumax(..., v12);
max_width = v13;
if ( v7 )
{
v14 = dcgettext(...);
goal_width = xdectoumax(..., v14);
goto LABEL_37;
}
goto LABEL_50;
}
if ( !v7 )
{
LABEL_50:
goal_width = 187 * max_width / 200;
goto LABEL_37;
}
// ...
}
How To Know An Optimization (Likely) Occurred
if (a)
goto B;
// ...
if (b) {
B:
// ...
}
else {
// ...
}
int __cdecl main(int argc, const char
**argv, const char **envp) {
if ( v8 != 103 )
goto LABEL_53;
// ...
}
}
else if ( v8 == 116 )
{
// ...
}
else if ( v8 <= 116 )
{
if ( v8 != 115 )
goto LABEL_53;
// ...
}
else if ( v8 == 117 )
{
// ...
}
else
{
if ( v8 != 119 )
goto LABEL_53;
// ...
}
int __cdecl main(int argc, const char
**argv) {
// ...
if ( v3 )
{
v12 = dcgettext(...);
v13 = xdectoumax(..., v12);
max_width = v13;
if ( v7 )
{
v14 = dcgettext(...);
goal_width = xdectoumax(..., v14);
goto LABEL_37;
}
goto LABEL_50;
}
if ( !v7 )
{
LABEL_50:
goal_width = 187 * max_width / 200;
goto LABEL_37;
}
// ...
}
Compiler Optimizations And Gotos (Failures)
Compiler Optimizations And Gotos (Failures)
Only 7 optimizations cause 80% of gotos
Compiler Optimizations And Gotos (Failures)
3 Observed Types of Compiler Optimizations
Optimization
Jump Threading
STCR
Loop Header
CSE
Switch Conversion
Cross Jumping
Builtin Inlining
Switch Lowering
Non-Ret Functions
Decompiler Effect
Code Duplication
Code Merging
Code Movement/Other
Compiler Optimization Summary
• You can find optimization locations with gotos (It’s not 100% lossy!)
• Not all optimizations are the same level of destructive
• Only a reasonably limited number of optimizations cause the destruction
• Solution: identify and reverse the most impactful!
Plundering Compiler
Optimizations
fmt: The Evil Switch
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
int __cdecl main(int argc, const char **argv, const char
**envp) {
// ...
if ( v8 == 112 )
{
// ...
}
else if ( v8 <= 112 )
{
if ( v8 == -130 )
// ...
if ( v8 <= -130 )
{
if ( v8 == -131 )
{
// ...
}
LABEL_53:
// ...
}
if ( v8 == 99 )
{
// ...
}
else
{
if ( v8 != 103 )
goto LABEL_53;
// ...
}
}
else if ( v8 == 116 )
{
// ...
}
else if ( v8 <= 116 )
{
if ( v8 != 115 )
goto LABEL_53;
// ...
}
else if ( v8 == 117 )
{
// ...
}
else
{
if ( v8 != 119 )
The Evil Switch: Switch Lowering Optimization
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
Switch Lowering Optimization (simplified)
• Minimizes the number of jumps for a non-consecutive Switch
• Converts what would be many tables into an if-tree
• Has existed since the early 2000s
• Can’t be disabled in GCC (exists in O0)
The Switch Lowering Pattern
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
CODE
CODE
CODE
v1
v1 != N
v1 < M
goto*
The Mythical Calls: Jump Threading
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
int __cdecl main(int argc, const char
**argv) {
// ...
if ( v3 )
{
v12 = dcgettext(...);
v13 = xdectoumax(..., v12);
max_width = v13;
if ( v7 )
{
v14 = dcgettext(...);
goal_width = xdectoumax(..., v14);
goto LABEL_37;
}
goto LABEL_50;
}
if ( !v7 )
{
LABEL_50:
goal_width = 187 * max_width / 200;
goto LABEL_37;
}
v27 = dcgettext(...);
goal_width = xdectoumax(..., v27);
max_width = goal_width + 10;
LABEL_37:
The Mythical Calls: Jump Threading
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
Jump Threading (Simplified)
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
Overlapping Conditions
Jump Threading (Simplified)
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
Overlapping Conditions Copy Overlapped Code
Jump To Overlapping Condition
The Mythical Calls: Jump Threading
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
Overlapping condition checks
Jump Threading (Simplified)
• Minimizes the number of conditional jumps when overlapping conditions
are detected
• Enabled in O2
• Can duplicate around 15 code statements before stopping
Jump Threading Pattern
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
DUPE
CODE
… …
…
DUPE
CODE
…
goto
IS THIS ALL REAL?
DEMO:
angr decompiler
Deoptimizing At Scale
Keeps failures (gotos) low, while maintaining source similarity
All Deoptimizations
• We created reverse algorithms for all optimization classes
• We tested on GCC, Clang, MSVC, and some malware
• Algorithms and code open-source:
• https://github.com/mahaloz/sailr-eval
• “Ahoy SAILR! There is No Need to DREAM of C: A Compiler-Aware
Structuring Algorithm for Binary Decompilation”, USENIX 2024
Benefiting from This Talk (1): REBEL! DEMAND BETTER!
• Modern decompilers (IDA, Binja, Ghidra) don’t give API access to
structuring choices… Ask for it!
• Large developers/funders of decompilers ignore “interactive” or “tunable”
decompilers… Demand it!
• Alternative: maybe instruction-level rewriting?
Benefiting from This Talk (2): Try angr decompiler!
• Use angr decompiler (GUI): https://github.com/angr/angr-management,
where we implemented every algorithm
• Open-source, command line, GUI, hackable
• Limitations: made by wacky academics, slower (python), hackable
sometimes means crashable
Thank You for Listening
Zion Leonahenahe Basque
zbasque@asu.edu
@mahal0z
pip install angr
angr decompile /bin/true
angr decompiler

More Related Content

PPTX
Groovy
PDF
Beyond javascript using the features of tomorrow
PDF
Rust Workshop - NITC FOSSMEET 2017
PPTX
Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...
PDF
Bytes in the Machine: Inside the CPython interpreter
PPTX
ES6 Overview
ODP
Scala 2 + 2 > 4
PDF
Davide Cerbo - Kotlin: forse è la volta buona - Codemotion Milan 2017
Groovy
Beyond javascript using the features of tomorrow
Rust Workshop - NITC FOSSMEET 2017
Самые вкусные баги из игрового кода: как ошибаются наши коллеги-программисты ...
Bytes in the Machine: Inside the CPython interpreter
ES6 Overview
Scala 2 + 2 > 4
Davide Cerbo - Kotlin: forse è la volta buona - Codemotion Milan 2017

Similar to REverse_SAILR_talk.pptx buccaneers of the binary (20)

PPTX
Namespaces
PPTX
C++ AMP 실천 및 적용 전략
PDF
Marrow: A Meta-Framework for Python 2.6+ and 3.1+
PPT
为什么 rust-lang 吸引我?
ODP
Domain Specific Languages In Scala Duse3
PPTX
MiamiJS - The Future of JavaScript
PPT
Effecient javascript
PDF
A comparison between C# and Java
PDF
Silicon Valley JUG: JVM Mechanics
KEY
Sbaw091006
PPTX
golang_getting_started.pptx
PPTX
EcmaScript unchained
PPT
C Tutorials
PPTX
ES6 is Nigh
PDF
Introduction aux Macros
PPTX
C++11 - STL Additions
PPTX
Lecture 04 Programming C for Beginners 001
PDF
Is Haskell an acceptable Perl?
PDF
C++ and OOPS Crash Course by ACM DBIT | Grejo Joby
Namespaces
C++ AMP 실천 및 적용 전략
Marrow: A Meta-Framework for Python 2.6+ and 3.1+
为什么 rust-lang 吸引我?
Domain Specific Languages In Scala Duse3
MiamiJS - The Future of JavaScript
Effecient javascript
A comparison between C# and Java
Silicon Valley JUG: JVM Mechanics
Sbaw091006
golang_getting_started.pptx
EcmaScript unchained
C Tutorials
ES6 is Nigh
Introduction aux Macros
C++11 - STL Additions
Lecture 04 Programming C for Beginners 001
Is Haskell an acceptable Perl?
C++ and OOPS Crash Course by ACM DBIT | Grejo Joby
Ad

Recently uploaded (20)

PDF
Reach Out and Touch Someone: Haptics and Empathic Computing
PDF
Architecting across the Boundaries of two Complex Domains - Healthcare & Tech...
PPTX
MYSQL Presentation for SQL database connectivity
PPTX
Understanding_Digital_Forensics_Presentation.pptx
PPTX
A Presentation on Artificial Intelligence
PPTX
VMware vSphere Foundation How to Sell Presentation-Ver1.4-2-14-2024.pptx
PPT
Teaching material agriculture food technology
PDF
Network Security Unit 5.pdf for BCA BBA.
PDF
Spectral efficient network and resource selection model in 5G networks
PPTX
20250228 LYD VKU AI Blended-Learning.pptx
PPTX
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
PDF
Empathic Computing: Creating Shared Understanding
PDF
Peak of Data & AI Encore- AI for Metadata and Smarter Workflows
PDF
NewMind AI Monthly Chronicles - July 2025
PDF
Review of recent advances in non-invasive hemoglobin estimation
PDF
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
PDF
Mobile App Security Testing_ A Comprehensive Guide.pdf
PDF
Per capita expenditure prediction using model stacking based on satellite ima...
PPTX
Cloud computing and distributed systems.
PDF
CIFDAQ's Market Insight: SEC Turns Pro Crypto
Reach Out and Touch Someone: Haptics and Empathic Computing
Architecting across the Boundaries of two Complex Domains - Healthcare & Tech...
MYSQL Presentation for SQL database connectivity
Understanding_Digital_Forensics_Presentation.pptx
A Presentation on Artificial Intelligence
VMware vSphere Foundation How to Sell Presentation-Ver1.4-2-14-2024.pptx
Teaching material agriculture food technology
Network Security Unit 5.pdf for BCA BBA.
Spectral efficient network and resource selection model in 5G networks
20250228 LYD VKU AI Blended-Learning.pptx
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
Empathic Computing: Creating Shared Understanding
Peak of Data & AI Encore- AI for Metadata and Smarter Workflows
NewMind AI Monthly Chronicles - July 2025
Review of recent advances in non-invasive hemoglobin estimation
Blue Purple Modern Animated Computer Science Presentation.pdf.pdf
Mobile App Security Testing_ A Comprehensive Guide.pdf
Per capita expenditure prediction using model stacking based on satellite ima...
Cloud computing and distributed systems.
CIFDAQ's Market Insight: SEC Turns Pro Crypto
Ad

REverse_SAILR_talk.pptx buccaneers of the binary

  • 1. Buccaneers of the Binary Plundering Compiler Optimizations for Decompilation Treasure Zion Leonahenahe Basque
  • 3. $ whoami: summary • I nerd over/study decompilers • I am an active decompiler user! (CTFer) • I hate pineapple pizza
  • 4. Talk Goals • REBELLION and demand better decompilers • OR • Give our angr decompiler a shot
  • 6. What is a Disassembler? (disassembling)
  • 7. What is a Decompiler? (decompiling) if (welcome) puts("hello!"); puts("goodbye"); return 0;
  • 8. What is a Decompiler? A Simple Code Generator… (decompiling) if (welcome) puts("hello!"); puts("goodbye"); return 0;
  • 9. What is a Decompiler? A Complex Code Generator... (decompiling) while ( 1 ) { v8 = getopt_long(v4, (char *const *)v5, "0123456789cstuw:p:g:", long_options, 0LL); if ( v8 == -1 ) break; if ( v8 == 112 ) { v9 = optarg; prefix_lead_space = 0; if ( *optarg == 32 ) { do v10 = 1 - (_DWORD)optarg + (_DWORD)v9++; while ( *v9 == 32 ); prefix_lead_space = v10; } prefix = v9; prefix_full_length = strlen(v9); v11 = &v9[prefix_full_length]; if ( v9 < v11 ) { do { if ( *(v11 - 1) != 32 ) break; --v11; } while ( v9 != v11 ); // ... if ( v8 != 103 ) goto LABEL_53; v7 = optarg; } } else if ( v8 == 116 ) { tagged = 1; } else if ( v8 <= 116 ) { if ( v8 != 115 ) goto LABEL_53; split = 1; } else if ( v8 == 117 ) { uniform = 1; } else { if ( v8 != 119 ) goto LABEL_53; v3 = optarg; } if ( !v7 ) { LABEL_50: goal_width = 187 * max_width / 200; goto LABEL_37; } v27 = dcgettext(0LL, "invalid width", 5); goal_width = xdectoumax(v7, 0LL, 0x4BuLL, "", v27, 0); max_width = goal_width + 10; LABEL_37: v15 = optind; // ... }
  • 10. Even Simple Cases Are Hard if (v1 && v2) { puts("1"); puts("2"); goto B; } puts("2"); if (v2) { B: puts("3"); } return 0; if (a && b) puts("1"); puts("2"); if (b) puts("3"); return 0;
  • 11. Even Simple Cases Are Hard if (v1 && v2) { puts("1"); puts("2"); goto B; } puts("2"); if (v2) { B: puts("3"); } return 0; if (a && b) puts("1"); puts("2"); if (b) puts("3"); return 0;
  • 12. Even Simple Cases Are Hard if (v1 && v2) { puts("1"); puts("2"); goto B; } puts("2"); if (v2) { B: puts("3"); } return 0; if (a && b) puts("1"); puts("2"); if (b) puts("3"); return 0; Why the goto?
  • 13. Is decompilation really that broken? A Real-World Case Study
  • 14. fmt: Your Favorite Built In Text Formatter
  • 15. fmt: Your Favorite Built In Text Formatter • A part of Coreutils (comes on nearly every UNIX system) • Changes text to meet specific formats (like indents) • Takes flags to do different formats • Fairly simple (~1000 lines)
  • 17. Examining fmt: main, arg parser (source) int main (int argc, char **argv) { // ... switch (optchar) { default: // ... case 'c': // ... case 's': // ... case 't': // ... case 'u': // ... case 'w': // ... case 'g': // ... case 'p': // ... case_GETOPT_HELP_CHAR; // ... case_GETOPT_VERSION_CHAR(...); // ... } // ... }
  • 18. Examining fmt: source vs IDA 9 int main (int argc, char **argv) { // ... switch (optchar) { default: // ... case 'c': // ... case 's': // ... case 't': // ... case 'u': // ... case 'w': // ... case 'g': // ... case 'p': // ... case_GETOPT_HELP_CHAR; // ... case_GETOPT_VERSION_CHAR(...); // ... } // ... } int __cdecl main(int argc, const char **argv, const char **envp) { // ... if ( v8 == 112 ) { // ... } else if ( v8 <= 112 ) { if ( v8 == -130 ) // ... if ( v8 <= -130 ) { if ( v8 == -131 ) { // ... } LABEL_53: // ... } if ( v8 == 99 ) { // ... } else { if ( v8 != 103 ) goto LABEL_53; // ... } } else if ( v8 == 116 ) { // ... } else if ( v8 <= 116 ) { if ( v8 != 115 ) goto LABEL_53; // ... } else if ( v8 == 117 ) { // ... } else { if ( v8 != 119 )
  • 19. Examining fmt: where did the Switch go?!? int main (int argc, char **argv) { // ... switch (optchar) { default: // ... case 'c': // ... case 's': // ... case 't': // ... case 'u': // ... case 'w': // ... case 'g': // ... case 'p': // ... case_GETOPT_HELP_CHAR; // ... case_GETOPT_VERSION_CHAR(...); // ... } // ... } int __cdecl main(int argc, const char **argv, const char **envp) { // ... if ( v8 == 112 ) { // ... } else if ( v8 <= 112 ) { if ( v8 == -130 ) // ... if ( v8 <= -130 ) { if ( v8 == -131 ) { // ... } LABEL_53: // ... } if ( v8 == 99 ) { // ... } else { if ( v8 != 103 ) goto LABEL_53; // ... } } else if ( v8 == 116 ) { // ... } else if ( v8 <= 116 ) { if ( v8 != 115 ) goto LABEL_53; // ... } else if ( v8 == 117 ) { // ... } else { if ( v8 != 119 )
  • 20. Examining fmt: more gotos… int main (int argc, char **argv) { // ... switch (optchar) { default: // ... case 'c': // ... case 's': // ... case 't': // ... case 'u': // ... case 'w': // ... case 'g': // ... case 'p': // ... case_GETOPT_HELP_CHAR; // ... case_GETOPT_VERSION_CHAR(...); // ... } // ... } int __cdecl main(int argc, const char **argv, const char **envp) { // ... if ( v8 == 112 ) { // ... } else if ( v8 <= 112 ) { if ( v8 == -130 ) // ... if ( v8 <= -130 ) { if ( v8 == -131 ) { // ... } LABEL_53: // ... } if ( v8 == 99 ) { // ... } else { if ( v8 != 103 ) goto LABEL_53; // ... } } else if ( v8 == 116 ) { // ... } else if ( v8 <= 116 ) { if ( v8 != 115 ) goto LABEL_53; // ... } else if ( v8 == 117 ) { // ... } else { if ( v8 != 119 )
  • 21. Examining fmt: main, width sanitizer (source) int main (int argc, char **argv) { // ... if (max_width_option) { max_width = xdectoumax(...); } if (goal_width_option) { goal_width = xdectoumax(...); if (max_width_option == NULL) max_width = goal_width + 10; } else { goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; } // ... }
  • 22. Examining fmt: source vs IDA 9 int main (int argc, char **argv) { // ... if (max_width_option) { max_width = xdectoumax(...); } if (goal_width_option) { goal_width = xdectoumax(...); if (max_width_option == NULL) max_width = goal_width + 10; } else { goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; } // ... } int __cdecl main(int argc, const char **argv) { // ... if ( v3 ) { v12 = dcgettext(...); v13 = xdectoumax(..., v12); max_width = v13; if ( v7 ) { v14 = dcgettext(...); goal_width = xdectoumax(..., v14); goto LABEL_37; } goto LABEL_50; } if ( !v7 ) { LABEL_50: goal_width = 187 * max_width / 200; goto LABEL_37; } v27 = dcgettext(...); goal_width = xdectoumax(..., v27); max_width = goal_width + 10; LABEL_37:
  • 23. Examining fmt: source vs IDA 9 int main (int argc, char **argv) { // ... if (max_width_option) { max_width = xdectoumax(...); } if (goal_width_option) { goal_width = xdectoumax(...); if (max_width_option == NULL) max_width = goal_width + 10; } else { goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; } // ... } int __cdecl main(int argc, const char **argv) { // ... if ( v3 ) { v12 = dcgettext(...); v13 = xdectoumax(..., v12); max_width = v13; if ( v7 ) { v14 = dcgettext(...); goal_width = xdectoumax(..., v14); goto LABEL_37; } goto LABEL_50; } if ( !v7 ) { LABEL_50: goal_width = 187 * max_width / 200; goto LABEL_37; } v27 = dcgettext(...); goal_width = xdectoumax(..., v27); max_width = goal_width + 10; LABEL_37:
  • 24. Examining fmt: source vs IDA 9 int main (int argc, char **argv) { // ... if (max_width_option) { max_width = xdectoumax(...); } if (goal_width_option) { goal_width = xdectoumax(...); if (max_width_option == NULL) max_width = goal_width + 10; } else { goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; } // ... } int __cdecl main(int argc, const char **argv) { // ... if ( v3 ) { v12 = dcgettext(...); v13 = xdectoumax(..., v12); max_width = v13; if ( v7 ) { v14 = dcgettext(...); goal_width = xdectoumax(..., v14); goto LABEL_37; } goto LABEL_50; } if ( !v7 ) { LABEL_50: goal_width = 187 * max_width / 200; goto LABEL_37; } v27 = dcgettext(...); goal_width = xdectoumax(..., v27); max_width = goal_width + 10; LABEL_37:
  • 25. What is going on here?
  • 26. How Decompilers Make Linear Code if (v1) puts("hello!"); puts("goodbye"); return 0; int v1;
  • 27. Control Flow Structuring: Graph Pattern Matching if (a) puts("1"); puts("2"); if (a) puts("1"); else puts("2"); puts("3") do { puts("1"); } while(a); puts("2"); switch(a) { case 1: break; case 2: break; default: break; } puts("1"); Decompilation
  • 36. Structuring Example if-then-else if-then We’re out of structures to match! This causes a failure.
  • 37. Structuring Example if-then-else Pretend this edge does not exist (“virtualization”)
  • 38. Structuring Example if-then-else Pretend this edge does not exist (“virtualization”) sequence
  • 39. Structuring Example if-then-else Pretend this edge does not exist (“virtualization”) sequence
  • 40. Structuring Example if-then-else Pretend this edge does not exist (“virtualization”) sequence
  • 41. Structuring Example if (a) goto B; // ... if (b) { B: // ... } else { // ... }
  • 42. Failed Structuring Results in Gotos if (a) goto B; // ... if (b) { B: // ... } else { // ... }
  • 43. What Causes Failures? switch(a) { case 1: break; case 2: break; default: break; } puts("1"); switch Expected
  • 44. What Causes Failures? Unknown Patterns! switch(a) { case 1: break; case 2: break; default: break; } puts("1"); switch ≠ Reality
  • 45. What Causes Unknown Patterns? Compiler Optimizations! switch(a) { case 1: break; case 2: break; default: break; } puts("1"); Optimization
  • 46. How To Know An Optimization (Likely) Occurred if (a) goto B; // ... if (b) { B: // ... } else { // ... } int __cdecl main(int argc, const char **argv, const char **envp) { if ( v8 != 103 ) goto LABEL_53; // ... } } else if ( v8 == 116 ) { // ... } else if ( v8 <= 116 ) { if ( v8 != 115 ) goto LABEL_53; // ... } else if ( v8 == 117 ) { // ... } else { if ( v8 != 119 ) goto LABEL_53; // ... } int __cdecl main(int argc, const char **argv) { // ... if ( v3 ) { v12 = dcgettext(...); v13 = xdectoumax(..., v12); max_width = v13; if ( v7 ) { v14 = dcgettext(...); goal_width = xdectoumax(..., v14); goto LABEL_37; } goto LABEL_50; } if ( !v7 ) { LABEL_50: goal_width = 187 * max_width / 200; goto LABEL_37; } // ... }
  • 47. How To Know An Optimization (Likely) Occurred if (a) goto B; // ... if (b) { B: // ... } else { // ... } int __cdecl main(int argc, const char **argv, const char **envp) { if ( v8 != 103 ) goto LABEL_53; // ... } } else if ( v8 == 116 ) { // ... } else if ( v8 <= 116 ) { if ( v8 != 115 ) goto LABEL_53; // ... } else if ( v8 == 117 ) { // ... } else { if ( v8 != 119 ) goto LABEL_53; // ... } int __cdecl main(int argc, const char **argv) { // ... if ( v3 ) { v12 = dcgettext(...); v13 = xdectoumax(..., v12); max_width = v13; if ( v7 ) { v14 = dcgettext(...); goal_width = xdectoumax(..., v14); goto LABEL_37; } goto LABEL_50; } if ( !v7 ) { LABEL_50: goal_width = 187 * max_width / 200; goto LABEL_37; } // ... }
  • 48. Compiler Optimizations And Gotos (Failures)
  • 49. Compiler Optimizations And Gotos (Failures) Only 7 optimizations cause 80% of gotos
  • 50. Compiler Optimizations And Gotos (Failures)
  • 51. 3 Observed Types of Compiler Optimizations Optimization Jump Threading STCR Loop Header CSE Switch Conversion Cross Jumping Builtin Inlining Switch Lowering Non-Ret Functions Decompiler Effect Code Duplication Code Merging Code Movement/Other
  • 52. Compiler Optimization Summary • You can find optimization locations with gotos (It’s not 100% lossy!) • Not all optimizations are the same level of destructive • There is a reasonably limited amount that causes destruction • Solution: identify and reverse the most impactful!
  • 54. fmt: The Evil Switch int main (int argc, char **argv) { // ... switch (optchar) { default: // ... case 'c': // ... case 's': // ... case 't': // ... case 'u': // ... case 'w': // ... case 'g': // ... case 'p': // ... case_GETOPT_HELP_CHAR; // ... case_GETOPT_VERSION_CHAR(...); // ... } // ... } int __cdecl main(int argc, const char **argv, const char **envp) { // ... if ( v8 == 112 ) { // ... } else if ( v8 <= 112 ) { if ( v8 == -130 ) // ... if ( v8 <= -130 ) { if ( v8 == -131 ) { // ... } LABEL_53: // ... } if ( v8 == 99 ) { // ... } else { if ( v8 != 103 ) goto LABEL_53; // ... } } else if ( v8 == 116 ) { // ... } else if ( v8 <= 116 ) { if ( v8 != 115 ) goto LABEL_53; // ... } else if ( v8 == 117 ) { // ... } else { if ( v8 != 119 )
  • 55. The Evil Switch: Switch Lowering Optimization int main (int argc, char **argv) { // ... switch (optchar) { default: // ... case 'c': // ... case 's': // ... case 't': // ... case 'u': // ... case 'w': // ... case 'g': // ... case 'p': // ... case_GETOPT_HELP_CHAR; // ... case_GETOPT_VERSION_CHAR(...); // ... } // ... }
  • 56. Switch Lowering Optimization (simplified) • Minimizes the number of jumps for a non-consecutive Switch • Converts what would be many tables into an if-tree • Has existed since the early 2000s • Can’t be disabled in GCC (exists in O0)
  • 57. The Switch Lowering Pattern int main (int argc, char **argv) { // ... switch (optchar) { default: // ... case 'c': // ... case 's': // ... case 't': // ... case 'u': // ... case 'w': // ... case 'g': // ... case 'p': // ... case_GETOPT_HELP_CHAR; // ... case_GETOPT_VERSION_CHAR(...); // ... } // ... } CODE CODE CODE v1 v1 != N v1 < M goto*
  • 58. The Mythical Calls: Jump Threading int main (int argc, char **argv) { // ... if (max_width_option) { max_width = xdectoumax(...); } if (goal_width_option) { goal_width = xdectoumax(...); if (max_width_option == NULL) max_width = goal_width + 10; } else { goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; } // ... } int __cdecl main(int argc, const char **argv) { // ... if ( v3 ) { v12 = dcgettext(...); v13 = xdectoumax(..., v12); max_width = v13; if ( v7 ) { v14 = dcgettext(...); goal_width = xdectoumax(..., v14); goto LABEL_37; } goto LABEL_50; } if ( !v7 ) { LABEL_50: goal_width = 187 * max_width / 200; goto LABEL_37; } v27 = dcgettext(...); goal_width = xdectoumax(..., v27); max_width = goal_width + 10; LABEL_37:
  • 59. The Mythical Calls: Jump Threading int main (int argc, char **argv) { // ... if (max_width_option) { max_width = xdectoumax(...); } if (goal_width_option) { goal_width = xdectoumax(...); if (max_width_option == NULL) max_width = goal_width + 10; } else { goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; } // ... }
  • 60. Jump Threading (Simplified) if (v1 && v2) { puts("1"); puts("2"); goto B; } puts("2"); if (v2) { B: puts("3"); } return 0; if (a && b) puts("1"); puts("2"); if (b) puts("3"); return 0; Overlapping Conditions
  • 61. Jump Threading (Simplified) if (v1 && v2) { puts("1"); puts("2"); goto B; } puts("2"); if (v2) { B: puts("3"); } return 0; if (a && b) puts("1"); puts("2"); if (b) puts("3"); return 0; Overlapping Conditions Copy Overlapped Code Jump To Overlapping Condition
  • 62. The Mythical Calls: Jump Threading int main (int argc, char **argv) { // ... if (max_width_option) { max_width = xdectoumax(...); } if (goal_width_option) { goal_width = xdectoumax(...); if (max_width_option == NULL) max_width = goal_width + 10; } else { goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; } // ... } Overlapping condition checks
  • 63. Jump Threading (Simplified) • Minimizes the number of conditional jumps when overlapping conditions are detected • Enabled in O2 • Can duplicate around 15 code statements before stopping
  • 64. Jump Threading Pattern int main (int argc, char **argv) { // ... if (max_width_option) { max_width = xdectoumax(...); } if (goal_width_option) { goal_width = xdectoumax(...); if (max_width_option == NULL) max_width = goal_width + 10; } else { goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; } // ... } DUPE CODE … … … DUPE CODE … goto
  • 65. IS THIS ALL REAL?
  • 67. Deoptimizing At Scale Keeps failures (gotos) low, while maintaining source similarity
  • 68. All Deoptimizations • We created reverse algorithms for all optimization classes • We tested on GCC, Clang, MSVC, and some malware • Algorithms and code open-source: • https://github.com/mahaloz/sailr-eval • “Ahoy SAILR! There is No Need to DREAM of C: A Compiler-Aware Structuring Algorithm for Binary Decompilation”, USENIX 2024
  • 69. Benefiting from This Talk (1): REBEL! DEMAND BETTER! • Modern decompilers (IDA, Binja, Ghidra) don’t give API access to structuring choices… Ask for it! • Large developers/funders of decompilers ignore “interactive” or “tunable” decompilers… Demand it! • Alternative: maybe instruction-level rewriting?
  • 70. Benefiting from This Talk (2): Try angr decompiler! • Use angr decompiler (GUI): https://github.com/angr/angr-management, where we implemented every algorithm • Open-source, command line, GUI, hackable • Limitations: made by wacky academics, slower (python), hackable sometimes means crashable
  • 71. Thank You for Listening Zion Leonahenahe Basque zbasque@asu.edu @mahal0z pip install angr angr decompile /bin/true angr decompiler