10. Even Simple Cases Are Hard
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
11. Even Simple Cases Are Hard
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
12. Even Simple Cases Are Hard
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
Why the goto?
15. fmt: Your Favorite Built In Text Formatter
• A part of Coreutils (comes on nearly every UNIX system)
• Changes text to meet specific formats (like indents)
• Takes flags to do different formats
• Fairly simple (~1000 lines)
52. Compiler Optimization Summary
• You can find optimization locations with gotos (It’s not 100% lossy!)
• Not all optimizations are the same level of destructive
• Only a reasonably limited number of optimizations are destructive
• Solution: identify and reverse the most impactful!
54. fmt: The Evil Switch
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
int __cdecl main(int argc, const char **argv, const char
**envp) {
// ...
if ( v8 == 112 )
{
// ...
}
else if ( v8 <= 112 )
{
if ( v8 == -130 )
// ...
if ( v8 <= -130 )
{
if ( v8 == -131 )
{
// ...
}
LABEL_53:
// ...
}
if ( v8 == 99 )
{
// ...
}
else
{
if ( v8 != 103 )
goto LABEL_53;
// ...
}
}
else if ( v8 == 116 )
{
// ...
}
else if ( v8 <= 116 )
{
if ( v8 != 115 )
goto LABEL_53;
// ...
}
else if ( v8 == 117 )
{
// ...
}
else
{
if ( v8 != 119 )
55. The Evil Switch: Switch Lowering Optimization
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
56. Switch Lowering Optimization (simplified)
• Minimizes the number of jumps for a non-consecutive Switch
• Converts what would be many tables into an if-tree
• Has existed since the early 2000s
• Can’t be disabled in GCC (exists in O0)
57. The Switch Lowering Pattern
int main (int argc, char **argv) {
// ...
switch (optchar) {
default:
// ...
case 'c':
// ...
case 's':
// ...
case 't':
// ...
case 'u':
// ...
case 'w':
// ...
case 'g':
// ...
case 'p':
// ...
case_GETOPT_HELP_CHAR;
// ...
case_GETOPT_VERSION_CHAR(...);
// ...
}
// ...
}
CODE
CODE
CODE
v1
v1 != N
v1 < M
goto*
59. The Mythical Calls: Jump Threading
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
60. Jump Threading (Simplified)
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
Overlapping Conditions
61. Jump Threading (Simplified)
if (v1 && v2) {
puts("1");
puts("2");
goto B;
}
puts("2");
if (v2) {
B:
puts("3");
}
return 0;
if (a && b)
puts("1");
puts("2");
if (b)
puts("3");
return 0;
Overlapping Conditions Copy Overlapped Code
Jump To Overlapping Condition
62. The Mythical Calls: Jump Threading
int main (int argc, char **argv) {
// ...
if (max_width_option)
{
max_width = xdectoumax(...);
}
if (goal_width_option)
{
goal_width = xdectoumax(...);
if (max_width_option == NULL)
max_width = goal_width + 10;
}
else
{
goal_width = max_width * (2 *
(100 - LEEWAY) + 1) / 200;
}
// ...
}
Overlapping condition checks
63. Jump Threading (Simplified)
• Minimizes the number of conditional jumps when overlapping conditions
are detected
• Enabled in O2
• Can duplicate around 15 code statements before stopping
68. All Deoptimizations
• We created reverse algorithms for all optimization classes
• We tested on GCC, Clang, MSVC, and some malware
• Algorithms and code open-source:
• https://github.com/mahaloz/sailr-eval
• “Ahoy SAILR! There is No Need to DREAM of C: A Compiler-Aware
Structuring Algorithm for Binary Decompilation”, USENIX 2024
69. Benefiting from This Talk (1): REBEL! DEMAND BETTER!
• Modern decompilers (IDA, Binja, Ghidra) don’t give API access to
structuring choices… Ask for it!
• Large developers/funders of decompilers ignore “interactive” or “tunable”
decompilers… Demand it!
• Alternative: maybe instruction-level rewriting?
70. Benefiting from This Talk (2): Try angr decompiler!
• Use the angr decompiler (GUI): https://github.com/angr/angr-management,
where we implemented every algorithm
• Open-source, command line, GUI, hackable
• Limitations: made by wacky academics, slower (python), hackable
sometimes means crashable
71. Thank You for Listening
Zion Leonahenahe Basque
zbasque@asu.edu
@mahal0z
pip install angr
angr decompile /bin/true
angr decompiler