diff --git a/CMakeLists.txt b/CMakeLists.txt
index 23c50917a..d0d05dc48 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,12 +1,10 @@
-cmake_minimum_required(VERSION 2.8.5)
+cmake_minimum_required(VERSION 3.5)
 
-project(kcc C CXX)
+project(kcc C)
 
-set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_C_FLAGS_DEBUG "-g")
 
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -pedantic -std=c99 -D_POSIX_C_SOURCE=200809")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -std=c++11")
 
 if (NOT DEFINED KCC_DEFAULT_PREPROCESSOR)
     set(KCC_DEFAULT_PREPROCESSOR "cpp" CACHE PATH "The preprocessor KCC should use if none is explicitly specified")
@@ -20,7 +18,8 @@ add_definitions("-DKCC_DEFAULT_PREPROCESSOR=\"${KCC_DEFAULT_PREPROCESSOR}\"")
 
 find_package(BISON REQUIRED)
 find_package(FLEX REQUIRED)
-find_package(Boost REQUIRED)
+
+add_compile_options(-Wno-implicit-function-declaration -Wno-incompatible-pointer-types -Wno-int-conversion)
 
 if (KCC_DOCUMENTATION)
     find_program(SCDOC NAMES scdoc REQUIRED)
@@ -40,8 +39,8 @@ BISON_TARGET(SDCCy ${PROJECT_SOURCE_DIR}/src/SDCC.y ${PROJECT_BINARY_DIR}/gen/SD
 FLEX_TARGET(SDCClex ${PROJECT_SOURCE_DIR}/src/SDCC.lex ${PROJECT_BINARY_DIR}/gen/SDCClex.c)
 
 FILE(GLOB SOURCES
-    ${PROJECT_SOURCE_DIR}/src/*.c ${PROJECT_SOURCE_DIR}/src/*.cc
-    ${PROJECT_SOURCE_DIR}/src/backend/*.c ${PROJECT_SOURCE_DIR}/src/backend/*.cc
+    ${PROJECT_SOURCE_DIR}/src/*.c
+    ${PROJECT_SOURCE_DIR}/src/backend/*.c
     ${PROJECT_SOURCE_DIR}/src/util/*.c
     ${PROJECT_SOURCE_DIR}/src/backend/*.rul
 )
@@ -77,6 +76,6 @@ if (KCC_DOCUMENTATION)
     install(FILES ${PROJECT_BINARY_DIR}/kcc.1 DESTINATION share/man/man1)
 endif()
 
-target_link_libraries(kcc m ${BOOST_LIBRARIES} ${LIBS})
+target_link_libraries(kcc m ${LIBS})
 
 install(TARGETS kcc DESTINATION bin)
diff --git a/src/KCCCleanup.c b/src/KCCCleanup.c
new file mode 100644
index 000000000..cfc60fd0c
--- /dev/null
+++ b/src/KCCCleanup.c
@@ -0,0 +1,230 @@
+#include "KCCCleanup.h"
+
+#include "SDCCglobl.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern struct options options;
+
+static const char *allowed_commands[] = {
+    ".equ",  ".globl",  ".area",   ".org",    ".map",     ".db",    ".ds",
+    ".dw",   ".ascii",  ".asciip", ".asciiz", ".if",      ".ifdef", ".ifndef",
+    ".elif", ".elseif", ".end",    ".module", ".optsdcc", ".function"};
+
+static const size_t allowed_commands_count =
+    sizeof(allowed_commands) / sizeof(allowed_commands[0]);
+
+/* Check if a character is whitespace (space or tab), matching the original. */
+static int is_ws(char c) { return c == ' ' || c == '\t'; }
+
+/* Trim leading/trailing spaces and tabs, and collapse internal runs of
+ * whitespace into a single space. Returns a malloc'd string (possibly empty),
+ * or NULL when allocation fails. Caller frees. */
+static char *reduce_line(const char *src) {
+  size_t len = strlen(src);
+  /* find first non-ws */
+  size_t begin = 0;
+  while (begin < len && is_ws(src[begin]))
+    begin++;
+  if (begin == len) {
+    char *out = (char *)malloc(1);
+    if (out)
+      out[0] = '\0';
+    return out;
+  }
+  /* find last non-ws */
+  size_t end = len;
+  while (end > begin && is_ws(src[end - 1]))
+    end--;
+
+  /* Worst case: no collapsing needed, output size = end - begin + 1 */
+  char *out = (char *)malloc(end - begin + 1);
+  if (!out)
+    return NULL;
+  size_t oi = 0;
+  size_t i = begin;
+  while (i < end) {
+    if (is_ws(src[i])) {
+      out[oi++] = ' ';
+      while (i < end && is_ws(src[i]))
+        i++;
+    } else {
+      out[oi++] = src[i++];
+    }
+  }
+  out[oi] = '\0';
+  return out;
+}
+
+/* Read a line from fp. Returns malloc'd string without trailing newline, or
+ * NULL at end of input and on allocation failure. Caller frees. */
+static char *read_line(FILE *fp) {
+  size_t cap = 128;
+  size_t len = 0;
+  char *buf = (char *)malloc(cap);
+  if (!buf)
+    return NULL;
+  int c;
+  int got_any = 0;
+  while ((c = fgetc(fp)) != EOF) {
+    got_any = 1;
+    if (c == '\n')
+      break;
+    if (len + 1 >= cap) {
+      size_t new_cap = cap * 2;
+      char *nb = (char *)realloc(buf, new_cap);
+      if (!nb) {
+        free(buf);
+        return NULL;
+      }
+      buf = nb;
+      cap = new_cap;
+    }
+    buf[len++] = (char)c;
+  }
+  if (!got_any) {
+    free(buf);
+    return NULL;
+  }
+  buf[len] = '\0';
+  return buf;
+}
+
+static int is_allowed_command(const char *s) {
+  for (size_t i = 0; i < allowed_commands_count; i++) {
+    if (strcmp(s, allowed_commands[i]) == 0)
+      return 1;
+  }
+  return 0;
+}
+
+/* Strip comments and directives outside allowed_commands from the assembly
+ * file `file`, rewriting it in place only if something was dropped. The first
+ * four lines are buffered without filtering. On a read or allocation failure
+ * the file is left untouched. */
+void cleanupFile(const char *file) {
+  FILE *asm_file = fopen(file, "r");
+  if (!asm_file) {
+    printf("Error: file \"%s\" doesn't exist!\n", file);
+    return;
+  }
+
+  /* Dynamic array of char* for buffered lines. */
+  size_t buf_cap = 128;
+  size_t buf_len = 0;
+  char **buffer = (char **)malloc(buf_cap * sizeof(char *));
+  if (!buffer) {
+    fclose(asm_file);
+    return;
+  }
+
+  int dirty = 0;
+
+  /* Keep the first 4 lines verbatim (the initial capacity already covers
+   * them, so no growth check is needed here). */
+  for (int i = 0; i < 4; i++) {
+    char *l = read_line(asm_file);
+    if (l == NULL) {
+      /* NULL before end-of-file is a read or allocation failure. */
+      if (!feof(asm_file))
+        goto cleanup;
+      /* Fewer than 4 lines: pad with an empty string. */
+      l = (char *)calloc(1, 1);
+      if (l == NULL)
+        goto cleanup;
+    }
+    buffer[buf_len++] = l;
+  }
+
+  /* Process the rest. */
+  char *o_line;
+  while ((o_line = read_line(asm_file)) != NULL) {
+    char *line = reduce_line(o_line);
+    if (!line) {
+      /* Out of memory: abort instead of silently losing this line. */
+      free(o_line);
+      goto cleanup;
+    }
+    /* token = substring before first space */
+    char *space = strchr(line, ' ');
+    if (space)
+      *space = '\0';
+
+    int keep = 0;
+    size_t line_len = strlen(line);
+
+    /* Comment lines (';') are always dropped. Of the rest, keep anything
+     * that is not a directive, plus dot-labels and allowed directives. */
+    if (line[0] != ';') {
+      if (line[0] != '.' || line[line_len - 1] == ':' ||
+          is_allowed_command(line)) {
+        /* If previous buffered line starts with "\t.area" and this o_line
+         * also starts with "\t.area", drop the previous buffered one. */
+        if (buf_len != 0) {
+          const char *last_line = buffer[buf_len - 1];
+          size_t last_len = strlen(last_line);
+          size_t o_len = strlen(o_line);
+          if (last_len > 6 && strncmp(last_line, "\t.area", 6) == 0 &&
+              o_len > 6 && strncmp(o_line, "\t.area", 6) == 0) {
+            free(buffer[buf_len - 1]);
+            buf_len--;
+            dirty = 1; /* the buffer no longer matches the file */
+          }
+        }
+        keep = 1;
+      } else if (options.verbose) {
+        fprintf(stderr, "Discarded: %s\n", o_line);
+      }
+    }
+
+    free(line);
+
+    if (keep) {
+      if (buf_len == buf_cap) {
+        char **nb = (char **)realloc(buffer, 2 * buf_cap * sizeof(char *));
+        if (!nb) {
+          free(o_line);
+          goto cleanup;
+        }
+        buffer = nb;
+        buf_cap *= 2;
+      }
+      buffer[buf_len++] = o_line;
+    } else {
+      free(o_line);
+      dirty = 1;
+    }
+  }
+
+  /* NULL from read_line() before end-of-file is a failure: do not rewrite. */
+  if (!feof(asm_file))
+    goto cleanup;
+
+  fclose(asm_file);
+  asm_file = NULL;
+
+  if (dirty) {
+    FILE *output = fopen(file, "w");
+    if (!output) {
+      printf("Error opening file for output!\n");
+      goto cleanup;
+    }
+    for (size_t i = 0; i < buf_len; i++) {
+      fputs(buffer[i], output);
+      fputc('\n', output);
+    }
+    int write_failed = ferror(output);
+    if (fclose(output) != 0 || write_failed)
+      printf("Error writing file \"%s\"!\n", file);
+  }
+
+cleanup:
+  if (asm_file)
+    fclose(asm_file);
+  for (size_t i = 0; i < buf_len; i++)
+    free(buffer[i]);
+  free(buffer);
+}
diff --git a/src/KCCCleanup.cc b/src/KCCCleanup.cc
deleted file mode 100644
index e85760b30..000000000
--- a/src/KCCCleanup.cc
+++ /dev/null
@@ -1,107 +0,0 @@
-#include "KCCCleanup.h"
-
-#include "SDCCglobl.h"
-#include -#include -#include -#include -#include -#include - -extern struct options options; - -std::vector allowed_commands = { - ".equ", ".globl", ".area", ".org", ".map", ".db", ".ds", - ".dw", ".ascii", ".asciip", ".asciiz", ".if", ".ifdef", ".ifndef", - ".elif", ".elseif", ".end", ".module", ".optsdcc", ".function"}; - -std::string trim(const std::string &str, - const std::string &whitespace = " \t") { - const auto strBegin = str.find_first_not_of(whitespace); - if (strBegin == std::string::npos) - return ""; // no content - - const auto strEnd = str.find_last_not_of(whitespace); - const auto strRange = strEnd - strBegin + 1; - - return str.substr(strBegin, strRange); -} - -std::string reduce(const std::string &str, const std::string &fill = " ", - const std::string &whitespace = " \t") { - // trim first - auto result = trim(str, whitespace); - - // replace sub ranges - auto beginSpace = result.find_first_of(whitespace); - while (beginSpace != std::string::npos) { - const auto endSpace = result.find_first_not_of(whitespace, beginSpace); - const auto range = endSpace - beginSpace; - - result.replace(beginSpace, range, fill); - - const auto newStart = beginSpace + fill.length(); - beginSpace = result.find_first_of(whitespace, newStart); - } - - return result; -} - -void cleanupFile(const char *file) { - std::ifstream asm_file(file); - if (!asm_file) { - printf("Error: file \"%s\" doesn't exist!\n", file); - return; - } - std::vector buffer; - std::string line, o_line; - bool dirty = false; - // Keep the first five lines - for (int i = 0; i < 4; i++) { - std::getline(asm_file, o_line); - buffer.push_back(o_line); - } - // Process the rest of them, discarding unneeded pieces - for (int line_number = 0; std::getline(asm_file, o_line); line_number++) { - line = reduce(o_line); - size_t space = line.find(' '); - bool keep = false; - if (space != std::string::npos) - line = line.substr(0, space); - if (line[0] != ';') { - if ((line[0] != '.' 
|| line[line.length() - 1] == ':' || - std::find(allowed_commands.begin(), allowed_commands.end(), line) != - allowed_commands.end())) { - if (buffer.size() != 0) { - std::string last_line = buffer.back(); - if (last_line.length() > 6 && last_line.substr(0, 6) == "\t.area" && - o_line.length() > 6 && o_line.substr(0, 6) == "\t.area") { - buffer.pop_back(); - } - } - keep = true; - } else if (options.verbose) { - fprintf(stderr, "Discarded: %s\n", o_line.c_str()); - } - } - if (keep) { - buffer.push_back(o_line); - } else { - //~ printf("Dropping line: %s\n", o_line.c_str()); - dirty = true; - } - } - asm_file.close(); - if (dirty) { - std::ofstream output(file); - if (!output) { - printf("Error opening file for output!\n"); - return; - } else { - for (std::string s : buffer) { - output << s << std::endl; - } - output.close(); - } - } -} diff --git a/src/SDCCbtree.c b/src/SDCCbtree.c new file mode 100644 index 000000000..b3f4d3cbb --- /dev/null +++ b/src/SDCCbtree.c @@ -0,0 +1,254 @@ +/* Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2011 + * + * (c) 2011 Goethe-Universitaet Frankfurt + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Pure C port of the former SDCCbtree.cc. 
The original used boost's
+ * adjacency_list, but the graph is always a rooted in-tree (each node
+ * except the root has exactly one parent set via btree_add_child), so
+ * we represent it as a flat array of nodes. Node 0 is the root, and
+ * children are always appended after their parent -- matching boost's
+ * vecS vertex numbering -- so the "walk the larger id upward" trick
+ * used by btree_lowest_common_ancestor still works.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "common.h"
+#include "SDCCbtree.h"
+
+/* A node of the block tree. */
+typedef struct btree_node {
+  int parent;                  /* index of parent node; -1 for root */
+  int first_child, last_child; /* ends of the child list; -1 when empty */
+  int next_sibling;            /* next child of the same parent; -1 at the tail */
+  struct symbol **syms;        /* dynamic array of symbol pointers (the "set") */
+  int nsyms, syms_cap;
+  int cssize;                  /* accumulated subtree size (was btree[v].second) */
+  int block;                   /* original block id (bmaprev[v]) */
+} btree_node_t;
+
+static btree_node_t *nodes = NULL;
+static int nnodes = 0;
+static int nodes_cap = 0;
+
+/* bmap: block id -> node index. Dynamic array indexed by block id; -1 means
+ * the block is not present. Block ids are small non-negative shorts. */
+static int *bmap = NULL;
+static int bmap_cap = 0;
+
+/* Append a node for `block` and link it at the tail of `parent`'s child
+ * list (parent is -1 for the root). Returns the index of the new node. */
+static int
+btree_new_node (int parent, int block)
+{
+  int idx;
+  if (nnodes == nodes_cap)
+    {
+      int newcap = nodes_cap ? nodes_cap * 2 : 16;
+      btree_node_t *n = (btree_node_t *) realloc (nodes, (size_t) newcap * sizeof (btree_node_t));
+      wassert (n);
+      nodes = n;
+      nodes_cap = newcap;
+    }
+  idx = nnodes++;
+  nodes[idx].parent = parent;
+  nodes[idx].first_child = nodes[idx].last_child = nodes[idx].next_sibling = -1;
+  nodes[idx].syms = NULL;
+  nodes[idx].nsyms = 0;
+  nodes[idx].syms_cap = 0;
+  nodes[idx].cssize = 0;
+  nodes[idx].block = block;
+  if (parent >= 0)
+    {
+      if (nodes[parent].last_child == -1)
+        nodes[parent].first_child = idx;
+      else
+        nodes[nodes[parent].last_child].next_sibling = idx;
+      nodes[parent].last_child = idx;
+    }
+  return idx;
+}
+
+static void
+bmap_ensure (int block)
+{
+  if (block < bmap_cap)
+    return;
+  {
+    int newcap = bmap_cap ? bmap_cap : 16;
+    int i;
+    int *nb;
+    while (newcap <= block)
+      newcap *= 2;
+    nb = (int *) realloc (bmap, (size_t) newcap * sizeof (int));
+    wassert (nb);
+    for (i = bmap_cap; i < newcap; i++)
+      nb[i] = -1;
+    bmap = nb;
+    bmap_cap = newcap;
+  }
+}
+
+static int
+bmap_get (int block)
+{
+  if (block < 0 || block >= bmap_cap)
+    return -1;
+  return bmap[block];
+}
+
+void
+btree_clear (void)
+{
+  int i;
+  /* All nodes are created below node 0: empty every symbol set in one pass. */
+  for (i = 0; i < nnodes; i++)
+    nodes[i].nsyms = 0;
+}
+
+void
+btree_add_child (short parent, short child)
+{
+  int pidx, cidx;
+
+  if (nnodes == 0)
+    {
+      (void) btree_new_node (-1, 0);
+      bmap_ensure (0);
+      bmap[0] = 0;
+    }
+
+  wassert (parent != child);
+  pidx = bmap_get (parent);
+  wassert (pidx != -1);
+
+  cidx = btree_new_node (pidx, child);
+  bmap_ensure (child);
+  bmap[child] = cidx;
+
+  wassert (pidx != cidx);
+}
+
+static int
+btree_lowest_common_ancestor_impl (int a, int b)
+{
+  while (a != b)
+    {
+      if (a > b)
+        a = nodes[a].parent;
+      else
+        b = nodes[b].parent;
+    }
+  return a;
+}
+
+short
+btree_lowest_common_ancestor (short a, short b)
+{
+  int ai = bmap_get (a);
+  int bi = bmap_get (b);
+  int anc;
+  wassert (ai != -1 && bi != -1);
+  anc = btree_lowest_common_ancestor_impl (ai, bi);
+  return (short) nodes[anc].block;
+}
+
+void
+btree_add_symbol (struct symbol *s)
+{
+  int block, v, i;
+  btree_node_t *n;
+
+  wassert (s);
+  /* This is essentially a workaround. TODO: Ensure that the parameter
+   * block is placed correctly in the btree instead! */
+  block = s->_isparm ? 0 : s->block;
+
+  v = bmap_get (block);
+  wassert (v != -1);
+  wassert (v < nnodes);
+  n = &nodes[v];
+
+  /* Set semantics: skip duplicates. */
+  for (i = 0; i < n->nsyms; i++)
+    if (n->syms[i] == s)
+      return;
+
+  if (n->nsyms == n->syms_cap)
+    {
+      int newcap = n->syms_cap ? n->syms_cap * 2 : 4;
+      struct symbol **ns = (struct symbol **) realloc (n->syms, (size_t) newcap * sizeof (struct symbol *));
+      wassert (ns);
+      n->syms = ns;
+      n->syms_cap = newcap;
+    }
+  n->syms[n->nsyms++] = s;
+}
+
+static void
+btree_alloc_subtree (int v, int sPtr, int cssize, int *ssize)
+{
+  int i;
+  btree_node_t *n;
+
+  wassert (v < nnodes);
+  n = &nodes[v];
+
+  for (i = 0; i < n->nsyms; i++)
+    {
+      struct symbol *const sym = n->syms[i];
+      const int size = getSize (sym->type);
+
+      if (port->stack.direction > 0)
+        {
+          SPEC_STAK (sym->etype) = sym->stack = (sPtr + 1);
+          sPtr += size;
+        }
+      else
+        {
+          sPtr -= size;
+          SPEC_STAK (sym->etype) = sym->stack = sPtr;
+        }
+
+      cssize += size;
+    }
+  nodes[v].cssize = cssize;
+  if (cssize > *ssize)
+    *ssize = cssize;
+
+  /* Recurse into children via the sibling list instead of rescanning nodes. */
+  for (i = n->first_child; i != -1; i = nodes[i].next_sibling)
+    btree_alloc_subtree (i, sPtr, cssize, ssize);
+}
+
+void
+btree_alloc (void)
+{
+  int ssize = 0;
+
+  if (nnodes == 0)
+    return;
+
+  btree_alloc_subtree (0, 0, 0, &ssize);
+
+  if (currFunc)
+    {
+      currFunc->stack += ssize;
+      SPEC_STAK (currFunc->etype) += ssize;
+    }
+}
diff --git a/src/SDCCbtree.cc b/src/SDCCbtree.cc
deleted file mode 100644
index 7c75244bd..000000000
--- a/src/SDCCbtree.cc
+++ /dev/null
@@ -1,169 +0,0 @@
-// Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2011
-//
-// (c) 2011 Goethe-Universität Frankfurt
-//
-// This program is free software; you can redistribute it and/or modify it
-// under the terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 2, or (at your option) any
-// later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -#include -#include - -#include - -#include "common.h" - -#ifdef HAVE_STX_BTREE_MAP_H -#include -#endif - -extern "C" { -#include "SDCCbtree.h" -} - -#undef BTREE_DEBUG - -typedef boost::adjacency_list, int>> - btree_t; -#ifdef HAVE_STX_BTREE_MAP_H -typedef stx::btree_map bmap_t; -typedef stx::btree_map bmaprev_t; -#else -typedef std::map bmap_t; -typedef std::map bmaprev_t; -#endif - -static btree_t btree; -static bmap_t bmap; -static bmaprev_t bmaprev; - -void btree_clear_subtree(btree_t::vertex_descriptor v) { - btree[v].first.clear(); - - boost::graph_traits::out_edge_iterator e, e_end; - for (boost::tie(e, e_end) = boost::out_edges(v, btree); e != e_end; ++e) - btree_clear_subtree(boost::target(*e, btree)); -} - -void btree_clear(void) { -#ifdef BTREE_DEBUG - std::cout << "Clearing.\n"; - std::cout.flush(); -#endif - btree_clear_subtree(0); -} - -void btree_add_child(short parent, short child) { -#ifdef BTREE_DEBUG - std::cout << "Adding child " << child << " at parent " << parent << "\n"; - std::cout.flush(); -#endif - - if (!boost::num_vertices(btree)) { - boost::add_vertex(btree); - bmap[0] = 0; - bmaprev[0] = 0; - } - - wassert(parent != child); - wassert(bmap.find(parent) != bmap.end()); - - btree_t::vertex_descriptor c = boost::add_vertex(btree); - bmap[child] = c; - bmaprev[c] = child; - - wassert(bmap[parent] != c); - - boost::add_edge(bmap[parent], c, btree); -} - -static btree_t::vertex_descriptor -btree_lowest_common_ancestor_impl(btree_t::vertex_descriptor a, - btree_t::vertex_descriptor b) { - if (a == b) - return (a); - else if (a > b) - a = boost::source(*boost::in_edges(a, btree).first, btree); - else // (a < b) - b = boost::source(*boost::in_edges(b, btree).first, btree); - - 
return (btree_lowest_common_ancestor(a, b)); -} - -short btree_lowest_common_ancestor(short a, short b) { - return (bmaprev[btree_lowest_common_ancestor_impl(bmap[a], bmap[b])]); -} - -void btree_add_symbol(struct symbol *s) { - int block; - wassert(s); - block = s->_isparm ? 0 : s->block; // This is essentially a workaround. TODO: - // Ensure that the parameter block is - // placed correctly in the btree instead! - -#ifdef BTREE_DEBUG - std::cout << "Adding symbol " << s->name << " at " << block << "\n"; -#endif - - wassert(bmap.find(block) != bmap.end()); - wassert(bmap[block] < boost::num_vertices(btree)); - btree[bmap[block]].first.insert(s); -} - -static void btree_alloc_subtree(btree_t::vertex_descriptor v, int sPtr, - int cssize, int *ssize) { - std::set::iterator s, s_end; - wassert(v < boost::num_vertices(btree)); - for (s = btree[v].first.begin(), s_end = btree[v].first.end(); s != s_end; - ++s) { - struct symbol *const sym = *s; - const int size = getSize(sym->type); - -#ifdef BTREE_DEBUG - std::cout << "Allocating symbol " << sym->name << " (" << v << ") of size " - << size << " to " << sPtr << "\n"; -#endif - - if (port->stack.direction > 0) { - SPEC_STAK(sym->etype) = sym->stack = (sPtr + 1); - sPtr += size; - } else { - sPtr -= size; - SPEC_STAK(sym->etype) = sym->stack = sPtr; - } - - cssize += size; - } - btree[v].second = cssize; - if (cssize > *ssize) - *ssize = cssize; - - boost::graph_traits::out_edge_iterator e, e_end; - for (boost::tie(e, e_end) = boost::out_edges(v, btree); e != e_end; ++e) - btree_alloc_subtree(boost::target(*e, btree), sPtr, cssize, ssize); -} - -void btree_alloc(void) { - int ssize = 0; - - if (!boost::num_vertices(btree)) - return; - - btree_alloc_subtree(0, 0, 0, &ssize); - - if (currFunc) { - currFunc->stack += ssize; - SPEC_STAK(currFunc->etype) += ssize; - } -} diff --git a/src/SDCClospre.c b/src/SDCClospre.c new file mode 100644 index 000000000..b21487e8c --- /dev/null +++ b/src/SDCClospre.c @@ -0,0 +1,1160 @@ +/* 
Lifetime-optimal speculative partial redundancy elimination.
+ *
+ * Original C++ implementation:
+ *   Philipp Klaus Krause, 2012. (c) Goethe-Universitat Frankfurt.
+ *
+ * C port of SDCClospre.cc / SDCClospre.hpp. Uses the in-tree
+ * cgraph_t / uiset_t / tree_dec_t containers in place of Boost.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common.h"
+#include "SDCCopt.h"
+
+#include "util/cgraph.h"
+#include "util/uiset.h"
+#include "SDCCtree_dec.h"
+
+/* ================================================================= *
+ * Data structures                                                   *
+ * ================================================================= */
+
+/* Per-CFG-vertex bundle. */
+typedef struct {
+  iCode *ic;
+  int uses;           /* bool */
+  int invalidates;    /* bool */
+  int forward_first;  /* pair.first, -1 sentinel */
+  int forward_second; /* pair.second, -1 sentinel */
+} cfg_lospre_node_t;
+
+typedef struct {
+  cgraph_t g;              /* CG_BIDIRECTIONAL, has_weights=1 */
+  cfg_lospre_node_t *node; /* per-vertex data, indexed by vertex number */
+  size_t cap;              /* allocated length of node[] */
+} cfg_lospre_t;
+
+/* Assignment (std::list element). */
+typedef struct assignment_lospre {
+  float s0;        /* calculation costs */
+  float s1;        /* lifetime costs */
+  usset_t local;   /* set of unsigned short indices into global[] (was a std::set) */
+  char *global;    /* one flag byte per vertex (was a vector), size == num_vertices */
+  size_t global_n; /* number of bytes in global[] */
+
+  struct assignment_lospre *prev, *next;
+} assignment_lospre_t;
+
+/* Doubly-linked list of assignments. */
+typedef struct {
+  assignment_lospre_t *head, *tail;
+  size_t n; /* number of linked elements */
+} alist_t;
+
+/* Per-tree-dec bag: assignment list lives on the node too. The bag itself
+ * is held by the underlying tree_dec_t. We mirror assignments in a parallel
+ * array indexed by tree vertex.
*/ +typedef struct { + alist_t *assignments; /* size = t->cap, grown alongside tree vertices */ + size_t cap; +} alist_per_vertex_t; + +/* ================================================================= * + * cfg_lospre lifecycle * + * ================================================================= */ + +static void cfg_init(cfg_lospre_t *c) { + cg_init(&c->g, CG_BIDIRECTIONAL, 1); + c->node = NULL; + c->cap = 0; +} + +static void cfg_free(cfg_lospre_t *c) { + cg_free(&c->g); + free(c->node); + c->node = NULL; + c->cap = 0; +} + +static void cfg_grow(cfg_lospre_t *c) { + if (c->g.nvertices < c->cap) return; + size_t nc = c->cap ? c->cap * 2 : 16; + c->node = (cfg_lospre_node_t *)realloc(c->node, nc * sizeof(cfg_lospre_node_t)); + c->cap = nc; +} + +static unsigned int cfg_add_vertex(cfg_lospre_t *c) { + cfg_grow(c); + unsigned int v = cg_add_vertex(&c->g); + c->node[v].ic = NULL; + c->node[v].uses = 0; + c->node[v].invalidates = 0; + c->node[v].forward_first = -1; + c->node[v].forward_second = -1; + return v; +} + +/* ================================================================= * + * alist helpers * + * ================================================================= */ + +static void alist_init(alist_t *l) { l->head = l->tail = NULL; l->n = 0; } + +static assignment_lospre_t *assignment_new_empty(size_t global_n) { + assignment_lospre_t *a = (assignment_lospre_t *)calloc(1, sizeof(*a)); + a->s0 = 0; a->s1 = 0; + usset_init(&a->local); + a->global = (char *)calloc(global_n > 0 ? global_n : 1, 1); + a->global_n = global_n; + return a; +} + +static assignment_lospre_t *assignment_clone(const assignment_lospre_t *src) { + assignment_lospre_t *a = (assignment_lospre_t *)calloc(1, sizeof(*a)); + a->s0 = src->s0; + a->s1 = src->s1; + usset_init(&a->local); + usset_copy(&a->local, &src->local); + a->global_n = src->global_n; + a->global = (char *)malloc(src->global_n > 0 ? 
src->global_n : 1); + if (src->global_n) memcpy(a->global, src->global, src->global_n); + return a; +} + +static void assignment_free(assignment_lospre_t *a) { + if (!a) return; + usset_free(&a->local); + free(a->global); + free(a); +} + +static void alist_push_back(alist_t *l, assignment_lospre_t *a) { + a->prev = l->tail; + a->next = NULL; + if (l->tail) l->tail->next = a; + else l->head = a; + l->tail = a; + l->n++; +} + +/* Remove node `a` from list, return the successor. Does not free `a`. */ +static assignment_lospre_t *alist_unlink(alist_t *l, assignment_lospre_t *a) { + assignment_lospre_t *nx = a->next; + if (a->prev) a->prev->next = a->next; else l->head = a->next; + if (a->next) a->next->prev = a->prev; else l->tail = a->prev; + a->prev = a->next = NULL; + l->n--; + return nx; +} + +/* Remove+free node; return successor. */ +static assignment_lospre_t *alist_erase(alist_t *l, assignment_lospre_t *a) { + assignment_lospre_t *nx = alist_unlink(l, a); + assignment_free(a); + return nx; +} + +static void alist_clear(alist_t *l) { + assignment_lospre_t *a = l->head; + while (a) { + assignment_lospre_t *nx = a->next; + assignment_free(a); + a = nx; + } + l->head = l->tail = NULL; + l->n = 0; +} + +/* Swap contents of two lists. */ +static void alist_swap(alist_t *a, alist_t *b) { + alist_t tmp = *a; *a = *b; *b = tmp; +} + +/* lexicographic: (local element, then global[elem]). Returns <0,0,>0. */ +static int assignment_cmp(const assignment_lospre_t *a, + const assignment_lospre_t *b) { + size_t ia = 0, ib = 0; + for (;;) { + int aend = (ia >= a->local.n); + int bend = (ib >= b->local.n); + if (aend && bend) return 0; + if (aend) return -1; /* a is "less" */ + if (bend) return 1; + unsigned short ae = a->local.items[ia]; + unsigned short be = b->local.items[ib]; + if (ae != be) return ae < be ? -1 : 1; + int ag = a->global[ae] ? 1 : 0; + int bg = b->global[be] ? 1 : 0; + if (ag != bg) return ag < bg ? 
-1 : 1; + ia++; ib++; + } +} + +/* `a->s > b->s` in boost::tuple sense: lex by s0 then s1. */ +static int assignment_s_greater(const assignment_lospre_t *a, + const assignment_lospre_t *b) { + if (a->s0 != b->s0) return a->s0 > b->s0; + return a->s1 > b->s1; +} + +/* Equal ignoring list links. */ +static int assignments_locally_same(const assignment_lospre_t *a, + const assignment_lospre_t *b) { + if (!usset_equal(&a->local, &b->local)) return 0; + for (size_t i = 0; i < a->local.n; i++) { + unsigned short idx = a->local.items[i]; + if ((a->global[idx] ? 1 : 0) != (b->global[idx] ? 1 : 0)) return 0; + } + return 1; +} + +/* Sort alist via qsort on an array of pointers, then re-link. */ +static int alist_sort_cmp(const void *x, const void *y) { + const assignment_lospre_t *a = *(const assignment_lospre_t *const *)x; + const assignment_lospre_t *b = *(const assignment_lospre_t *const *)y; + return assignment_cmp(a, b); +} + +static void alist_sort(alist_t *l) { + if (l->n < 2) return; + assignment_lospre_t **arr = + (assignment_lospre_t **)malloc(l->n * sizeof(*arr)); + size_t i = 0; + for (assignment_lospre_t *a = l->head; a; a = a->next) arr[i++] = a; + qsort(arr, l->n, sizeof(*arr), alist_sort_cmp); + + l->head = arr[0]; + l->tail = arr[l->n - 1]; + arr[0]->prev = NULL; + arr[l->n - 1]->next = NULL; + for (size_t k = 0; k + 1 < l->n; k++) { + arr[k]->next = arr[k + 1]; + arr[k + 1]->prev = arr[k]; + } + free(arr); +} + +/* ================================================================= * + * alist_per_vertex helpers * + * ================================================================= */ + +static void apv_init(alist_per_vertex_t *a) { a->assignments = NULL; a->cap = 0; } + +static void apv_ensure(alist_per_vertex_t *a, size_t n) { + if (n <= a->cap) return; + size_t nc = a->cap ? 
a->cap : 8; + while (nc < n) nc *= 2; + a->assignments = (alist_t *)realloc(a->assignments, nc * sizeof(alist_t)); + for (size_t i = a->cap; i < nc; i++) alist_init(&a->assignments[i]); + a->cap = nc; +} + +static void apv_free(alist_per_vertex_t *a, size_t n_used) { + if (!a->assignments) { a->cap = 0; return; } + for (size_t i = 0; i < n_used && i < a->cap; i++) alist_clear(&a->assignments[i]); + free(a->assignments); + a->assignments = NULL; + a->cap = 0; +} + +/* ================================================================= * + * CFG construction * + * ================================================================= */ + +/* key -> index map (small: linear search is O(N^2) but acceptable; matches + * what SDCCnaddr.cc used with std::map). We build a sorted array for + * binary search. */ +typedef struct { + int key; + unsigned int idx; +} key_pair_t; + +static int key_pair_cmp(const void *a, const void *b) { + int ka = ((const key_pair_t *)a)->key; + int kb = ((const key_pair_t *)b)->key; + return ka < kb ? -1 : (ka > kb ? 1 : 0); +} + +static unsigned int key_lookup(const key_pair_t *arr, size_t n, int key) { + size_t lo = 0, hi = n; + while (lo < hi) { + size_t mid = lo + ((hi - lo) >> 1); + if (arr[mid].key < key) lo = mid + 1; + else hi = mid; + } + /* Original std::map[] would default-construct; we trust the key is present. */ + return arr[lo].idx; +} + +static void create_cfg_lospre(cfg_lospre_t *cfg, iCode *start_ic, ebbIndex *ebbi) { + size_t n = 0; + for (iCode *ic = start_ic; ic; ic = ic->next) n++; + + key_pair_t *k2i = (key_pair_t *)malloc((n ? 
n : 1) * sizeof(*k2i)); + size_t i = 0; + for (iCode *ic = start_ic; ic; ic = ic->next, i++) { + unsigned int v = cfg_add_vertex(cfg); + cfg->node[v].ic = ic; + k2i[i].key = ic->key; + k2i[i].idx = v; + } + qsort(k2i, n, sizeof(*k2i), key_pair_cmp); + + for (iCode *ic = start_ic; ic; ic = ic->next) { + unsigned int src = key_lookup(k2i, n, ic->key); + + if ((ic->op == '>' || ic->op == '<' || ic->op == LE_OP || + ic->op == GE_OP || ic->op == EQ_OP || ic->op == NE_OP || + ic->op == '^' || ic->op == '|' || ic->op == BITWISEAND) && + ifxForOp(IC_RESULT(ic), ic)) { + cg_add_edge(&cfg->g, src, + key_lookup(k2i, n, ic->next->key), 4.0f); + } else if (ic->op != GOTO && ic->op != RETURN && + ic->op != JUMPTABLE && ic->next) { + cg_add_edge(&cfg->g, src, + key_lookup(k2i, n, ic->next->key), 3.0f); + } + + if (ic->op == GOTO) { + cg_add_edge(&cfg->g, src, + key_lookup(k2i, n, eBBWithEntryLabel(ebbi, ic->label)->sch->key), + 6.0f); + } else if (ic->op == RETURN) { + cg_add_edge(&cfg->g, src, + key_lookup(k2i, n, eBBWithEntryLabel(ebbi, returnLabel)->sch->key), + 6.0f); + } else if (ic->op == IFX) { + symbol *tgt = IC_TRUE(ic) ? IC_TRUE(ic) : IC_FALSE(ic); + cg_add_edge(&cfg->g, src, + key_lookup(k2i, n, eBBWithEntryLabel(ebbi, tgt)->sch->key), + 6.0f); + } else if (ic->op == JUMPTABLE) { + for (symbol *lbl = (symbol *)setFirstItem(IC_JTLABELS(ic)); lbl; + lbl = (symbol *)setNextItem(IC_JTLABELS(ic))) + cg_add_edge(&cfg->g, src, + key_lookup(k2i, n, eBBWithEntryLabel(ebbi, lbl)->sch->key), + 6.0f); + } + } + + free(k2i); +} + +/* ================================================================= * + * Candidate expressions * + * ================================================================= */ + +static int candidate_expression(const iCode *ic, int lkey) { + (void)lkey; + wassert(ic); + + if (ic->op != '!' 
&& ic->op != '~' && ic->op != UNARYMINUS && + ic->op != '+' && ic->op != '-' && ic->op != '*' && ic->op != '/' && + ic->op != '%' && ic->op != '>' && ic->op != '<' && + ic->op != LE_OP && ic->op != GE_OP && ic->op != NE_OP && + ic->op != EQ_OP && ic->op != AND_OP && ic->op != OR_OP && + ic->op != '^' && ic->op != '|' && ic->op != BITWISEAND && + ic->op != RRC && ic->op != RLC && ic->op != GETABIT && + ic->op != GETHBIT && ic->op != LEFT_OP && ic->op != RIGHT_OP && + !(ic->op == '=' && !POINTER_SET(ic) && !(IS_ITEMP(IC_RIGHT(ic)))) && + ic->op != GET_VALUE_AT_ADDRESS && ic->op != CAST) + return 0; + + operand *left = IC_LEFT(ic); + operand *right = IC_RIGHT(ic); + operand *result = IC_RESULT(ic); + + if (ic->op == '=' && IS_OP_LITERAL(right)) + return 0; + + if (IS_OP_VOLATILE(left) || IS_OP_VOLATILE(right)) + return 0; + + if (POINTER_GET(ic) && IS_VOLATILE(operandType(IC_LEFT(ic))->next)) + return 0; + + if ((ic->op != CAST && left && !(IS_SYMOP(left) || IS_OP_LITERAL(left))) || + (right && !(IS_SYMOP(right) || IS_OP_LITERAL(right))) || + (result && !(IS_SYMOP(result) || IS_OP_LITERAL(result)))) + return 0; + + return 1; +} + +static int same_expression(const iCode *lic, const iCode *ric) { + wassert(lic); + wassert(ric); + + if (lic->op != ric->op) return 0; + + operand *lleft = IC_LEFT(lic); + operand *lright = IC_RIGHT(lic); + operand *lresult = IC_RESULT(lic); + operand *rleft = IC_LEFT(ric); + operand *rright = IC_RIGHT(ric); + operand *rresult = IC_RESULT(ric); + + int ops_match = + (isOperandEqual(lleft, rleft) && isOperandEqual(lright, rright)) || + (IS_COMMUTATIVE(lic) && isOperandEqual(lleft, rright) && + isOperandEqual(lright, rleft)); + + if (ops_match && lresult && rresult && + compareTypeInexact(operandType(lresult), operandType(rresult)) > 0) + return 1; + + return 0; +} + +static void get_candidate_set(iset_t *c, const iCode *sic, int lkey) { + for (const iCode *ic = sic; ic; ic = ic->next) { + if (!candidate_expression(ic, lkey)) continue; + for 
(const iCode *pic = sic; pic != ic; pic = pic->next) {
+      if (candidate_expression(pic, lkey) &&
+          same_expression(ic, pic) &&
+          !iset_contains(c, pic->key)) {
+        iset_insert(c, pic->key);
+        break;
+      }
+    }
+  }
+}
+
+/* Annotate every CFG node for the expression computed by `eic`:
+ *   uses        - the node's iCode is same_expression() as eic;
+ *   invalidates - the node may change the value of the expression;
+ *   forward_*   - reset to -1 (no forwarding recorded yet).
+ * Returns 1 when the caller has to run the safety analysis first, else 0. */
+static int setup_cfg_for_expression(cfg_lospre_t *cfg, const iCode *eic) {
+  operand *eleft = IC_LEFT(eic);
+  operand *eright = IC_RIGHT(eic);
+  /* The expression reads memory that calls and pointer stores may modify. */
+  int uses_global =
+      (eic->op == GET_VALUE_AT_ADDRESS) ||
+      isOperandGlobal(eleft) || isOperandGlobal(eright) ||
+      (IS_SYMOP(eleft) && OP_SYMBOL_CONST(eleft)->addrtaken) ||
+      (IS_SYMOP(eright) && OP_SYMBOL_CONST(eright)->addrtaken);
+  int safety_required = 0;
+
+  /* Function calls can have any side effects. */
+  if (eic->op == CALL || eic->op == PCALL)
+    safety_required = 1;
+  /* Reading from an invalid address might be dangerous (memory-mapped I/O). */
+  if (eic->op == GET_VALUE_AT_ADDRESS && !optimize.lospre_unsafe_read)
+    safety_required = 1;
+  /* When optimizing for speed, safety is always required. */
+  if (optimize.codeSpeed)
+    safety_required = 1;
+
+  size_t nv = cg_num_vertices(&cfg->g);
+  for (unsigned int i = 0; i < nv; i++) {
+    iCode *ic = cfg->node[i].ic;
+    cfg->node[i].uses = same_expression(eic, ic);
+    cfg->node[i].invalidates = 0;
+    /* Writing one of the expression's operands (not via pointer store). */
+    if (IC_RESULT(ic) && !IS_OP_LITERAL(IC_RESULT(ic)) && !POINTER_SET(ic) &&
+        ((eleft && isOperandEqual(eleft, IC_RESULT(ic))) ||
+         (eright && isOperandEqual(eright, IC_RESULT(ic)))))
+      cfg->node[i].invalidates = 1;
+    if (ic->op == FUNCTION || ic->op == ENDFUNCTION || ic->op == RECEIVE)
+      cfg->node[i].invalidates = 1;
+    if (uses_global && (ic->op == CALL || ic->op == PCALL))
+      cfg->node[i].invalidates = 1;
+    if (uses_global && POINTER_SET(ic))
+      cfg->node[i].invalidates = 1;
+
+    cfg->node[i].forward_first = -1;
+    cfg->node[i].forward_second = -1;
+  }
+
+  return safety_required;
+}
+
+/* ================================================================= *
+ * Graph dump (optional)                                             *
+ * ================================================================= */
+
+/* Write the CFG as a Graphviz file named
+ * "<dstFileName>.dumplosprecfg<function rname>.dot". Each node is labelled
+ * "<vertex index>, <iCode key> : <printILine text>". Does nothing when there
+ * is no current function, when memory for the path cannot be allocated, or
+ * when the file cannot be opened. */
+static void dump_cfg_lospre(const cfg_lospre_t *cfg) {
+  if (!currFunc) return;
+
+  size_t nlen = strlen(dstFileName) + strlen(currFunc->rname) + 64;
+  char *path = (char *)malloc(nlen);
+  /* The dump is a best-effort debugging aid: skip it on allocation failure
+   * instead of handing a NULL buffer to snprintf()/fopen(). */
+  if (!path) return;
+  snprintf(path, nlen,
"%s.dumplosprecfg%s.dot", dstFileName, currFunc->rname);
+  FILE *f = fopen(path, "w");
+  free(path);
+  if (!f) return;
+
+  fprintf(f, "digraph G {\n");
+  size_t nv = cg_num_vertices(&cfg->g);
+  for (size_t i = 0; i < nv; i++) {
+    const char *iLine = printILine(cfg->node[i].ic);
+    /* Escape embedded quotes/newlines minimally. */
+    fprintf(f, " %zu [label=\"%zu, %d : ", i, i, cfg->node[i].ic->key);
+    for (const char *p = iLine; p && *p; p++) {
+      if (*p == '"') fputs("\\\"", f);
+      else if (*p == '\n') fputs("\\l", f);
+      else fputc(*p, f);
+    }
+    fprintf(f, "\"];\n");
+    dbuf_free(iLine);
+  }
+  /* One "u -> v" line per out-edge. */
+  for (size_t u = 0; u < nv; u++) {
+    for (size_t k = 0; k < cfg->g.out[u].n; k++) {
+      unsigned int v = cfg->g.out[u].dst[k];
+      fprintf(f, " %zu -> %u;\n", u, v);
+    }
+  }
+  fprintf(f, "}\n");
+  fclose(f);
+}
+
+/* ================================================================= *
+ * Tree-decomposition DP                                             *
+ * ================================================================= */
+
+/* Return the single child of t in T (exactly one expected). */
+static unsigned int t_only_child(const tree_dec_t *T, unsigned int t) {
+  return T->g.out[t].dst[0];
+}
+
+/* Leaf: the list of t becomes a single empty assignment sized for all
+ * vertices of G. */
+static void tree_dec_lospre_leaf(alist_per_vertex_t *A, unsigned int t,
+                                 const cfg_lospre_t *G) {
+  apv_ensure(A, t + 1);
+  alist_clear(&A->assignments[t]);
+  assignment_lospre_t *a = assignment_new_empty(cg_num_vertices(&G->g));
+  alist_push_back(&A->assignments[t], a);
+}
+
+/* Introduce. Returns 0 on success, -1 if we pruned. */
+static int tree_dec_lospre_introduce(alist_per_vertex_t *A,
+                                     const tree_dec_t *T,
+                                     unsigned int t,
+                                     const cfg_lospre_t *G) {
+  (void)G;
+  unsigned int c = t_only_child(T, t);
+  apv_ensure(A, t + 1);
+  alist_t *alist = &A->assignments[c];
+  alist_t *alist2 = &A->assignments[t];
+  alist_clear(alist2);
+
+  /* Every child assignment is duplicated below, so give up (and drop the
+   * child's list) when that would exceed options.max_allocs_per_node. */
+  if (alist->n > (size_t)options.max_allocs_per_node / 2) {
+    alist_clear(alist);
+    return -1;
+  }
+
+  /* new_inst = T[t].bag - T[c].bag (we take the first element).
*/
+  uiset_t new_inst;
+  uiset_init(&new_inst);
+  uiset_difference(&T->bag[t], &T->bag[c], &new_inst);
+  /* No vertex was introduced: the child's list is discarded and t is left
+   * with the empty list, yet 0 (success) is returned.
+   * NOTE(review): confirm this cannot happen for the decompositions
+   * produced by tree_dec_nicify(). */
+  if (new_inst.n == 0) { uiset_free(&new_inst); alist_clear(alist); return 0; }
+  unsigned short i = (unsigned short)new_inst.items[0];
+  uiset_free(&new_inst);
+
+  /* Each child assignment yields two assignments for t: vertex i joins the
+   * local set, once with global[i] = 0 and once with global[i] = 1. */
+  for (assignment_lospre_t *ai = alist->head; ai; ai = ai->next) {
+    usset_insert(&ai->local, i);
+    /* Emit copy with global[i]=false. */
+    ai->global[i] = 0;
+    assignment_lospre_t *c0 = assignment_clone(ai);
+    alist_push_back(alist2, c0);
+    /* Emit copy with global[i]=true. */
+    ai->global[i] = 1;
+    assignment_lospre_t *c1 = assignment_clone(ai);
+    alist_push_back(alist2, c1);
+  }
+  alist_clear(alist);
+  return 0;
+}
+
+/* Collapse: assumes alist is sorted.
+ *
+ * From every run of consecutive assignments that are
+ * assignments_locally_same() exactly one survives: while walking the run,
+ * the currently kept element is freed and replaced by the next one whenever
+ * assignment_s_greater(kept, next) holds; otherwise the next one is freed. */
+static void alist_collapse_locally_same(alist_t *alist) {
+  assignment_lospre_t *ai = alist->head;
+  while (ai) {
+    assignment_lospre_t *aif = ai;       /* survivor of the current run */
+    assignment_lospre_t *aj = ai->next;  /* next element to compare     */
+    while (aj && assignments_locally_same(aif, aj)) {
+      if (assignment_s_greater(aif, aj)) {
+        alist_unlink(alist, aif);
+        assignment_free(aif);
+        aif = aj;
+        aj = aj->next;
+      } else {
+        /* Save the successor before aj is unlinked and freed. */
+        assignment_lospre_t *nx = aj->next;
+        alist_unlink(alist, aj);
+        assignment_free(aj);
+        aj = nx;
+      }
+    }
+    ai = aif->next;
+  }
+}
+
+/* Forget: t takes over the child's list, the vertex that is in the child's
+ * bag but not in t's bag leaves the local set, and the costs s0/s1 are
+ * updated for it. */
+static void tree_dec_lospre_forget(alist_per_vertex_t *A,
+                                   const tree_dec_t *T,
+                                   unsigned int t,
+                                   const cfg_lospre_t *G) {
+  unsigned int c = t_only_child(T, t);
+  apv_ensure(A, t + 1);
+  alist_t *alist = &A->assignments[t];
+  alist_clear(alist);
+  alist_swap(alist, &A->assignments[c]);
+
+  /* old_inst = T[c].bag - T[t].bag; only its first element is used. */
+  uiset_t old_inst;
+  uiset_init(&old_inst);
+  uiset_difference(&T->bag[c], &T->bag[t], &old_inst);
+  if (old_inst.n == 0) { uiset_free(&old_inst); return; }
+  unsigned short i = (unsigned short)old_inst.items[0];
+  uiset_free(&old_inst);
+
+  for (assignment_lospre_t *ai = alist->head; ai; ai = ai->next) {
+    usset_erase(&ai->local, i);
+    /* s1 grows by one for every forgotten vertex with global[i] set. */
+    ai->s1 += ai->global[i] ? 1.0f : 0.0f;
+
+    /* Out-edges: (i -> tgt).
*/
+    size_t idx; unsigned int tgt; float wt;
+    CG_FOREACH_OUT(&G->g, i, idx, tgt, wt) {
+      /* ai->local.find(tgt) == end? */
+      if (!usset_contains(&ai->local, (unsigned short)tgt))
+        continue;
+      /* The edge weight is charged to s0 only when l < r, i.e. the source
+       * side is not (global and non-invalidating) while the target side is
+       * global or uses the expression. */
+      int l = (ai->global[i] && !G->node[i].invalidates) ? 1 : 0;
+      int r = (ai->global[tgt] || G->node[tgt].uses) ? 1 : 0;
+      if (l >= r) continue;
+      ai->s0 += wt;
+    }
+    /* In-edges: (src -> i). Same rule with the roles of i and the
+     * neighbour exchanged. */
+    unsigned int src;
+    CG_FOREACH_IN(&G->g, i, idx, src, wt) {
+      if (!usset_contains(&ai->local, (unsigned short)src))
+        continue;
+      int l = (ai->global[src] && !G->node[src].invalidates) ? 1 : 0;
+      int r = (ai->global[i] || G->node[i].uses) ? 1 : 0;
+      if (l >= r) continue;
+      ai->s0 += wt;
+    }
+  }
+
+  /* alist_collapse_locally_same() expects a sorted list. */
+  alist_sort(alist);
+  alist_collapse_locally_same(alist);
+
+  if (!alist->n)
+    fprintf(stderr, "No surviving assignments at forget node (lospre).\n");
+}
+
+/* Join (used by both lospre and safety).
+ *
+ * Both child lists are sorted and walked in lockstep; every pair that is
+ * assignments_locally_same() produces one merged assignment in t's list.
+ * Both child lists are cleared afterwards. */
+static void tree_dec_lospre_join(alist_per_vertex_t *A,
+                                 const tree_dec_t *T,
+                                 unsigned int t,
+                                 const cfg_lospre_t *G) {
+  (void)G;
+  /* Two children. */
+  unsigned int c2 = T->g.out[t].dst[0];
+  unsigned int c3 = T->g.out[t].dst[1];
+
+  apv_ensure(A, t + 1);
+  alist_t *alist1 = &A->assignments[t];
+  alist_t *alist2 = &A->assignments[c2];
+  alist_t *alist3 = &A->assignments[c3];
+  alist_clear(alist1);
+
+  alist_sort(alist2);
+  alist_sort(alist3);
+
+  assignment_lospre_t *ai2 = alist2->head;
+  assignment_lospre_t *ai3 = alist3->head;
+
+  while (ai2 && ai3) {
+    if (assignments_locally_same(ai2, ai3)) {
+      /* Merge: combine costs into ai2, OR global, push copy to alist1. */
+      ai2->s0 += ai3->s0;
+      ai2->s1 += ai3->s1;
+      size_t N = ai2->global_n;
+      for (size_t i = 0; i < N; i++)
+        ai2->global[i] = (ai2->global[i] || ai3->global[i]) ?
1 : 0;
+      alist_push_back(alist1, assignment_clone(ai2));
+      ai2 = ai2->next;
+      ai3 = ai3->next;
+    } else {
+      int cmp = assignment_cmp(ai2, ai3);
+      if (cmp < 0) ai2 = ai2->next;
+      else if (cmp > 0) ai3 = ai3->next;
+      else {
+        /* Equal by cmp but not locally_same -> advance both to avoid
+         * looping. The original `continue` loops forever in that
+         * theoretically-unreachable case; we advance conservatively. */
+        ai2 = ai2->next;
+        ai3 = ai3->next;
+      }
+    }
+  }
+
+  alist_clear(alist2);
+  alist_clear(alist3);
+}
+
+/* Dispatcher: computes the assignment list of tree node t by recursing into
+ * its children and then applying the matching leaf / introduce / forget /
+ * join step, chosen by the number of children and the bag sizes.
+ *
+ * Returns 0 on success and -1 when a child or the introduce step gave up
+ * (propagated). A node with more than two children only prints "Not nice."
+ * and still returns 0. */
+static int tree_dec_lospre_nodes(alist_per_vertex_t *A,
+                                 const tree_dec_t *T,
+                                 unsigned int t,
+                                 const cfg_lospre_t *G) {
+  size_t od = cg_out_degree(&T->g, t);
+  switch (od) {
+  case 0:
+    tree_dec_lospre_leaf(A, t, G);
+    break;
+  case 1: {
+    unsigned int c0 = T->g.out[t].dst[0];
+    if (tree_dec_lospre_nodes(A, T, c0, G) < 0) return -1;
+    /* A smaller child bag means t introduces a vertex; otherwise t forgets. */
+    if (T->bag[c0].n < T->bag[t].n) {
+      if (tree_dec_lospre_introduce(A, T, t, G) < 0) return -1;
+    } else {
+      tree_dec_lospre_forget(A, T, t, G);
+    }
+    break;
+  }
+  case 2: {
+    unsigned int c0 = T->g.out[t].dst[0];
+    unsigned int c1 = T->g.out[t].dst[1];
+    /* Process the heavier child first, exactly as tree_dec_safety_nodes()
+     * does; the removed C++ header documents this order as the way to keep
+     * memory consumption low. The join reads its children from T itself,
+     * so the processing order does not affect its result. */
+    if (T->weight[c0] < T->weight[c1]) {
+      unsigned int tmp = c0; c0 = c1; c1 = tmp;
+    }
+    if (tree_dec_lospre_nodes(A, T, c0, G) < 0) return -1;
+    if (tree_dec_lospre_nodes(A, T, c1, G) < 0) {
+      /* The first child already produced a list; release it. */
+      alist_clear(&A->assignments[c0]);
+      return -1;
+    }
+    tree_dec_lospre_join(A, T, t, G);
+    break;
+  }
+  default:
+    fprintf(stderr, "Not nice.\n");
+    break;
+  }
+  return 0;
+}
+
+/* Safety forget.
*/
+/* t takes over its only child's assignment list and the vertex that is in
+ * the child's bag but not in t's bag is removed from every local set.
+ * Assignments with global[i] set are dropped when node i uses the
+ * expression, or when no successor or no predecessor of i is global or
+ * invalidating; the survivors have s1 decremented. The list is then sorted
+ * and collapsed. */
+static void tree_dec_safety_forget(alist_per_vertex_t *A,
+                                   const tree_dec_t *T,
+                                   unsigned int t,
+                                   const cfg_lospre_t *G) {
+  unsigned int child = t_only_child(T, t);
+  apv_ensure(A, t + 1);
+  alist_t *list = &A->assignments[t];
+  alist_clear(list);
+  alist_swap(list, &A->assignments[child]);
+
+  /* Determine the forgotten vertex (first element of bag[child] - bag[t]). */
+  uiset_t forgotten;
+  uiset_init(&forgotten);
+  uiset_difference(&T->bag[child], &T->bag[t], &forgotten);
+  if (forgotten.n == 0) {
+    uiset_free(&forgotten);
+    return;
+  }
+  unsigned short i = (unsigned short)forgotten.items[0];
+  uiset_free(&forgotten);
+
+  for (assignment_lospre_t *cur = list->head; cur;) {
+    int keep = 1;
+
+    usset_erase(&cur->local, i);
+
+    if (cur->global[i]) {
+      if (G->node[i].uses) {
+        keep = 0;
+      } else {
+        size_t idx;
+        unsigned int nbr;
+        float wt;
+        int found = 0;
+
+        cur->s1 -= 1.0f;
+
+        /* At least one successor has to be global or invalidating ... */
+        CG_FOREACH_OUT(&G->g, i, idx, nbr, wt) {
+          if (cur->global[nbr] || G->node[nbr].invalidates) { found = 1; break; }
+        }
+        /* ... and, only if that held, at least one predecessor as well. */
+        if (found) {
+          found = 0;
+          CG_FOREACH_IN(&G->g, i, idx, nbr, wt) {
+            if (cur->global[nbr] || G->node[nbr].invalidates) { found = 1; break; }
+          }
+        }
+        keep = found;
+      }
+    }
+
+    /* alist_erase() hands back the element following the erased one. */
+    cur = keep ? cur->next : alist_erase(list, cur);
+  }
+
+  alist_sort(list);
+  alist_collapse_locally_same(list);
+
+  if (!list->n)
+    fprintf(stderr, "No surviving assignments at forget node.\n");
+}
+
+/* Safety dispatcher.
*/
+/* Same recursion scheme as the lospre dispatcher, but forget nodes are
+ * handled by tree_dec_safety_forget(). Returns 0 on success, -1 when a
+ * child or the introduce step gave up. */
+static int tree_dec_safety_nodes(alist_per_vertex_t *A,
+                                 const tree_dec_t *T,
+                                 unsigned int t,
+                                 const cfg_lospre_t *G) {
+  size_t n_children = cg_out_degree(&T->g, t);
+
+  /* Leaf node. */
+  if (n_children == 0) {
+    tree_dec_lospre_leaf(A, t, G);
+    return 0;
+  }
+
+  /* Introduce or forget node. */
+  if (n_children == 1) {
+    unsigned int child = T->g.out[t].dst[0];
+    if (tree_dec_safety_nodes(A, T, child, G) < 0)
+      return -1;
+    if (T->bag[child].n >= T->bag[t].n) {
+      tree_dec_safety_forget(A, T, t, G);
+      return 0;
+    }
+    if (tree_dec_lospre_introduce(A, T, t, G) < 0)
+      return -1;
+    return 0;
+  }
+
+  /* Join node: the child with the larger weight is processed first. */
+  if (n_children == 2) {
+    unsigned int first = T->g.out[t].dst[0];
+    unsigned int second = T->g.out[t].dst[1];
+    if (T->weight[first] < T->weight[second]) {
+      first = T->g.out[t].dst[1];
+      second = T->g.out[t].dst[0];
+    }
+    if (tree_dec_safety_nodes(A, T, first, G) < 0)
+      return -1;
+    if (tree_dec_safety_nodes(A, T, second, G) < 0) {
+      /* Release what the first child already produced. */
+      alist_clear(&A->assignments[first]);
+      return -1;
+    }
+    tree_dec_lospre_join(A, T, t, G);
+    return 0;
+  }
+
+  /* More than two children: not a nice tree decomposition. */
+  fprintf(stderr, "Not nice.\n");
+  return 0;
+}
+
+/* ================================================================= *
+ * Split edge                                                        *
+ * ================================================================= */
+
+static void split_edge(tree_dec_t *T, cfg_lospre_t *G,
+                       unsigned int esrc, unsigned int edst,
+                       float ewt,
+                       const iCode *ic, operand *tmpop) {
+  /* Insert new iCode into chain. */
+  iCode *newic = newiCode(ic->op, IC_LEFT(ic), IC_RIGHT(ic));
+  IC_RESULT(newic) = tmpop;
+  newic->filename = ic->filename;
+  newic->lineno = ic->lineno;
+  newic->prev = G->node[esrc].ic;
+  newic->next = G->node[edst].ic;
+  G->node[esrc].ic->next = newic;
+  G->node[edst].ic->prev = newic;
+
+  /* Insert node into cfg. */
+  unsigned int n = cfg_add_vertex(G);
+  G->node[n].ic = newic;
+  G->node[n].uses = 0;
+  cg_add_edge(&G->g, esrc, n, ewt);
+  cg_add_edge(&G->g, n, edst, 3.0f);
+
+  /* Update tree-decomposition: find a bag containing both endpoints and
+   * attach a new bag {esrc, edst, n} to it. Grow tree state first.
*/
+  /* Only the bags that existed before this call are searched; the first one
+   * containing both endpoints receives the new child bag.
+   * NOTE(review): if no bag contains both endpoints, vertex n ends up in no
+   * bag at all -- confirm that the decomposition guarantees such a bag. */
+  size_t n_tv = T->g.nvertices;
+  for (unsigned int n1 = 0; n1 < n_tv; n1++) {
+    if (!uiset_contains(&T->bag[n1], esrc)) continue;
+    if (!uiset_contains(&T->bag[n1], edst)) continue;
+    unsigned int n2 = tree_dec_add_vertex(T);
+    uiset_insert(&T->bag[n2], esrc);
+    uiset_insert(&T->bag[n2], edst);
+    uiset_insert(&T->bag[n2], n);
+    cg_add_edge(&T->g, n1, n2, 0.0f);
+    break;
+  }
+
+  /* Remove old edge. */
+  cg_remove_edge(&G->g, esrc, edst);
+}
+
+/* ================================================================= *
+ * forward_lospre_assignment                                         *
+ * ================================================================= */
+
+/* Starting at CFG vertex i, walk forward and replace operands equal to
+ * IC_RESULT(ic) by copies of IC_RIGHT(ic) (the temporary). `ic` is the
+ * assignment whose right-hand side is forwarded; `a` is the implemented
+ * assignment, consulted to stop at calculation edges. Visited vertices are
+ * marked through forward_first / forward_second. */
+static void forward_lospre_assignment(cfg_lospre_t *G, unsigned int i,
+                                      const iCode *ic,
+                                      const assignment_lospre_t *a) {
+  operand *tmpop = IC_RIGHT(ic);
+  /* The (result key, right key) pair identifies this forwarding run. */
+  int forward_first = IC_RESULT(ic)->key;
+  int forward_second = IC_RIGHT(ic)->key;
+
+  for (;;) {
+    if (G->node[i].forward_first == forward_first &&
+        G->node[i].forward_second == forward_second)
+      break; /* Already visited. */
+
+    iCode *nic = G->node[i].ic;
+
+    /* Left operand: replaced unless nic takes its address, or nic is a
+     * bit-field pointer read whose pointer type differs from the temp's.
+     * The isaddr flag of the replaced operand is carried over. */
+    if (isOperandEqual(IC_RESULT(ic), IC_LEFT(nic)) &&
+        nic->op != ADDRESS_OF &&
+        (!POINTER_GET(nic) || !IS_PTR(operandType(IC_RESULT(nic))) ||
+         !IS_BITFIELD(operandType(IC_LEFT(nic))->next) ||
+         compareType(operandType(IC_LEFT(nic)), operandType(tmpop)) == 1)) {
+      unsigned int isaddr = IC_LEFT(nic)->isaddr;
+      IC_LEFT(nic) = operandFromOperand(tmpop);
+      IC_LEFT(nic)->isaddr = isaddr;
+    }
+    /* Right operand: always replaced when equal. */
+    if (isOperandEqual(IC_RESULT(ic), IC_RIGHT(nic))) {
+      IC_RIGHT(nic) = operandFromOperand(tmpop);
+    }
+    /* Pointer store through the forwarded operand: replace the pointer. */
+    if (POINTER_SET(nic) && isOperandEqual(IC_RESULT(ic), IC_RESULT(nic)) &&
+        (!IS_PTR(operandType(IC_RESULT(nic))) ||
+         !IS_BITFIELD(operandType(IC_RESULT(nic))->next) ||
+         compareType(operandType(IC_RESULT(nic)), operandType(tmpop)) == 1)) {
+      IC_RESULT(nic) = operandFromOperand(tmpop);
+      IC_RESULT(nic)->isaddr = 1;
+    }
+
+    if (nic->op == LABEL) {
+      /* Continue only if all in-edges are already forwarded.
*/
+      int all_forwarded = 1;
+      size_t idx; unsigned int src; float wt;
+      CG_FOREACH_IN(&G->g, i, idx, src, wt) {
+        (void)wt;
+        if (G->node[src].forward_first != forward_first ||
+            G->node[src].forward_second != forward_second) {
+          all_forwarded = 0; break;
+        }
+      }
+      if (!all_forwarded) break;
+    }
+
+    /* Stop where the forwarded operand is overwritten (not a pointer store). */
+    if (isOperandEqual(IC_RESULT(ic), IC_RESULT(nic)) && !POINTER_SET(nic))
+      break;
+    /* Stop at calls and pointer stores when the result is a true symbol. */
+    if ((nic->op == CALL || nic->op == PCALL || POINTER_SET(nic)) &&
+        IS_TRUE_SYMOP(IC_RESULT(ic)))
+      break;
+
+    /* Mark this vertex as visited for the current forwarding run. */
+    G->node[i].forward_first = forward_first;
+    G->node[i].forward_second = forward_second;
+
+    /* NOTE(review): a->global is indexed with i and cnbr below without a
+     * bound check, while implement_lospre_assignment() notes that a->global
+     * does not cover vertices appended by split_edge() -- confirm that
+     * forwarding can never reach such a vertex. */
+    if (nic->op == GOTO || nic->op == IFX || nic->op == JUMPTABLE) {
+      /* Branching instruction: recurse into every successor. */
+      size_t idx; unsigned int cnbr; float wt;
+      CG_FOREACH_OUT(&G->g, i, idx, cnbr, wt) {
+        (void)wt;
+        int l = (a->global[i] && !G->node[i].invalidates) ? 1 : 0;
+        int r = (a->global[cnbr]) ? 1 : 0;
+        if (!l && r) continue; /* Calculation edge */
+        forward_lospre_assignment(G, cnbr, ic, a);
+      }
+      break;
+    }
+
+    /* Straight-line case: follow the first out-edge iteratively. */
+    if (G->g.out[i].n == 0) break;
+    unsigned int cnbr = G->g.out[i].dst[0];
+    int l = (a->global[i] && !G->node[i].invalidates) ? 1 : 0;
+    int r = (a->global[cnbr]) ? 1 : 0;
+    if (!l && r) break; /* Calculation edge */
+    i = cnbr;
+  }
+}
+
+/* ================================================================= *
+ * implement_lospre_assignment / implement_safety                    *
+ * ================================================================= */
+
+/* A CFG edge (src -> dst) with its weight, recorded so edges can be split
+ * after the scan over the graph has finished. */
+typedef struct {
+  unsigned int src, dst;
+  float wt;
+} edge_rec_t;
+
+/* Apply assignment `a_in` for the expression computed by `ic`: split every
+ * calculation edge with a computation into a fresh temporary, then rewrite
+ * the covered uses to read that temporary.
+ * Returns 0 if there was no calculation edge, 1 if at least one use was
+ * substituted, and -1 if edges were split but nothing was substituted. */
+static int implement_lospre_assignment(const assignment_lospre_t *a_in,
+                                       tree_dec_t *T, cfg_lospre_t *G,
+                                       const iCode *ic) {
+  /* Clone assignment so it survives tree-dec mutations. */
+  assignment_lospre_t *a = assignment_clone(a_in);
+
+  unsigned substituted = 0, split = 0;
+
+  /* Collect calculation edges.
*/
+  edge_rec_t *edges = NULL;
+  size_t n_edges = 0, cap_edges = 0;
+  size_t nv = cg_num_vertices(&G->g);
+  for (unsigned int u = 0; u < nv; u++) {
+    size_t idx; unsigned int v; float wt;
+    CG_FOREACH_OUT(&G->g, u, idx, v, wt) {
+      /* Calculation edge: the source side is not (global and
+       * non-invalidating) while the target is global. */
+      int l = (a->global[u] && !G->node[u].invalidates) ? 1 : 0;
+      int r = (a->global[v]) ? 1 : 0;
+      if (l >= r) continue;
+      if (n_edges == cap_edges) {
+        /* Grow geometrically. Keep the old pointer until realloc() has
+         * succeeded so that a failure neither leaks the buffer nor leads
+         * to a write through NULL. */
+        size_t new_cap = cap_edges ? cap_edges * 2 : 8;
+        edge_rec_t *grown =
+            (edge_rec_t *)realloc(edges, new_cap * sizeof(*edges));
+        if (!grown) {
+          /* Nothing has been modified yet: report "no change". */
+          free(edges);
+          assignment_free(a);
+          return 0;
+        }
+        edges = grown;
+        cap_edges = new_cap;
+      }
+      edges[n_edges].src = u;
+      edges[n_edges].dst = v;
+      edges[n_edges].wt = wt;
+      n_edges++;
+    }
+  }
+
+  /* No calculation edge means there is nothing to implement. */
+  if (!n_edges) {
+    free(edges);
+    assignment_free(a);
+    return 0;
+  }
+
+  /* One temporary, typed like the expression's result, receives the value
+   * on every split edge. */
+  operand *tmpop = newiTempOperand(operandType(IC_RESULT(ic)), TRUE);
+  tmpop->isvolatile = 0;
+
+  for (size_t k = 0; k < n_edges; k++) {
+    split_edge(T, G, edges[k].src, edges[k].dst, edges[k].wt, ic, tmpop);
+    split++;
+  }
+  free(edges);
+
+  /* After splitting, `a->global` doesn't cover new vertices (they were
+   * appended). It still indexes original vertices, which is what we need. */
+  nv = cg_num_vertices(&G->g);
+  for (unsigned int v = 0; v < nv; v++) {
+    if (!G->node[v].uses) continue;
+    if (a->global_n <= v) continue;
+    int has_in = (G->g.in[v].n > 0);
+    /* First predecessor of v, or 0 when v has none (then unused). */
+    unsigned int esrc = has_in ? G->g.in[v].dst[0] : 0;
+
+    int cond1 = (a->global[v] && !G->node[v].invalidates) ? 1 : 0;
+    int cond2 = (has_in && esrc < a->global_n && a->global[esrc]) ?
1 : 0;
+    /* Only uses covered by the assignment (at v itself or at its first
+     * predecessor) are rewritten. */
+    if (!cond1 && !cond2) continue;
+
+    substituted++;
+    iCode *iic = G->node[v].ic;
+    IC_RIGHT(iic) = tmpop;
+    /* Unless it is a pointer store, the use becomes "result = tmpop". */
+    if (!POINTER_SET(iic)) {
+      IC_LEFT(iic) = 0;
+      iic->op = '=';
+      IC_RESULT(iic) = operandFromOperand(IC_RESULT(iic));
+      IC_RESULT(iic)->isaddr = 0;
+    }
+    /* A volatile result is never forwarded. */
+    if (IS_OP_VOLATILE(IC_RESULT(iic))) continue;
+
+    /* Forward the temporary along the first out-edge of the use. */
+    if (G->g.out[v].n > 0) {
+      unsigned int cnbr = G->g.out[v].dst[0];
+      forward_lospre_assignment(G, cnbr, iic, a);
+    }
+  }
+
+  /* `substituted` is unsigned, so this tests for exactly zero. */
+  if (substituted <= 0) {
+    fprintf(stderr, "Introduced %s, but did not substitute any calculations.\n",
+            OP_SYMBOL_CONST(tmpop)->name);
+    assignment_free(a);
+    return -1;
+  }
+
+  /* More computations were inserted than removed: report it on stdout. */
+  if (substituted < split) {
+    fprintf(stdout,
+            "Introduced %s, but did substitute only %u calculations, "
+            "while introducing %u.\n",
+            OP_SYMBOL_CONST(tmpop)->name, substituted, split);
+    fflush(stdout);
+  }
+
+  assignment_free(a);
+  return 1;
+}
+
+/* Mark every CFG vertex v with a->global[v] set as invalidating. Vertices
+ * beyond a->global_n are left untouched; existing marks are never cleared. */
+static void implement_safety(const assignment_lospre_t *a, cfg_lospre_t *G) {
+  size_t nv = cg_num_vertices(&G->g);
+  for (unsigned int v = 0; v < nv; v++) {
+    if (v < a->global_n)
+      G->node[v].invalidates |= a->global[v] ? 1 : 0;
+  }
+}
+
+/* ================================================================= *
+ * tree_dec_lospre / tree_dec_safety                                 *
+ * ================================================================= */
+
+/* Run the lospre dynamic program over T for the expression of `ic` and
+ * implement the first assignment left at the root.
+ * Returns -1 if the dynamic program gave up, otherwise the result of
+ * implement_lospre_assignment(). */
+static int tree_dec_lospre(tree_dec_t *T, cfg_lospre_t *G, const iCode *ic) {
+  alist_per_vertex_t A;
+  apv_init(&A);
+  apv_ensure(&A, T->g.nvertices);
+
+  unsigned int root = tree_dec_find_root(T);
+  int err = tree_dec_lospre_nodes(&A, T, root, G);
+  if (err) {
+    apv_free(&A, T->g.nvertices);
+    return -1;
+  }
+
+  /* Grow in case root was mutated.
*/
+  apv_ensure(&A, T->g.nvertices);
+
+  alist_t *rlist = &A.assignments[root];
+  wassert(rlist->head != NULL);
+  /* The head of the root's list is the assignment that gets implemented. */
+  assignment_lospre_t *winner = rlist->head;
+
+  int change = implement_lospre_assignment(winner, T, G, ic);
+  /* Edge splitting attaches new bags to T, so re-nicify after any change
+   * (including the -1 "split but nothing substituted" result). */
+  if (change) tree_dec_nicify(T);
+  alist_clear(rlist);
+
+  /* NOTE(review): T may have more vertices here than when A was last
+   * grown (split_edge() calls tree_dec_add_vertex()). Confirm that
+   * apv_free() tolerates a count larger than A's allocated size. */
+  apv_free(&A, T->g.nvertices);
+  return change;
+}
+
+/* Run the safety dynamic program over T and mark the vertices selected by
+ * the first root assignment as invalidating (see implement_safety()).
+ * `ic` is unused. Returns 0 on success and -1 if the dynamic program gave
+ * up. */
+static int tree_dec_safety(tree_dec_t *T, cfg_lospre_t *G, const iCode *ic) {
+  (void)ic;
+  alist_per_vertex_t A;
+  apv_init(&A);
+  apv_ensure(&A, T->g.nvertices);
+
+  unsigned int root = tree_dec_find_root(T);
+  int err = tree_dec_safety_nodes(&A, T, root, G);
+  if (err) {
+    apv_free(&A, T->g.nvertices);
+    return -1;
+  }
+
+  apv_ensure(&A, T->g.nvertices);
+  alist_t *rlist = &A.assignments[root];
+  wassert(rlist->head != NULL);
+  assignment_lospre_t *winner = rlist->head;
+  implement_safety(winner, G);
+  alist_clear(rlist);
+
+  apv_free(&A, T->g.nvertices);
+  return 0;
+}
+
+/* ================================================================= *
+ * Public entry point                                                *
+ * ================================================================= */
+
+/* Lifetime-optimal speculative partial redundancy elimination over the
+ * iCode chain starting at `sic`. Builds the CFG and a nice tree
+ * decomposition once, then processes all candidate expressions repeatedly
+ * until a full round makes no substitution. */
+void lospre(iCode *sic, ebbIndex *ebbi) {
+  cfg_lospre_t cfg;
+  tree_dec_t td;
+
+  wassert(sic);
+
+  cfg_init(&cfg);
+  tree_dec_init(&td);
+
+  create_cfg_lospre(&cfg, sic, ebbi);
+
+  if (options.dump_graphs)
+    dump_cfg_lospre(&cfg);
+
+  tree_dec_thorup(&td, &cfg.g);
+  tree_dec_nicify(&td);
+
+  int lkey = operandKey;
+
+  int change = 1;
+  while (change) {
+    change = 0;
+
+    /* Keys of iCodes whose expression occurs at least twice. */
+    iset_t candidates;
+    iset_init(&candidates);
+    get_candidate_set(&candidates, sic, lkey);
+
+    for (size_t k = 0; k < candidates.n; k++) {
+      int ckey = candidates.items[k];
+      const iCode *ic;
+      /* Find the iCode with this key again; an earlier substitution in
+       * this round may have turned it into a non-candidate. */
+      for (ic = sic; ic && ic->key != ckey; ic = ic->next) ;
+      if (!ic || !candidate_expression(ic, lkey)) continue;
+
+      int safety = setup_cfg_for_expression(&cfg, ic);
+      /* Skip the expression if the required safety analysis gave up. */
+      if (safety && tree_dec_safety(&td, &cfg, ic) < 0) continue;
+      if (tree_dec_lospre(&td, &cfg, ic) > 0) change = 1;
+    }
+
+    iset_free(&candidates);
+  }
+
+  tree_dec_free(&td);
+
cfg_free(&cfg); +} diff --git a/src/SDCClospre.cc b/src/SDCClospre.cc deleted file mode 100644 index ed8ece9e7..000000000 --- a/src/SDCClospre.cc +++ /dev/null @@ -1,321 +0,0 @@ -// Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2012 -// -// (c) 2012 Goethe-Universität Frankfurt -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// -// Lifetime-optimal speculative partial redundancy elimination. - -// #define DEBUG_LOSPRE // Uncomment to get debug messages while doing lospre. - -#include "SDCClospre.hpp" - -// A quick-and-dirty function to get the CFG from sdcc (a simplified version of -// the function from SDCCralloc.hpp). -void create_cfg_lospre(cfg_lospre_t &cfg, iCode *start_ic, ebbIndex *ebbi) { - iCode *ic; - - std::map key_to_index; - { - int i; - - for (ic = start_ic, i = 0; ic; ic = ic->next, i++) { - boost::add_vertex(cfg); - key_to_index[ic->key] = i; - cfg[i].ic = ic; - } - } - - // Get control flow graph from sdcc. 
- for (ic = start_ic; ic; ic = ic->next) { - if ((ic->op == '>' || ic->op == '<' || ic->op == LE_OP || ic->op == GE_OP || - ic->op == EQ_OP || ic->op == NE_OP || ic->op == '^' || ic->op == '|' || - ic->op == BITWISEAND) && - ifxForOp(IC_RESULT(ic), ic)) - boost::add_edge(key_to_index[ic->key], key_to_index[ic->next->key], 4.0f, - cfg); // Try not to separate op from ifx. - else if (ic->op != GOTO && ic->op != RETURN && ic->op != JUMPTABLE && - ic->next) - boost::add_edge(key_to_index[ic->key], key_to_index[ic->next->key], 3.0f, - cfg); - - if (ic->op == GOTO) - boost::add_edge( - key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key], 6.0f, - cfg); - else if (ic->op == RETURN) - boost::add_edge( - key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key], 6.0f, - cfg); - else if (ic->op == IFX) - boost::add_edge( - key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? IC_TRUE(ic) - : IC_FALSE(ic)) - ->sch->key], - 6.0f, cfg); - else if (ic->op == JUMPTABLE) - for (symbol *lbl = (symbol *)(setFirstItem(IC_JTLABELS(ic))); lbl; - lbl = (symbol *)(setNextItem(IC_JTLABELS(ic)))) - boost::add_edge(key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key], - 6.0f, cfg); - } -} - -static bool candidate_expression(const iCode *const ic, int lkey) { - wassert(ic); - - if (ic->op != '!' 
&& ic->op != '~' && ic->op != UNARYMINUS && ic->op != '+' && - ic->op != '-' && ic->op != '*' && ic->op != '/' && ic->op != '%' && - ic->op != '>' && ic->op != '<' && ic->op != LE_OP && ic->op != GE_OP && - ic->op != NE_OP && ic->op != EQ_OP && ic->op != AND_OP && - ic->op != OR_OP && ic->op != '^' && ic->op != '|' && - ic->op != BITWISEAND && ic->op != RRC && ic->op != RLC && - ic->op != GETABIT && ic->op != GETHBIT && ic->op != LEFT_OP && - ic->op != RIGHT_OP && - !(ic->op == '=' && !POINTER_SET(ic) && - !(IS_ITEMP(IC_RIGHT(ic)) /*&& IC_RIGHT(ic)->key > lkey*/)) && - ic->op != GET_VALUE_AT_ADDRESS && ic->op != CAST) - return (false); - - const operand *const left = IC_LEFT(ic); - const operand *const right = IC_RIGHT(ic); - const operand *const result = IC_RESULT(ic); - - // Todo: Allow literal right operand once backends can rematerialize literals! - if (ic->op == '=' && IS_OP_LITERAL(right)) - return (false); - - if (IS_OP_VOLATILE(left) || IS_OP_VOLATILE(right)) - return (false); - - if (POINTER_GET(ic) && IS_VOLATILE(operandType(IC_LEFT(ic))->next)) - return (false); - - // Todo: Allow more operands! 
- if (ic->op != CAST && left && !(IS_SYMOP(left) || IS_OP_LITERAL(left)) || - right && !(IS_SYMOP(right) || IS_OP_LITERAL(right)) || - result && !(IS_SYMOP(result) || IS_OP_LITERAL(result))) - return (false); - - return (true); -} - -static bool same_expression(const iCode *const lic, const iCode *const ric) { - wassert(lic); - wassert(ric); - - if (lic->op != ric->op) - return (false); - - const operand *lleft = IC_LEFT(lic); - const operand *lright = IC_RIGHT(lic); - const operand *lresult = IC_RESULT(lic); - const operand *rleft = IC_LEFT(ric); - const operand *rright = IC_RIGHT(ric); - const operand *rresult = IC_RESULT(ric); - - if ((isOperandEqual(lleft, rleft) && isOperandEqual(lright, rright) || - IS_COMMUTATIVE(lic) && isOperandEqual(lleft, rright) && - isOperandEqual(lright, rleft)) && - (lresult && rresult && - compareTypeInexact(operandType(lresult), operandType(rresult)) > 0)) - return (true); - - return (false); -} - -static void get_candidate_set(std::set *c, const iCode *const sic, - int lkey) { - // TODO: For loop invariant code motion allow expression that only occurs - // once, too - will be needed when optimizing for speed. - for (const iCode *ic = sic; ic; ic = ic->next) { - if (!candidate_expression(ic, lkey)) - continue; - for (const iCode *pic = sic; pic != ic; pic = pic->next) - if (candidate_expression(pic, lkey) && same_expression(ic, pic) && - c->find(pic->key) == c->end()) { - // Found expression that occurs at least twice. 
- c->insert(pic->key); - break; - } - } -} - -static bool setup_cfg_for_expression(cfg_lospre_t *const cfg, - const iCode *const eic) { - typedef boost::graph_traits::vertex_descriptor vertex_t; - const operand *const eleft = IC_LEFT(eic); - const operand *const eright = IC_RIGHT(eic); - const bool uses_global = - (eic->op == GET_VALUE_AT_ADDRESS || isOperandGlobal(eleft) || - isOperandGlobal(eright) || - IS_SYMOP(eleft) && OP_SYMBOL_CONST(eleft)->addrtaken || - IS_SYMOP(eright) && OP_SYMBOL_CONST(eright)->addrtaken); - bool safety_required = false; - - // In redundancy elimination, safety means not doing a computation on any path - // were it was not done before. This is important, if the compuation can have - // side-effects, which depends on the target architecure. E.g. On some systems - // division requires safety, since division by zero might result in an - // interrupt. When there are memory-mapped devices or there is memory - // management, reading from a pointer requires safety, since reading from an - // unknown location could result in making the device do something or in a - // SIGSEGV. On the other hand, addition is something that typically does not - // require safety, since adding two undefined operands gives just another - // undefined (the C standard allows trap representations, which, could result - // in addition requiring safety though; AFAIK none of the targets currently - // supported by sdcc have trap representations). Philipp, 2012-07-06. - // - // For now we just always require safety for "dangerous" operations. - // - // TODO: Replace the current one by a more exact mechanism, that takes into - // account information from (not yet implemented) generalized constant - // propagation, pointer analysis, etc. - - // Function calls can have any side effects. - if (eic->op == CALL || eic->op == PCALL) - safety_required = true; - - // Reading from an invalid address might be dangerous, since there could be - // memory-mapped I/O. 
- if (eic->op == GET_VALUE_AT_ADDRESS && !optimize.lospre_unsafe_read) - safety_required = true; - - // TODO: Relax this! There are cases where allowing unsafe optimizations will - // improve speed. This probably needs implementation of profile-guided - // optimization though. - if (optimize.codeSpeed) - safety_required = true; - - for (vertex_t i = 0; i < boost::num_vertices(*cfg); i++) { - const iCode *const ic = (*cfg)[i].ic; - (*cfg)[i].uses = same_expression(eic, ic); - (*cfg)[i].invalidates = false; - if (IC_RESULT(ic) && !IS_OP_LITERAL(IC_RESULT(ic)) && !POINTER_SET(ic) && - (eleft && isOperandEqual(eleft, IC_RESULT(ic)) || - eright && isOperandEqual(eright, IC_RESULT(ic)))) - (*cfg)[i].invalidates = true; - if (ic->op == FUNCTION || ic->op == ENDFUNCTION || ic->op == RECEIVE) - (*cfg)[i].invalidates = true; - if (uses_global && (ic->op == CALL || ic->op == PCALL)) - (*cfg)[i].invalidates = true; - if (uses_global && POINTER_SET(ic)) // TODO: More accuracy here! - (*cfg)[i].invalidates = true; - - (*cfg)[i].forward = std::pair(-1, -1); - } - - return (safety_required); -} - -// Dump cfg, with numbered nodes. -void dump_cfg_lospre(const cfg_lospre_t &cfg) { - if (!currFunc) - return; - - std::ofstream dump_file( - (std::string(dstFileName) + ".dumplosprecfg" + currFunc->rname + ".dot") - .c_str()); - - std::string *name = new std::string[num_vertices(cfg)]; - for (unsigned int i = 0; i < boost::num_vertices(cfg); i++) { - const char *iLine = printILine(cfg[i].ic); - std::ostringstream os; - os << i << ", " << cfg[i].ic->key << " : " << iLine; - dbuf_free(iLine); - name[i] = os.str(); - } - boost::write_graphviz(dump_file, cfg, boost::make_label_writer(name)); - delete[] name; -} - -#if 0 -// Dump tree decomposition. 
-static void dump_tree_decomposition(const tree_dec_lospre_t &tree_dec) -{ - std::ofstream dump_file((std::string(dstFileName) + ".dumplospredec" + currFunc->rname + ".dot").c_str()); - - unsigned int w = 0; - - std::string *name = new std::string[num_vertices(tree_dec)]; - for (unsigned int i = 0; i < boost::num_vertices(tree_dec); i++) - { - if (tree_dec[i].bag.size() > w) - w = tree_dec[i].bag.size(); - std::ostringstream os; - std::set::const_iterator v1; - os << i << " | "; - for (v1 = tree_dec[i].bag.begin(); v1 != tree_dec[i].bag.end(); ++v1) - os << *v1 << " "; - name[i] = os.str(); - } - boost::write_graphviz(dump_file, tree_dec, boost::make_label_writer(name)); - delete[] name; -} -#endif - -void lospre(iCode *sic, ebbIndex *ebbi) { - cfg_lospre_t control_flow_graph; - tree_dec_lospre_t tree_decomposition; - - wassert(sic); - -#ifdef DEBUG_LOSPRE - if (currFunc) - std::cout << "lospre for " << currFunc->rname << "()\n"; -#endif - - create_cfg_lospre(control_flow_graph, sic, ebbi); - - if (options.dump_graphs) - dump_cfg_lospre(control_flow_graph); - - thorup_tree_decomposition(tree_decomposition, control_flow_graph); - nicify(tree_decomposition); - - int lkey = operandKey; - - for (bool change = true; change;) { - change = false; - - std::set candidate_set; - get_candidate_set(&candidate_set, sic, lkey); - - std::set::iterator ci, ci_end; - for (ci = candidate_set.begin(), ci_end = candidate_set.end(); ci != ci_end; - ++ci) { - const iCode *ic; - for (ic = sic; ic && ic->key != *ci; ic = ic->next) - ; - - if (!ic || !candidate_expression(ic, lkey)) - continue; - - bool safety = setup_cfg_for_expression(&control_flow_graph, ic); - - if (safety && - tree_dec_safety(tree_decomposition, control_flow_graph, ic) < 0) - continue; - - change |= - (tree_dec_lospre(tree_decomposition, control_flow_graph, ic) > 0); - } - } -} diff --git a/src/SDCClospre.hpp b/src/SDCClospre.hpp deleted file mode 100644 index a145e7a64..000000000 --- a/src/SDCClospre.hpp +++ 
/dev/null @@ -1,821 +0,0 @@ -// Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2012 -// -// (c) 2012 Goethe-Universität Frankfurt -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// -// Lifetime-optimal speculative partial redundancy elimination. - -#include -#include -#include - -#include "SDCCtree_dec.hpp" - -extern "C" { -#include "SDCCBBlock.h" -#include "SDCCasm.h" -#include "SDCCgen.h" -#include "SDCCicode.h" -#include "SDCCopt.h" -#include "SDCCsymt.h" -#include "SDCCy.h" -#include "port.h" -} - -#ifdef HAVE_STX_BTREE_SET_H -#include -#endif - -#if 0 // def HAVE_STX_BTREE_SET_H -typedef stx::btree_set lospreset_t; // Faster than std::set -#else -typedef std::set lospreset_t; -#endif - -struct assignment_lospre { - boost::tuple - s; // First entry: Calculation costs, second entry: Lifetime costs. 
- lospreset_t local; - std::vector global; - - bool operator<(const assignment_lospre &a) const { - lospreset_t::const_iterator i, ai, i_end, ai_end; - - i_end = local.end(); - ai_end = a.local.end(); - - for (i = local.begin(), ai = a.local.begin();; ++i, ++ai) { - if (i == i_end) - return (true); - if (ai == ai_end) - return (false); - - if (*i < *ai) - return (true); - if (*i > *ai) - return (false); - - if (global[*i] < a.global[*ai]) - return (true); - if (global[*i] > a.global[*ai]) - return (false); - } - } -}; - -bool assignments_lospre_locally_same(const assignment_lospre &a1, - const assignment_lospre &a2) { - if (a1.local != a2.local) - return (false); - - lospreset_t::const_iterator i, i_end; - for (i = a1.local.begin(), i_end = a1.local.end(); i != i_end; ++i) - if (a1.global[*i] != a2.global[*i]) - return (false); - - return (true); -} - -struct cfg_lospre_node { - iCode *ic; - - bool uses; - bool invalidates; - - std::pair forward; -}; - -typedef std::list assignment_list_lospre_t; - -struct tree_dec_lospre_node { - std::set bag; - assignment_list_lospre_t assignments; - unsigned weight; // The weight is the number of nodes at which intermediate - // results need to be remembered. In general, to minimize - // memory consumption, at join nodes the child with maximum - // weight should be processed first. -}; - -typedef boost::adjacency_list - cfg_lospre_t; // The edge property is the cost of subdividing the edge and - // inserting an instruction (for now we always use 1, - // optimizing for code size, but relative execution frequency - // could be used when optimizing for speed or total energy - // consumption; aggregates thereof can be a good idea as - // well). 
-typedef boost::adjacency_list - tree_dec_lospre_t; - -#if 1 -void print_assignment(const assignment_lospre &a, cfg_lospre_t G) { - wassert(a.global.size() == boost::num_vertices(G)); - for (unsigned int i = 0; i < boost::num_vertices(G); i++) - std::cout << "(" << i << ", " << a.global[i] << "),"; - std::cout << "\n"; - std::cout << "Cost: " << a.s << "\nLocal:"; - for (lospreset_t::const_iterator i = a.local.begin(); i != a.local.end(); ++i) - std::cout << *i << " "; - std::cout << "\n"; - std::cout.flush(); -} -#endif - -// Handle Leaf nodes in the nice tree decomposition -template -void tree_dec_lospre_leaf( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - assignment_lospre a; - assignment_list_lospre_t &alist = T[t].assignments; - - a.s.get<0>() = 0; - a.s.get<1>() = 0; - a.global.resize(boost::num_vertices(G)); - alist.push_back(a); -} - -// Handle introduce nodes in the nice tree decomposition -template -int tree_dec_lospre_introduce( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end; - assignment_list_lospre_t::iterator ai; - boost::tie(c, c_end) = adjacent_vertices(t, T); - - assignment_list_lospre_t &alist2 = T[t].assignments; - assignment_list_lospre_t &alist = T[*c].assignments; - - if (alist.size() > size_t(options.max_allocs_per_node) / 2) { - alist.clear(); - return (-1); - } - - std::set new_inst; - std::set_difference(T[t].bag.begin(), T[t].bag.end(), T[*c].bag.begin(), - T[*c].bag.end(), std::inserter(new_inst, new_inst.end())); - unsigned short int i = *(new_inst.begin()); - - for (ai = alist.begin(); ai != alist.end(); ++ai) { - ai->local.insert(i); - ai->global[i] = false; - alist2.push_back(*ai); - ai->global[i] = true; - alist2.push_back(*ai); - } - - alist.clear(); - - return (0); -} - -// Handle forget nodes in the nice tree decomposition -template -void 
tree_dec_lospre_forget( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end; - boost::tie(c, c_end) = adjacent_vertices(t, T); - - assignment_list_lospre_t &alist = T[t].assignments; - - std::swap(alist, T[*c].assignments); - - std::set old_inst; - std::set_difference(T[*c].bag.begin(), T[*c].bag.end(), T[t].bag.begin(), - T[t].bag.end(), std::inserter(old_inst, old_inst.end())); - unsigned short int i = *(old_inst.begin()); - - assignment_list_lospre_t::iterator ai, aif; - - for (ai = alist.begin(); ai != alist.end(); ++ai) { - ai->local.erase(i); - ai->s.get<1>() += ai->global[i]; // Add lifetime cost. - { - typedef typename boost::graph_traits::out_edge_iterator - n_iter_t; - n_iter_t n, n_end; - for (boost::tie(n, n_end) = boost::out_edges(i, G); n != n_end; ++n) { - if (ai->local.find(boost::target(*n, G)) == ai->local.end() || - (ai->global[i] && !G[i].invalidates) >= - (ai->global[boost::target(*n, G)] || - G[boost::target(*n, G)].uses)) - continue; - - ai->s.get<0>() += G[*n]; // Add calculation cost. - } - } - { - typedef - typename boost::graph_traits::in_edge_iterator n_iter_t; - n_iter_t n, n_end; - for (boost::tie(n, n_end) = boost::in_edges(i, G); n != n_end; ++n) { - if (ai->local.find(boost::source(*n, G)) == ai->local.end() || - (ai->global[boost::source(*n, G)] && - !G[boost::source(*n, G)].invalidates) >= - (ai->global[i] || G[i].uses)) - continue; - - ai->s.get<0>() += G[*n]; // Add calculation cost. - } - } - } - - alist.sort(); - - // Collapse (locally) identical assignments. 
- for (ai = alist.begin(); ai != alist.end();) { - aif = ai; - - for (++ai; - ai != alist.end() && assignments_lospre_locally_same(*aif, *ai);) { - if (aif->s > ai->s) { - alist.erase(aif); - aif = ai; - ++ai; - } else { - alist.erase(ai); - ai = aif; - ++ai; - } - } - } - - if (!alist.size()) - std::cerr << "No surviving assignments at forget node (lospre).\n"; -} - -// Handle join nodes in the nice tree decomposition -template -void tree_dec_lospre_join( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end, c2, c3; - boost::tie(c, c_end) = adjacent_vertices(t, T); - - c2 = c; - ++c; - c3 = c; - - assignment_list_lospre_t &alist1 = T[t].assignments; - assignment_list_lospre_t &alist2 = T[*c2].assignments; - assignment_list_lospre_t &alist3 = T[*c3].assignments; - - alist2.sort(); - alist3.sort(); - - assignment_list_lospre_t::iterator ai2, ai3; - for (ai2 = alist2.begin(), ai3 = alist3.begin(); - ai2 != alist2.end() && ai3 != alist3.end();) { - if (assignments_lospre_locally_same(*ai2, *ai3)) { - ai2->s.get<0>() += ai3->s.get<0>(); - ai2->s.get<1>() += ai3->s.get<1>(); - for (size_t i = 0; i < ai2->global.size(); i++) - ai2->global[i] = (ai2->global[i] || ai3->global[i]); - alist1.push_back(*ai2); - - ++ai2; - ++ai3; - } else if (*ai2 < *ai3) { - ++ai2; - continue; - } else if (*ai3 < *ai2) { - ++ai3; - continue; - } - } - - alist2.clear(); - alist3.clear(); -} - -template -int tree_dec_lospre_nodes( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - - adjacency_iter_t c, c_end; - typename boost::graph_traits::vertex_descriptor c0, c1; - - boost::tie(c, c_end) = adjacent_vertices(t, T); - - switch (out_degree(t, T)) { - case 0: - tree_dec_lospre_leaf(T, t, G); - break; - case 1: - c0 = *c; - if (tree_dec_lospre_nodes(T, c0, G) 
< 0) - return (-1); - if (T[c0].bag.size() < T[t].bag.size()) { - if (tree_dec_lospre_introduce(T, t, G)) - return (-1); - } else - tree_dec_lospre_forget(T, t, G); - break; - case 2: - c0 = *c++; - c1 = *c; - if (tree_dec_lospre_nodes(T, c0, G) < 0) - return (-1); - if (tree_dec_lospre_nodes(T, c1, G) < 0) { - T[c0].assignments.clear(); - return (-1); - } - tree_dec_lospre_join(T, t, G); - break; - default: - std::cerr << "Not nice.\n"; - break; - } - return (0); -} - -template -void tree_dec_safety_forget( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end; - boost::tie(c, c_end) = adjacent_vertices(t, T); - - assignment_list_lospre_t &alist = T[t].assignments; - - std::swap(alist, T[*c].assignments); - - std::set old_inst; - std::set_difference(T[*c].bag.begin(), T[*c].bag.end(), T[t].bag.begin(), - T[t].bag.end(), std::inserter(old_inst, old_inst.end())); - unsigned short int i = *(old_inst.begin()); - - assignment_list_lospre_t::iterator ai, aif; - - for (ai = alist.begin(); ai != alist.end();) { - ai->local.erase(i); - - if (!ai->global[i]) { - ++ai; - continue; - } - - // Since we want the union of all paths between invalidating nodes without - // uses, by definition there may not be a use in it. - if (G[i].uses) { - ai = alist.erase(ai); - continue; - } - - ai->s.get<1>() -= 1; // Maximize the subsets: Find all paths - - // At least one successor needs to be in the path or invalid. - { - typedef typename boost::graph_traits::out_edge_iterator - n_iter_t; - n_iter_t n, n_end; - bool ok; - - for (ok = false, boost::tie(n, n_end) = boost::out_edges(i, G); - !ok && n != n_end; ++n) - if (ai->global[boost::target(*n, G)] || - G[boost::target(*n, G)].invalidates) - ok = true; - - if (!ok) { - ai = alist.erase(ai); - continue; - } - } - // At least one predecessor needs to be in the path or invalid. 
- { - typedef - typename boost::graph_traits::in_edge_iterator n_iter_t; - n_iter_t n, n_end; - bool ok; - - for (ok = false, boost::tie(n, n_end) = boost::in_edges(i, G); - !ok && n != n_end; ++n) - if (ai->global[boost::source(*n, G)] || - G[boost::source(*n, G)].invalidates) - ok = true; - - if (!ok) { - ai = alist.erase(ai); - continue; - } - } - - ++ai; - } - - alist.sort(); - - // Collapse (locally) identical assignments. - for (ai = alist.begin(); ai != alist.end();) { - aif = ai; - - for (++ai; - ai != alist.end() && assignments_lospre_locally_same(*aif, *ai);) { - if (aif->s > ai->s) { - alist.erase(aif); - aif = ai; - ++ai; - } else { - alist.erase(ai); - ai = aif; - ++ai; - } - } - } - - if (!alist.size()) - std::cerr << "No surviving assignments at forget node.\n"; -} - -template -int tree_dec_safety_nodes( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - - adjacency_iter_t c, c_end; - typename boost::graph_traits::vertex_descriptor c0, c1; - - boost::tie(c, c_end) = adjacent_vertices(t, T); - - switch (out_degree(t, T)) { - case 0: - tree_dec_lospre_leaf(T, t, G); - break; - case 1: - c0 = *c; - if (tree_dec_safety_nodes(T, c0, G) < 0) - return (-1); - if (T[c0].bag.size() < T[t].bag.size()) { - if (tree_dec_lospre_introduce(T, t, G)) - return (-1); - } else - tree_dec_safety_forget(T, t, G); - break; - case 2: - c0 = *c++; - c1 = *c; - - if (T[c0].weight < T[c1].weight) // Minimize memory consumption. 
- std::swap(c0, c1); - - if (tree_dec_safety_nodes(T, c0, G) < 0) - return (-1); - if (tree_dec_safety_nodes(T, c1, G) < 0) { - T[c0].assignments.clear(); - return (-1); - } - tree_dec_lospre_join(T, t, G); - break; - default: - std::cerr << "Not nice.\n"; - break; - } - return (0); -} - -template -static void split_edge(T_t &T, G_t &G, - typename boost::graph_traits::edge_descriptor e, - const iCode *ic, operand *tmpop) { - // Insert new iCode into chain. - iCode *newic = newiCode(ic->op, IC_LEFT(ic), IC_RIGHT(ic)); - IC_RESULT(newic) = tmpop; - newic->filename = ic->filename; - newic->lineno = ic->lineno; - newic->prev = G[boost::source(e, G)].ic; - newic->next = G[boost::target(e, G)].ic; - G[boost::source(e, G)].ic->next = newic; - G[boost::target(e, G)].ic->prev = newic; - - // if (ic->op != ADDRESS_OF && IC_LEFT (ic) && IS_ITEMP (IC_LEFT (ic))) - // bitVectSetBit (OP_SYMBOL (IC_LEFT (ic))->uses, ic->key); - // if (IC_RIGHT (ic) && IS_ITEMP (IC_RIGHT (ic))) - // bitVectSetBit (OP_SYMBOL (IC_RIGHT (ic))->uses, ic->key); - // bitVectSetBit (OP_SYMBOL (IC_RESULT (ic))->defs, ic->key); - - // Insert node into cfg. - typename boost::graph_traits::vertex_descriptor n = boost::add_vertex(G); - // TODO: Exact cost. - G[n].ic = newic; - G[n].uses = false; - boost::add_edge(boost::source(e, G), n, G[e], G); - boost::add_edge(n, boost::target(e, G), 3.0, G); - -#ifdef DEBUG_LOSPRE - std::cout << "Calculating " << OP_SYMBOL_CONST(tmpop)->name << " at ic " - << newic->key << "\n"; -#endif - - // Update tree-decomposition. - // TODO: More efficiently. - for (typename boost::graph_traits::vertex_descriptor n1 = 0; - n1 < boost::num_vertices(T); ++n1) { - if (T[n1].bag.find(boost::source(e, G)) == T[n1].bag.end()) - continue; - if (T[n1].bag.find(boost::target(e, G)) == T[n1].bag.end()) - continue; - // Found bag that contains both endpoints of original edge. - - // Add new tree node with bag there. Let nicify() sort things out later. 
- typename boost::graph_traits::vertex_descriptor n2 = - boost::add_vertex(T); - T[n2].bag.insert(boost::source(e, G)); - T[n2].bag.insert(boost::target(e, G)); - T[n2].bag.insert(n); - boost::add_edge(n1, n2, T); - break; - } - - // Remove old edge from cfg. - boost::remove_edge(e, G); -} - -template -static void forward_lospre_assignment( - G_t &G, typename boost::graph_traits::vertex_descriptor i, - const iCode *ic, const assignment_lospre &a) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - - adjacency_iter_t c, c_end; - - operand *tmpop = IC_RIGHT(ic); - const std::pair forward(IC_RESULT(ic)->key, IC_RIGHT(ic)->key); - - for (;;) { - if (G[i].forward == forward) - break; // Was here before. - - iCode *nic = G[i].ic; - - if (isOperandEqual(IC_RESULT(ic), IC_LEFT(nic)) && nic->op != ADDRESS_OF && - (!POINTER_GET(nic) || !IS_PTR(operandType(IC_RESULT(nic))) || - !IS_BITFIELD(operandType(IC_LEFT(nic))->next) || - compareType(operandType(IC_LEFT(nic)), operandType(tmpop)) == 1)) { - bool isaddr = IC_LEFT(nic)->isaddr; -#ifdef DEBUG_LOSPRE - std::cout << "Forward substituted left operand " - << OP_SYMBOL_CONST(IC_LEFT(nic))->name << " at " << nic->key - << "\n"; -#endif - // bitVectUnSetBit (OP_SYMBOL (IC_LEFT (nic))->uses, nic->key); - IC_LEFT(nic) = operandFromOperand(tmpop); - // bitVectSetBit (OP_SYMBOL (IC_LEFT (nic))->uses, nic->key); - IC_LEFT(nic)->isaddr = isaddr; - } - if (isOperandEqual(IC_RESULT(ic), IC_RIGHT(nic))) { -#ifdef DEBUG_LOSPRE - std::cout << "Forward substituted right operand " - << OP_SYMBOL_CONST(IC_RIGHT(nic))->name << " at " << nic->key - << "\n"; -#endif - // bitVectUnSetBit (OP_SYMBOL (IC_RIGHT (nic))->uses, nic->key); - IC_RIGHT(nic) = operandFromOperand(tmpop); - // bitVectSetBit (OP_SYMBOL (IC_RIGHT (nic))->uses, nic->key); - } - if (POINTER_SET(nic) && isOperandEqual(IC_RESULT(ic), IC_RESULT(nic)) && - (!IS_PTR(operandType(IC_RESULT(nic))) || - !IS_BITFIELD(operandType(IC_RESULT(nic))->next) || - 
compareType(operandType(IC_RESULT(nic)), operandType(tmpop)) == 1)) { -#ifdef DEBUG_LOSPRE - std::cout << "Forward substituted result operand " - << OP_SYMBOL_CONST(IC_RESULT(nic))->name << " at " << nic->key - << "\n"; -#endif - // bitVectUnSetBit (OP_SYMBOL (IC_RESULT (nic))->uses, nic->key); - IC_RESULT(nic) = operandFromOperand(tmpop); - IC_RESULT(nic)->isaddr = true; - // bitVectSetBit (OP_SYMBOL (IC_RESULT (nic))->uses, nic->key); - } - - if (nic->op == LABEL) // Reached label. Continue only if all edges goining - // here are safe. - { - typedef - typename boost::graph_traits::in_edge_iterator in_edge_iter_t; - in_edge_iter_t e, e_end; - for (boost::tie(e, e_end) = boost::in_edges(i, G); e != e_end; ++e) - if (G[boost::source(*e, G)].forward != forward) - break; - if (e != e_end) - break; - } - if (isOperandEqual(IC_RESULT(ic), IC_RESULT(nic)) && - !POINTER_SET(nic) /*|| G[i].uses*/) - break; - if ((nic->op == CALL || nic->op == PCALL || POINTER_SET(nic)) && - IS_TRUE_SYMOP(IC_RESULT(ic))) - break; - - G[i].forward = forward; - - if (nic->op == GOTO || nic->op == IFX || nic->op == JUMPTABLE) { - adjacency_iter_t c, c_end; - for (boost::tie(c, c_end) = boost::adjacent_vertices(i, G); c != c_end; - ++c) { - if (!((a.global[i] & true) && !G[i].invalidates) && - (a.global[*c] & true)) // Calculation edge - continue; - forward_lospre_assignment(G, *c, ic, a); - } - break; - } - - boost::tie(c, c_end) = adjacent_vertices(i, G); - if (c == c_end) - break; - if (!((a.global[i] & true) && !G[i].invalidates) && - (a.global[*c] & true)) // Calculation edge - break; - i = *c; - } -} - -template -static int implement_lospre_assignment( - const assignment_lospre a, T_t &T, G_t &G, - const iCode *ic) // Assignment has to be passed as a copy (not reference), - // since the transformations on the tree-decomposition will - // invalidate it otherwise. 
-{ - operand *tmpop; - unsigned substituted = 0, split = 0; - - typedef typename boost::graph_traits::edge_iterator edge_iter_t; - typedef typename boost::graph_traits::edge_descriptor edge_desc_t; - std::set - calculation_edges; // Use descriptor, not iterator due to possible - // invalidation of iterators when inserting vertices or - // edges. - edge_iter_t e, e_end; - for (boost::tie(e, e_end) = boost::edges(G); e != e_end; ++e) - if (!((a.global[boost::source(*e, G)] & true) && - !G[boost::source(*e, G)].invalidates) && - (a.global[boost::target(*e, G)] & true)) - calculation_edges.insert(*e); - - if (!calculation_edges.size()) - return (0); - -#ifdef DEBUG_LOSPRE - std::cout << "Optimizing at " << ic->key << "\n"; - std::cout.flush(); -#endif - - tmpop = newiTempOperand(operandType(IC_RESULT(ic)), TRUE); - tmpop->isvolatile = false; -#ifdef DEBUG_LOSPRE - std::cout << "New tmpop: " << OP_SYMBOL_CONST(tmpop)->name << " "; - printTypeChain(operandType(IC_RESULT(ic)), stdout); - std::cout << "\n"; -#endif - - for (typename std::set::iterator i = calculation_edges.begin(); - i != calculation_edges.end(); ++i) { - split_edge(T, G, *i, ic, tmpop); - split++; - } - - typedef typename boost::graph_traits::vertex_iterator vertex_iter_t; - vertex_iter_t v, v_end; - - for (boost::tie(v, v_end) = boost::vertices(G); v != v_end; ++v) { - if (!G[*v].uses) - continue; - typename boost::graph_traits::in_edge_iterator e = - in_edges(*v, G).first; - if (a.global.size() <= *v) - continue; - if (!((a.global[*v] & true) && !G[*v].invalidates || - boost::source(*e, G) < a.global.size() && - (a.global[boost::source(*e, G)] & true))) - continue; -#ifdef DEBUG_LOSPRE - std::cout << "Substituting ic " << G[*v].ic->key << "\n"; -#endif - substituted++; - - iCode *ic = G[*v].ic; - // if (IC_LEFT (ic) && IS_ITEMP (IC_LEFT (ic))) - // bitVectUnSetBit (OP_SYMBOL (IC_LEFT (ic))->uses, ic->key); - // if (IC_RIGHT (ic) && IS_ITEMP (IC_RIGHT (ic))) - // bitVectUnSetBit (OP_SYMBOL (IC_RIGHT 
(ic))->uses, ic->key); - IC_RIGHT(ic) = tmpop; - // bitVectSetBit (OP_SYMBOL (IC_RIGHT(ic))->uses, ic->key); - if (!POINTER_SET(ic)) { - IC_LEFT(ic) = 0; - ic->op = '='; - IC_RESULT(ic) = operandFromOperand(IC_RESULT(ic)); - IC_RESULT(ic)->isaddr = 0; - } - if (IS_OP_VOLATILE(IC_RESULT(ic))) - continue; - - { - typedef typename boost::graph_traits::adjacency_iterator - adjacency_iter_t; - adjacency_iter_t c, c_end; - boost::tie(c, c_end) = adjacent_vertices(*v, G); - if (c != c_end) - forward_lospre_assignment(G, *c, ic, a); - } - } - - if (substituted <= 0) { - std::cerr << "Introduced " << OP_SYMBOL_CONST(tmpop)->name - << ", but did not substitute any calculations.\n"; - return (-1); - } - - if (substituted < split) // Todo: Remove this warning when optimization for - // speed instead of code size is implemented! - std::cout << "Introduced " << OP_SYMBOL_CONST(tmpop)->name - << ", but did substitute only " << substituted - << " calculations, while introducing " << split << ".\n"; - std::cout.flush(); - - return (1); -} - -/* Using a template here confuses debugging tools such as valgrind. 
*/ -/*template */ -static int tree_dec_lospre(tree_dec_lospre_t /*T_t*/ &T, - cfg_lospre_t /*G_t*/ &G, const iCode *ic) { - if (tree_dec_lospre_nodes(T, find_root(T), G)) - return (-1); - - wassert(T[find_root(T)].assignments.begin() != - T[find_root(T)].assignments.end()); - const assignment_lospre &winner = *(T[find_root(T)].assignments.begin()); - - // std::cout << "Winner (lospre): "; - // print_assignment(winner, G); - - int change; - if (change = implement_lospre_assignment(winner, T, G, ic)) - nicify(T); - T[find_root(T)].assignments.clear(); - return (change); -} - -template -static void implement_safety(const assignment_lospre &a, G_t &G) { - typedef typename boost::graph_traits::vertex_iterator vertex_iter_t; - vertex_iter_t v, v_end; - - for (boost::tie(v, v_end) = boost::vertices(G); v != v_end; ++v) - G[*v].invalidates |= a.global[*v]; -} - -/* Using a template here confuses debugging tools such as valgrind. */ -/*template */ -static int tree_dec_safety(tree_dec_lospre_t /*T_t*/ &T, - cfg_lospre_t /*G_t*/ &G, const iCode *ic) { - if (tree_dec_safety_nodes(T, find_root(T), G)) - return (-1); - - wassert(T[find_root(T)].assignments.begin() != - T[find_root(T)].assignments.end()); - const assignment_lospre &winner = *(T[find_root(T)].assignments.begin()); - - implement_safety(winner, G); - -#ifdef DEBUG_LOSPRE - std::cout << "Winner (safety): "; - print_assignment(winner, G); -#endif - - T[find_root(T)].assignments.clear(); - return (0); -} diff --git a/src/SDCCnaddr.c b/src/SDCCnaddr.c new file mode 100644 index 000000000..44fe963a5 --- /dev/null +++ b/src/SDCCnaddr.c @@ -0,0 +1,831 @@ +/* SDCCnaddr.c — pure-C port of SDCCnaddr.cc + SDCCnaddr.hpp. + * + * Optimal placement of bank switching instructions for named address spaces. + * + * See: + * Philipp Klaus Krause, "Optimal Placement of Bank Selection Instructions in + * Polynomial Time", M-SCOPES '13, pp. 23-30. 
+ * + * This file reimplements the algorithm using our in-tree C graph library + * (util/cgraph.h), the sorted-array sets (util/uiset.h), and the C tree + * decomposition (SDCCtree_dec.h). The public entry point is + * switchAddressSpacesOptimally(), declared in SDCCopt.h. + */ + +#include +#include +#include + +#include "common.h" +#include "SDCCopt.h" + +#include "SDCCtree_dec.h" +#include "util/cgraph.h" +#include "util/uiset.h" + +/* Named address spaces. -1 means "undefined". Others index into the addrspaces + * map built by annotate_cfg_naddr(). */ +typedef short naddrspace_t; + +/* ---------- CFG (control flow graph) ---------- */ + +typedef struct { + iCode *ic; + usset_t possible_naddrspaces; /* set of sign-encoded naddrspace_t */ +} cfg_naddr_node_t; + +typedef struct { + cgraph_t g; /* CG_BIDIRECTIONAL, has_weights=1 */ + cfg_naddr_node_t *node; + size_t cap; +} cfg_naddr_t; + +static void cfg_init(cfg_naddr_t *c) { + cg_init(&c->g, CG_BIDIRECTIONAL, 1); + c->node = NULL; + c->cap = 0; +} + +static void cfg_free(cfg_naddr_t *c) { + size_t v; + for (v = 0; v < c->g.nvertices; v++) + usset_free(&c->node[v].possible_naddrspaces); + free(c->node); + c->node = NULL; + c->cap = 0; + cg_free(&c->g); +} + +static unsigned int cfg_add_vertex(cfg_naddr_t *c) { + if (c->g.nvertices >= c->cap) { + size_t nc = c->cap ? 
c->cap * 2 : 16; + c->node = (cfg_naddr_node_t *)realloc(c->node, nc * sizeof(*c->node)); + c->cap = nc; + } + unsigned int v = cg_add_vertex(&c->g); + c->node[v].ic = NULL; + usset_init(&c->node[v].possible_naddrspaces); + return v; +} + +/* ---------- Key -> vertex-index map (linear for simplicity; CFG is small) ---------- */ + +typedef struct { + int *keys; + unsigned int *idxs; + size_t n, cap; +} key_map_t; + +static void km_init(key_map_t *m) { + m->keys = NULL; m->idxs = NULL; m->n = 0; m->cap = 0; +} + +static void km_free(key_map_t *m) { + free(m->keys); free(m->idxs); + m->keys = NULL; m->idxs = NULL; m->n = 0; m->cap = 0; +} + +static void km_put(key_map_t *m, int key, unsigned int idx) { + if (m->n == m->cap) { + size_t nc = m->cap ? m->cap * 2 : 32; + m->keys = (int *)realloc(m->keys, nc * sizeof(int)); + m->idxs = (unsigned int *)realloc(m->idxs, nc * sizeof(unsigned int)); + m->cap = nc; + } + m->keys[m->n] = key; + m->idxs[m->n] = idx; + m->n++; +} + +static unsigned int km_get(const key_map_t *m, int key) { + size_t i; + for (i = 0; i < m->n; i++) + if (m->keys[i] == key) return m->idxs[i]; + return (unsigned int)-1; /* shouldn't happen on well-formed CFGs */ +} + +/* ---------- addrspaces map: naddrspace_t -> const symbol * ---------- */ + +typedef struct { + naddrspace_t *keys; + const symbol **vals; + size_t n, cap; +} ns_map_t; + +static void ns_init(ns_map_t *m) { + m->keys = NULL; m->vals = NULL; m->n = 0; m->cap = 0; +} + +static void ns_free(ns_map_t *m) { + free(m->keys); free(m->vals); + m->keys = NULL; m->vals = NULL; m->n = 0; m->cap = 0; +} + +static void ns_put(ns_map_t *m, naddrspace_t k, const symbol *v) { + size_t i; + for (i = 0; i < m->n; i++) { + if (m->keys[i] == k) { m->vals[i] = v; return; } + } + if (m->n == m->cap) { + size_t nc = m->cap ? 
m->cap * 2 : 8; + m->keys = (naddrspace_t *)realloc(m->keys, nc * sizeof(naddrspace_t)); + m->vals = (const symbol **)realloc(m->vals, nc * sizeof(const symbol *)); + m->cap = nc; + } + m->keys[m->n] = k; + m->vals[m->n] = v; + m->n++; +} + +static const symbol *ns_get(const ns_map_t *m, naddrspace_t k) { + size_t i; + for (i = 0; i < m->n; i++) + if (m->keys[i] == k) return m->vals[i]; + return NULL; +} + +/* ---------- symbol -> naddrspace_t map for annotate_cfg_naddr ---------- */ + +typedef struct { + const symbol **syms; + naddrspace_t *nas; + size_t n, cap; +} sym_idx_map_t; + +static void sim_init(sym_idx_map_t *m) { + m->syms = NULL; m->nas = NULL; m->n = 0; m->cap = 0; +} + +static void sim_free(sym_idx_map_t *m) { + free(m->syms); free(m->nas); + m->syms = NULL; m->nas = NULL; m->n = 0; m->cap = 0; +} + +/* Returns existing index or inserts with new value and returns it. */ +static int sim_find(const sym_idx_map_t *m, const symbol *sym, naddrspace_t *out) { + size_t i; + for (i = 0; i < m->n; i++) { + if (m->syms[i] == sym) { *out = m->nas[i]; return 1; } + } + return 0; +} + +static void sim_put(sym_idx_map_t *m, const symbol *sym, naddrspace_t na) { + if (m->n == m->cap) { + size_t nc = m->cap ? m->cap * 2 : 8; + m->syms = (const symbol **)realloc(m->syms, nc * sizeof(const symbol *)); + m->nas = (naddrspace_t *)realloc(m->nas, nc * sizeof(naddrspace_t)); + m->cap = nc; + } + m->syms[m->n] = sym; + m->nas[m->n] = na; + m->n++; +} + +/* ---------- naddrspace <-> unsigned short encoding ---------- + * + * The C++ version stored naddrspace_t (signed short) values in a + * std::set. We preserve that by bit-reinterpreting. On + * two's-complement systems (which the rest of SDCC already assumes) the + * cast is a no-op: (unsigned short)(short)-1 == 0xFFFF, and back again. 
*/ +static inline unsigned short ns_enc(naddrspace_t na) { + return (unsigned short)na; +} +static inline naddrspace_t ns_dec(unsigned short v) { + return (naddrspace_t)v; +} + +/* ---------- assignment_naddr (linked list node) ---------- */ + +typedef struct assignment_naddr { + float s; + usset_t local; /* set of unsigned short (vertex indices) */ + naddrspace_t *global; /* length global_n; -2 init, -1 none */ + size_t global_n; + struct assignment_naddr *prev; + struct assignment_naddr *next; +} assignment_naddr_t; + +typedef struct { + assignment_naddr_t *head; + assignment_naddr_t *tail; + size_t n; +} assignment_list_naddr_t; + +static void al_init(assignment_list_naddr_t *l) { + l->head = l->tail = NULL; + l->n = 0; +} + +static assignment_naddr_t *a_clone(const assignment_naddr_t *src) { + assignment_naddr_t *a = (assignment_naddr_t *)malloc(sizeof(*a)); + a->s = src->s; + usset_init(&a->local); + usset_copy(&a->local, &src->local); + a->global_n = src->global_n; + a->global = (naddrspace_t *)malloc(a->global_n * sizeof(naddrspace_t)); + memcpy(a->global, src->global, a->global_n * sizeof(naddrspace_t)); + a->prev = a->next = NULL; + return a; +} + +static void a_free(assignment_naddr_t *a) { + if (!a) return; + usset_free(&a->local); + free(a->global); + free(a); +} + +static void al_clear(assignment_list_naddr_t *l) { + assignment_naddr_t *p = l->head; + while (p) { + assignment_naddr_t *nx = p->next; + a_free(p); + p = nx; + } + l->head = l->tail = NULL; + l->n = 0; +} + +static void al_push_back_take(assignment_list_naddr_t *l, assignment_naddr_t *a) { + a->prev = l->tail; + a->next = NULL; + if (l->tail) l->tail->next = a; else l->head = a; + l->tail = a; + l->n++; +} + +/* Detach node `a` from list `l` and free it. Returns the successor. 
*/ +static assignment_naddr_t *al_erase(assignment_list_naddr_t *l, assignment_naddr_t *a) { + assignment_naddr_t *nx = a->next; + if (a->prev) a->prev->next = a->next; else l->head = a->next; + if (a->next) a->next->prev = a->prev; else l->tail = a->prev; + l->n--; + a_free(a); + return nx; +} + +static void al_swap(assignment_list_naddr_t *a, assignment_list_naddr_t *b) { + assignment_list_naddr_t t = *a; + *a = *b; + *b = t; +} + +/* ---------- assignment ordering ---------- + * + * Original C++ operator< walks locals lexicographically (unsigned short order), + * with ties broken by the corresponding global entries. */ +static int assignment_compare(const assignment_naddr_t *x, const assignment_naddr_t *y) { + size_t i, j, ni = x->local.n, nj = y->local.n; + for (i = 0, j = 0;; i++, j++) { + if (i == ni && j == nj) return 0; + if (i == ni) return -1; /* x ran out first => x < y */ + if (j == nj) return 1; + unsigned short xi = x->local.items[i]; + unsigned short yj = y->local.items[j]; + if (xi < yj) return -1; + if (xi > yj) return 1; + /* Equal local var. Compare globals at that index. 
*/ + naddrspace_t gx = x->global[xi]; + naddrspace_t gy = y->global[yj]; + if (gx < gy) return -1; + if (gx > gy) return 1; + } +} + +static int assignments_naddr_locally_same(const assignment_naddr_t *a, + const assignment_naddr_t *b) { + size_t i; + if (!usset_equal(&a->local, &b->local)) return 0; + for (i = 0; i < a->local.n; i++) { + unsigned short v = a->local.items[i]; + if (a->global[v] != b->global[v]) return 0; + } + return 1; +} + +/* ---------- Merge sort the doubly-linked assignment list ---------- */ + +static assignment_naddr_t *al_merge(assignment_naddr_t *a, assignment_naddr_t *b) { + assignment_naddr_t head; + assignment_naddr_t *tail = &head; + head.next = NULL; + while (a && b) { + if (assignment_compare(a, b) <= 0) { + tail->next = a; a->prev = tail; a = a->next; + } else { + tail->next = b; b->prev = tail; b = b->next; + } + tail = tail->next; + } + if (a) { tail->next = a; a->prev = tail; } + else { tail->next = b; if (b) b->prev = tail; } + if (head.next) head.next->prev = NULL; + return head.next; +} + +static assignment_naddr_t *al_sort_nodes(assignment_naddr_t *head) { + if (!head || !head->next) return head; + /* split using slow/fast */ + assignment_naddr_t *slow = head; + assignment_naddr_t *fast = head->next; + while (fast && fast->next) { + slow = slow->next; + fast = fast->next->next; + } + assignment_naddr_t *mid = slow->next; + slow->next = NULL; + if (mid) mid->prev = NULL; + assignment_naddr_t *l = al_sort_nodes(head); + assignment_naddr_t *r = al_sort_nodes(mid); + return al_merge(l, r); +} + +static void al_sort(assignment_list_naddr_t *l) { + l->head = al_sort_nodes(l->head); + /* Fix tail. 
*/
+  assignment_naddr_t *p = l->head, *prev = NULL;
+  while (p) { prev = p; p = p->next; }
+  l->tail = prev;
+}
+
+/* ---------- tree_dec_naddr wrapper (parallel assignment lists) ---------- */
+
+/* A tree decomposition plus one assignment list per tree vertex.
+ * alist[v] belongs to vertex v of td.g; cap is the number of allocated
+ * (and al_init'ed) slots in alist. */
+typedef struct {
+  tree_dec_t td;
+  assignment_list_naddr_t *alist;
+  size_t cap;
+} tree_dec_naddr_t;
+
+/* Initialise the wrapped decomposition and start with no list slots. */
+static void tdn_init(tree_dec_naddr_t *t) {
+  tree_dec_init(&t->td);
+  t->alist = NULL;
+  t->cap = 0;
+}
+
+/* Grow alist so it has at least one initialised slot per vertex of td.g.
+ * Capacity starts at 8 and doubles; existing slots are kept, new ones are
+ * al_init'ed. Does nothing when capacity already suffices. Terminates the
+ * process if the array cannot be grown. */
+static void tdn_sync_arrays(tree_dec_naddr_t *t) {
+  size_t need = t->td.g.nvertices;
+  if (need <= t->cap) return;
+  size_t nc = t->cap ? t->cap * 2 : 8;
+  while (nc < need) nc *= 2;
+  /* Grow through a temporary: a failed realloc leaves the old block valid,
+   * so storing its NULL result directly in t->alist would leak that block
+   * and make the al_init loop below write through a null pointer. */
+  assignment_list_naddr_t *grown =
+      (assignment_list_naddr_t *)realloc(t->alist, nc * sizeof(*t->alist));
+  if (!grown) {
+    fprintf(stderr, "Out of memory while growing assignment lists.\n");
+    exit(EXIT_FAILURE);
+  }
+  t->alist = grown;
+  for (size_t i = t->cap; i < nc; i++) al_init(&t->alist[i]);
+  t->cap = nc;
+}
+
+/* Clear every per-vertex list, release the array and free the wrapped
+ * decomposition. */
+static void tdn_free(tree_dec_naddr_t *t) {
+  size_t n = t->td.g.nvertices;
+  size_t i;
+  /* Only the first `cap` slots exist. If vertices were added without a
+   * following tdn_sync_arrays call, nvertices can exceed cap (or alist can
+   * still be NULL), so never walk past what was allocated. */
+  if (n > t->cap) n = t->cap;
+  for (i = 0; i < n; i++)
+    al_clear(&t->alist[i]);
+  free(t->alist);
+  t->alist = NULL;
+  t->cap = 0;
+  tree_dec_free(&t->td);
+}
+
+/* ---------- annotate_cfg_naddr ---------- */
+
+/* Fill cfg->node[i].possible_naddrspaces for every vertex.
+ *
+ * Pass 1: a vertex whose iCode has an address space (getAddrspaceiCode) gets
+ * exactly that space; spaces are numbered from 0 in order of first
+ * appearance and recorded in `addrspaces`. Every other vertex starts with
+ * the single value -1. CALL, PCALL and FUNCTION vertices, and all vertices
+ * with a known space, are marked predetermined.
+ * Pass 2: each non-predetermined vertex repeatedly absorbs the sets of its
+ * out- and in-neighbours until no set grows any further.
+ *
+ * NOTE(review): the calloc result is not checked. */
+static void annotate_cfg_naddr(cfg_naddr_t *cfg, ns_map_t *addrspaces) {
+  sym_idx_map_t sym_to_index;
+  naddrspace_t na_max = -1;
+  size_t nv = cfg->g.nvertices;
+  char *predetermined = (char *)calloc(nv ? nv : 1, 1);
+  unsigned int i;
+
+  sim_init(&sym_to_index);
+
+  for (i = 0; i < nv; i++) {
+    const iCode *ic = cfg->node[i].ic;
+    const symbol *addrspace;
+
+    if (ic->op == CALL || ic->op == PCALL || ic->op == FUNCTION)
+      predetermined[i] = 1;
+
+    addrspace = getAddrspaceiCode(ic);
+    if (addrspace) {
+      naddrspace_t na;
+      if (!sim_find(&sym_to_index, addrspace, &na)) {
+        na = ++na_max; /* first time this symbol is seen: next free index */
+        sim_put(&sym_to_index, addrspace, na);
+      }
+      ns_put(addrspaces, na, addrspace);
+      usset_insert(&cfg->node[i].possible_naddrspaces, ns_enc(na));
+      predetermined[i] = 1;
+    } else {
+      usset_insert(&cfg->node[i].possible_naddrspaces, ns_enc((naddrspace_t)-1));
+    }
+  }
+
+  int change;
+  do {
+    change = 0;
+    for (i = 0; i < nv; i++) {
+      if (predetermined[i]) continue;
+
+      size_t oldsize = cfg->node[i].possible_naddrspaces.n;
+
+      size_t k;
+      unsigned int nbr;
+      float wt;
+      CG_FOREACH_OUT(&cfg->g, i, k, nbr, wt) {
+        (void)wt;
+        usset_union_into(&cfg->node[i].possible_naddrspaces,
+                         &cfg->node[nbr].possible_naddrspaces);
+      }
+      CG_FOREACH_IN(&cfg->g, i, k, nbr, wt) {
+        (void)wt;
+        usset_union_into(&cfg->node[i].possible_naddrspaces,
+                         &cfg->node[nbr].possible_naddrspaces);
+      }
+      if (oldsize != cfg->node[i].possible_naddrspaces.n)
+        change = 1; /* a set grew: run another sweep */
+    }
+  } while (change);
+
+  free(predetermined);
+  sim_free(&sym_to_index);
+}
+
+/* ---------- create_cfg_naddr ---------- */
+
+static void create_cfg_naddr(cfg_naddr_t *cfg, iCode *start_ic, ebbIndex *ebbi) {
+  key_map_t key_to_index;
+  iCode *ic;
+  unsigned int i = 0;
+
+  km_init(&key_to_index);
+
+  for (ic = start_ic; ic; ic = ic->next, i++) {
+    unsigned int v = cfg_add_vertex(cfg);
+    cfg->node[v].ic = ic;
+    km_put(&key_to_index, ic->key, v);
+  }
+
+  for (ic = start_ic; ic; ic = ic->next) {
+    unsigned int u = km_get(&key_to_index, ic->key);
+
+    if (ic->op != GOTO && ic->op != RETURN && ic->op != JUMPTABLE && ic->next)
+      cg_add_edge(&cfg->g, u,
+                  km_get(&key_to_index, ic->next->key), 3.0f);
+
+    if (ic->op == GOTO) {
+      cg_add_edge(&cfg->g, u,
+                  
km_get(&key_to_index,
+                         eBBWithEntryLabel(ebbi, ic->label)->sch->key),
+                  6.0f);
+    } else if (ic->op == RETURN) {
+      cg_add_edge(&cfg->g, u,
+                  km_get(&key_to_index,
+                         eBBWithEntryLabel(ebbi, returnLabel)->sch->key),
+                  6.0f);
+    } else if (ic->op == IFX) {
+      symbol *target = IC_TRUE(ic) ? IC_TRUE(ic) : IC_FALSE(ic); /* whichever branch label is set */
+      cg_add_edge(&cfg->g, u,
+                  km_get(&key_to_index,
+                         eBBWithEntryLabel(ebbi, target)->sch->key),
+                  6.0f);
+    } else if (ic->op == JUMPTABLE) {
+      symbol *lbl;
+      for (lbl = (symbol *)setFirstItem(IC_JTLABELS(ic)); lbl;
+           lbl = (symbol *)setNextItem(IC_JTLABELS(ic))) { /* one edge per jump-table label */
+        cg_add_edge(&cfg->g, u,
+                    km_get(&key_to_index,
+                           eBBWithEntryLabel(ebbi, lbl)->sch->key),
+                    6.0f);
+      }
+    }
+  }
+
+  km_free(&key_to_index);
+}
+
+/* ---------- dump_cfg_naddr (plain GraphViz) ---------- */
+/* Write the CFG as a GraphViz digraph to the file
+ * <dstFileName>.dumpnaddrcfg<function rname or "__global">.dot.
+ * Each vertex label shows the vertex index, the iCode key (-1 if the vertex
+ * has no iCode) and the decoded possible address spaces; each edge label
+ * shows its weight. Returns silently when dstFileName is NULL or the file
+ * cannot be opened.
+ * NOTE(review): the malloc result for `path` is not checked. */
+static void dump_cfg_naddr(const cfg_naddr_t *cfg) {
+  if (!dstFileName) return;
+  const char *suffix = ".dumpnaddrcfg";
+  const char *fname = currFunc ? currFunc->rname : "__global";
+  size_t len = strlen(dstFileName) + strlen(suffix) + strlen(fname) + 5; /* 4 for ".dot" + 1 for NUL */
+  char *path = (char *)malloc(len);
+  snprintf(path, len, "%s%s%s.dot", dstFileName, suffix, fname);
+  FILE *f = fopen(path, "w");
+  free(path);
+  if (!f) return;
+
+  fprintf(f, "digraph G {\n");
+  size_t v;
+  for (v = 0; v < cfg->g.nvertices; v++) {
+    fprintf(f, " %zu [label=\"%zu, %d: ", v, v,
+            cfg->node[v].ic ? cfg->node[v].ic->key : -1);
+    size_t i;
+    for (i = 0; i < cfg->node[v].possible_naddrspaces.n; i++) {
+      fprintf(f, "%d ",
+              (int)ns_dec(cfg->node[v].possible_naddrspaces.items[i]));
+    }
+    fprintf(f, "\"];\n");
+  }
+  for (v = 0; v < cfg->g.nvertices; v++) {
+    size_t i;
+    unsigned int nbr;
+    float wt;
+    CG_FOREACH_OUT(&cfg->g, v, i, nbr, wt) {
+      fprintf(f, " %zu -> %u [label=\"%g\"];\n", v, nbr, wt);
+    }
+  }
+  fprintf(f, "}\n");
+  fclose(f);
+}
+
+/* ---------- Tree DP: leaf/introduce/forget/join ---------- */
+/* Leaf node: seed T->alist[t] with one assignment of cost 0, an empty local
+ * set, and a global[] array with one entry per CFG vertex, every entry set
+ * to -2.
+ * NOTE(review): neither malloc result is checked. */
+static void tree_dec_naddrswitch_leaf(tree_dec_naddr_t *T, unsigned int t,
+                                      const cfg_naddr_t *G) {
+  assignment_naddr_t *a = (assignment_naddr_t *)malloc(sizeof(*a));
+  a->s = 0.0f;
+  usset_init(&a->local);
+  a->global_n = G->g.nvertices;
+  a->global = (naddrspace_t *)malloc((a->global_n ? a->global_n : 1) *
+                                     sizeof(naddrspace_t)); /* at least one element even for an empty CFG */
+  for (size_t i = 0; i < a->global_n; i++) a->global[i] = -2;
+  a->prev = a->next = NULL;
+  al_push_back_take(&T->alist[t], a);
+}
+
+/* Find the (single) child via cg out_edges of the tree (our tree uses
+ * bidirectional edges; children are out-neighbors from parent).
+ * `which` is the zero-based position among t's out-neighbours; returns
+ * (unsigned int)-1 when t has no out-neighbour at that position. */
+static unsigned int tree_child(const tree_dec_t *td, unsigned int t, int which) {
+  size_t i;
+  unsigned int nbr;
+  float wt;
+  int seen = 0;
+  CG_FOREACH_OUT(&td->g, t, i, nbr, wt) {
+    (void)wt;
+    if (seen == which) return nbr;
+    seen++;
+  }
+  return (unsigned int)-1;
+}
+
+static int tree_dec_naddrswitch_introduce(tree_dec_naddr_t *T, unsigned int t,
+                                          const cfg_naddr_t *G) {
+  unsigned int c = tree_child(&T->td, t, 0);
+  assignment_list_naddr_t *alist2 = &T->alist[t];
+  assignment_list_naddr_t *alist = &T->alist[c];
+
+  /* new_inst = bag[t] \ bag[c]. One element. 
*/
+  uiset_t new_inst;
+  uiset_init(&new_inst);
+  uiset_difference(&T->td.bag[t], &T->td.bag[c], &new_inst);
+  unsigned short i_var = (unsigned short)new_inst.items[0]; /* NOTE(review): assumes the difference is non-empty */
+  uiset_free(&new_inst);
+
+  const usset_t *poss = &G->node[i_var].possible_naddrspaces;
+
+  assignment_naddr_t *p = alist->head;
+  while (p) {
+    assignment_naddr_t *next = p->next; /* saved before p is unlinked */
+
+    /* Detach p from alist first (we'll either repurpose it or free it). */
+    if (p->prev) p->prev->next = p->next; else alist->head = p->next;
+    if (p->next) p->next->prev = p->prev; else alist->tail = p->prev;
+    alist->n--;
+    p->prev = p->next = NULL;
+
+    usset_insert(&p->local, i_var);
+
+    /* For each possible naddrspace, push a copy of (modified) p into
+     * alist2, with global[i_var] set. The last one reuses p itself. */
+    size_t np = poss->n;
+    if (np == 0) {
+      a_free(p); /* no candidate space for i_var: this assignment is dropped */
+    } else {
+      for (size_t k = 0; k < np - 1; k++) {
+        assignment_naddr_t *copy = a_clone(p);
+        copy->global[i_var] = ns_dec(poss->items[k]);
+        al_push_back_take(alist2, copy);
+      }
+      p->global[i_var] = ns_dec(poss->items[np - 1]);
+      al_push_back_take(alist2, p);
+    }
+
+    p = next;
+  }
+
+  /* alist (child) is now empty. */
+  return (int)alist2->n <= options.max_allocs_per_node ? 0 : -1; /* -1: too many assignments at this node */
+}
+/* Forget node: take over the child's assignment list, remove the forgotten
+ * CFG vertex (the single element of bag[c] \ bag[t]) from every
+ * assignment's local set, charge edge weights for address-space switches
+ * on its incident edges, then sort the list and keep only the cheapest
+ * assignment of each locally-identical group. */
+static void tree_dec_naddrswitch_forget(tree_dec_naddr_t *T, unsigned int t,
+                                        const cfg_naddr_t *G) {
+  unsigned int c = tree_child(&T->td, t, 0);
+  assignment_list_naddr_t *alist = &T->alist[t];
+
+  /* Move child assignments into t's list. */
+  al_swap(alist, &T->alist[c]);
+
+  /* old_inst = bag[c] \ bag[t]. One element. */
+  uiset_t old_inst;
+  uiset_init(&old_inst);
+  uiset_difference(&T->td.bag[c], &T->td.bag[t], &old_inst);
+  unsigned short i_var = (unsigned short)old_inst.items[0];
+  uiset_free(&old_inst);
+
+  /* For each assignment, drop i from local, and accumulate the cost of
+   * switching across edges incident to i whose other endpoint is also local. */
+  assignment_naddr_t *ai;
+  for (ai = alist->head; ai; ai = ai->next) {
+    usset_erase(&ai->local, i_var);
+
+    size_t k;
+    unsigned int nbr;
+    float wt;
+    /* out-edges: source=i_var, target=nbr */
+    CG_FOREACH_OUT(&G->g, i_var, k, nbr, wt) {
+      if (!usset_contains(&ai->local, (unsigned short)nbr) ||
+          ai->global[nbr] == -1) /* the edge target's space is -1: no cost */
+        continue;
+      if (ai->global[i_var] == ai->global[nbr])
+        continue;
+      ai->s += wt;
+    }
+    /* in-edges: source=nbr, target=i_var */
+    CG_FOREACH_IN(&G->g, i_var, k, nbr, wt) {
+      if (!usset_contains(&ai->local, (unsigned short)nbr) ||
+          ai->global[i_var] == -1) /* here the edge target is i_var itself */
+        continue;
+      if (ai->global[nbr] == ai->global[i_var])
+        continue;
+      ai->s += wt;
+    }
+  }
+
+  al_sort(alist); /* brings locally-identical assignments next to each other */
+
+  /* Collapse locally-identical runs, keeping the lower-cost one. */
+  assignment_naddr_t *cur = alist->head;
+  while (cur) {
+    assignment_naddr_t *aif = cur; /* current survivor of the run */
+    assignment_naddr_t *nx = cur->next;
+    while (nx && assignments_naddr_locally_same(aif, nx)) {
+      if (aif->s > nx->s) {
+        al_erase(alist, aif);
+        aif = nx;
+        nx = nx->next;
+      } else {
+        nx = al_erase(alist, nx); /* on equal cost the earlier node is kept */
+      }
+    }
+    cur = aif->next;
+  }
+}
+
+static void tree_dec_naddrswitch_join(tree_dec_naddr_t *T, unsigned int t,
+                                      const cfg_naddr_t *G) {
+  (void)G;
+  unsigned int c2 = tree_child(&T->td, t, 0);
+  unsigned int c3 = tree_child(&T->td, t, 1);
+
+  assignment_list_naddr_t *alist1 = &T->alist[t];
+  assignment_list_naddr_t *alist2 = &T->alist[c2];
+  assignment_list_naddr_t *alist3 = &T->alist[c3];
+
+  al_sort(alist2);
+  al_sort(alist3);
+
+  assignment_naddr_t *ai2 = alist2->head;
+  assignment_naddr_t *ai3 = alist3->head;
+
+  while (ai2 && ai3) {
+    if (assignments_naddr_locally_same(ai2, ai3)) {
+      ai2->s += ai3->s;
+      for (size_t i = 0; i < ai2->global_n; i++) {
+        if (ai2->global[i] == -2)
+          ai2->global[i] = ai3->global[i];
+      }
+      /* Take a copy into alist1 (since we still own ai2 in alist2
+       * and will free it below). 
*/
+      assignment_naddr_t *copy = a_clone(ai2);
+      al_push_back_take(alist1, copy);
+      ai2 = ai2->next;
+      ai3 = ai3->next;
+    } else {
+      int cmp = assignment_compare(ai2, ai3);
+      if (cmp < 0) ai2 = ai2->next; /* advance whichever list is behind */
+      else if (cmp > 0) ai3 = ai3->next;
+      else {
+        /* Same ordering but not locally same — advance both to match
+         * the C++ behavior of the loop body (which would just keep
+         * looping). In practice this branch should rarely trigger. */
+        ai2 = ai2->next;
+        ai3 = ai3->next;
+      }
+    }
+  }
+
+  al_clear(alist2);
+  al_clear(alist3);
+}
+/* Recursively process the subtree rooted at tree vertex t, dispatching on
+ * its out-degree: 0 = leaf, 1 = introduce (child bag smaller than t's bag)
+ * or forget (otherwise), 2 = join. For a join, the child with the larger
+ * td.weight is processed first. Returns -1 as soon as a child or an
+ * introduce step reports failure, 0 otherwise. Any other out-degree only
+ * prints "Not nice." and still returns 0. */
+static int tree_dec_naddrswitch_nodes(tree_dec_naddr_t *T, unsigned int t,
+                                      const cfg_naddr_t *G) {
+  size_t deg = cg_out_degree(&T->td.g, t);
+  unsigned int c0, c1;
+
+  switch (deg) {
+  case 0:
+    tree_dec_naddrswitch_leaf(T, t, G);
+    break;
+  case 1:
+    c0 = tree_child(&T->td, t, 0);
+    if (tree_dec_naddrswitch_nodes(T, c0, G)) return -1;
+    if (T->td.bag[c0].n < T->td.bag[t].n) {
+      if (tree_dec_naddrswitch_introduce(T, t, G)) return -1;
+    } else {
+      tree_dec_naddrswitch_forget(T, t, G);
+    }
+    break;
+  case 2:
+    c0 = tree_child(&T->td, t, 0);
+    c1 = tree_child(&T->td, t, 1);
+    if (T->td.weight[c0] < T->td.weight[c1]) {
+      unsigned int tmp = c0; c0 = c1; c1 = tmp; /* heavier child goes first */
+    }
+    if (tree_dec_naddrswitch_nodes(T, c0, G)) return -1;
+    if (tree_dec_naddrswitch_nodes(T, c1, G)) return -1;
+    tree_dec_naddrswitch_join(T, t, G);
+    break;
+  default:
+    fprintf(stderr, "Not nice.\n");
+    break;
+  }
+  return 0;
+}
+/* Apply assignment `a` to the code: for every CFG edge src -> tgt whose
+ * target space is not -1 and differs from the source space, call
+ * switchAddressSpaceAt on the target iCode with the symbol that
+ * `addrspaces` maps the target space to. A diagnostic is printed first
+ * when the target iCode is not the direct successor of the source iCode;
+ * the switch is still performed in that case. */
+static void implement_naddr_assignment(const assignment_naddr_t *a,
+                                       const cfg_naddr_t *G,
+                                       const ns_map_t *addrspaces) {
+  size_t src;
+  for (src = 0; src < G->g.nvertices; src++) {
+    size_t k;
+    unsigned int tgt;
+    float wt;
+    CG_FOREACH_OUT(&G->g, src, k, tgt, wt) {
+      (void)wt;
+      naddrspace_t sourcespace = a->global[src];
+      naddrspace_t targetspace = a->global[tgt];
+
+      if (targetspace == -1 || sourcespace == targetspace)
+        continue;
+
+      if (G->node[src].ic->next != G->node[tgt].ic)
+        fprintf(stderr, "Trying to switch address space at weird edge in CFG.");
+
+      switchAddressSpaceAt(G->node[tgt].ic, ns_get(addrspaces, targetspace));
+    }
+  }
+}
+/* Run the tree DP from the root of T and implement the first assignment
+ * left in the root's list. Returns 0 on success, -1 when the DP reports
+ * failure or the root list is empty. */
+static int tree_dec_address_switch(tree_dec_naddr_t *T, const cfg_naddr_t *G,
+                                   const ns_map_t *addrspaces) {
+  tdn_sync_arrays(T); /* make sure every tree vertex has a list slot */
+
+  unsigned int root = tree_dec_find_root(&T->td);
+  if (tree_dec_naddrswitch_nodes(T, root, G))
+    return -1;
+
+  assignment_naddr_t *winner = T->alist[root].head;
+  if (!winner) return -1;
+
+  implement_naddr_assignment(winner, G, addrspaces);
+  return 0;
+}
+
+/* ---------- Public entry point ---------- */
+/* Build and annotate the CFG for the iCode chain starting at `ic`,
+ * optionally dump it (options.dump_graphs), compute a nice tree
+ * decomposition, run the address-space switching DP, and release all
+ * temporary structures. Returns the result of tree_dec_address_switch
+ * (0 on success, -1 on failure). */
+int switchAddressSpacesOptimally(iCode *ic, ebbIndex *ebbi) {
+  cfg_naddr_t cfg;
+  tree_dec_naddr_t td;
+  ns_map_t addrspaces;
+
+  cfg_init(&cfg);
+  tdn_init(&td);
+  ns_init(&addrspaces);
+
+  create_cfg_naddr(&cfg, ic, ebbi);
+  annotate_cfg_naddr(&cfg, &addrspaces);
+
+  if (options.dump_graphs)
+    dump_cfg_naddr(&cfg);
+
+  tree_dec_thorup(&td.td, &cfg.g);
+  tree_dec_nicify(&td.td);
+  tdn_sync_arrays(&td); /* size the per-vertex lists after the tree has its final vertex count */
+
+  int rc = tree_dec_address_switch(&td, &cfg, &addrspaces);
+
+  tdn_free(&td);
+  ns_free(&addrspaces);
+  cfg_free(&cfg);
+
+  return rc;
+}
diff --git a/src/SDCCnaddr.cc b/src/SDCCnaddr.cc deleted file mode 100644 index 25f901824..000000000 --- a/src/SDCCnaddr.cc +++ /dev/null @@ -1,88 +0,0 @@ -// Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2011 -// -// (c) 2011 Goethe-Universität Frankfurt -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// -// Optimal placement of bank switching instructions for named address spaces. - -#include "SDCCnaddr.hpp" - -// A quick-and-dirty function to get the CFG from sdcc (a simplified version of -// the function from SDCCralloc.hpp). -void create_cfg_naddr(cfg_t &cfg, iCode *start_ic, ebbIndex *ebbi) { - iCode *ic; - - std::map key_to_index; - { - int i; - - for (ic = start_ic, i = 0; ic; ic = ic->next, i++) { - boost::add_vertex(cfg); - key_to_index[ic->key] = i; - cfg[i].ic = ic; - } - } - - // Get control flow graph from sdcc. - for (ic = start_ic; ic; ic = ic->next) { - if (ic->op != GOTO && ic->op != RETURN && ic->op != JUMPTABLE && ic->next) - boost::add_edge(key_to_index[ic->key], key_to_index[ic->next->key], 3.0f, - cfg); - - if (ic->op == GOTO) - boost::add_edge( - key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key], 6.0f, - cfg); - else if (ic->op == RETURN) - boost::add_edge( - key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key], 6.0f, - cfg); - else if (ic->op == IFX) - boost::add_edge( - key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? 
IC_TRUE(ic) - : IC_FALSE(ic)) - ->sch->key], - 6.0f, cfg); - else if (ic->op == JUMPTABLE) - for (symbol *lbl = (symbol *)(setFirstItem(IC_JTLABELS(ic))); lbl; - lbl = (symbol *)(setNextItem(IC_JTLABELS(ic)))) - boost::add_edge(key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key], - 6.0f, cfg); - } -} - -int switchAddressSpacesOptimally(iCode *ic, ebbIndex *ebbi) { - cfg_t control_flow_graph; - tree_dec_naddr_t tree_decomposition; - std::map addrspaces; - - create_cfg_naddr(control_flow_graph, ic, ebbi); - annotate_cfg_naddr(control_flow_graph, addrspaces); - - if (options.dump_graphs) - dump_cfg_naddr(control_flow_graph); - - thorup_tree_decomposition(tree_decomposition, control_flow_graph); - nicify(tree_decomposition); - - return (tree_dec_address_switch(tree_decomposition, control_flow_graph, - addrspaces)); -} diff --git a/src/SDCCnaddr.hpp b/src/SDCCnaddr.hpp deleted file mode 100644 index 10bd9a484..000000000 --- a/src/SDCCnaddr.hpp +++ /dev/null @@ -1,493 +0,0 @@ -// Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2011 -// -// (c) 2011 Goethe-Universität Frankfurt -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// -// Optimal placement of bank switching instructions for named address spaces. 
-// -// For details, see: -// -// Philipp Klaus Krause, -// "Optimal Placement of Bank Selection Instructions in Polynomial Time", -// Proceedings of the 16th International Workshop on Software and Compilers for -// Embedded Systems, M-SCOPES '13, pp. 23-30. Association for Computing -// Machinery, 2013. - -#ifndef SDCCNADDR_HH -#define SDCCNADDR_HH 1 - -#include -#include -#include -#include - -#include - -#include "SDCCtree_dec.hpp" - -extern "C" { -#include "SDCCBBlock.h" -#include "SDCCicode.h" -#include "SDCCopt.h" -#include "SDCCsymt.h" -#include "SDCCy.h" -} - -#ifdef HAVE_STX_BTREE_SET_H -#include -#endif - -typedef short int - naddrspace_t; // Named address spaces. -1: Undefined, Others: see map. - -#ifdef HAVE_STX_BTREE_SET_H -typedef stx::btree_set - naddrspaceset_t; // Faster than std::set -#else -typedef std::set naddrspaceset_t; -#endif - -struct assignment_naddr { - float s; - naddrspaceset_t local; - std::vector global; - - bool operator<(const assignment_naddr &a) const { - naddrspaceset_t::const_iterator i, ai, i_end, ai_end; - - i_end = local.end(); - ai_end = a.local.end(); - - for (i = local.begin(), ai = a.local.begin();; ++i, ++ai) { - if (i == i_end) - return (true); - if (ai == ai_end) - return (false); - - if (*i < *ai) - return (true); - if (*i > *ai) - return (false); - - if (global[*i] < a.global[*ai]) - return (true); - if (global[*i] > a.global[*ai]) - return (false); - } - } -}; - -bool assignments_naddr_locally_same(const assignment_naddr &a1, - const assignment_naddr &a2) { - if (a1.local != a2.local) - return (false); - - naddrspaceset_t::const_iterator i, i_end; - for (i = a1.local.begin(), i_end = a1.local.end(); i != i_end; ++i) - if (a1.global[*i] != a2.global[*i]) - return (false); - - return (true); -} - -struct cfg_naddr_node { - iCode *ic; - naddrspaceset_t possible_naddrspaces; -}; - -typedef std::list assignment_list_naddr_t; - -struct tree_dec_naddr_node { - std::set bag; - assignment_list_naddr_t assignments; - 
unsigned weight; // The weight is the number of nodes at which intermediate - // results need to be remembered. In general, to minimize - // memory consumption, at join nodes the child with maximum - // weight should be processed first. -}; - -typedef boost::adjacency_list - cfg_t; // The edge property is the cost of subdividing the edge and - // inserting a bank switching instruction. -typedef boost::adjacency_list - tree_dec_naddr_t; - -// Annotate nodes of the control flow graph with the set of possible named -// address spaces active there. -void annotate_cfg_naddr(cfg_t &cfg, - std::map &addrspaces) { - /* MSVC 2010 doesn't like the typename here, though it accepts it elsewhere */ - typedef /*typename*/ boost::graph_traits::vertex_descriptor vertex_t; - - std::map sym_to_index; - naddrspace_t na_max = -1; - - std::vector predetermined(boost::num_vertices(cfg), false); - - // Initialize the cfg vertices where there is information on the desired named - // address space. - for (vertex_t i = 0; i < boost::num_vertices(cfg); i++) { - const iCode *ic = cfg[i].ic; - const symbol *addrspace; - - // We do not know the current named address space when entering a function - // or after calling one. - if (ic->op == CALL || ic->op == PCALL || ic->op == FUNCTION) - predetermined[i] = true; - - // Set the required named address spaces - if (addrspace = getAddrspaceiCode(ic)) { - naddrspace_t na; - - if (sym_to_index.find(addrspace) == sym_to_index.end()) - sym_to_index[addrspace] = ++na_max; - na = sym_to_index[addrspace]; - addrspaces[na] = addrspace; - - cfg[i].possible_naddrspaces.insert(na); - predetermined[i] = true; - } else - cfg[i].possible_naddrspaces.insert(-1); - } - - // Extend. 
- bool change; - do { - change = false; - for (vertex_t i = 0; i < boost::num_vertices(cfg); i++) { - if (predetermined[i]) - continue; - - size_t oldsize = cfg[i].possible_naddrspaces.size(); - { - /* MSVC 2010 doesn't like the typename here, though it accepts it - * elsewhere */ - typedef /*typename*/ boost::graph_traits::out_edge_iterator - n_iter_t; - n_iter_t n, n_end; - for (boost::tie(n, n_end) = boost::out_edges(i, cfg); n != n_end; ++n) { - vertex_t v = boost::target(*n, cfg); - cfg[i].possible_naddrspaces.insert( - cfg[v].possible_naddrspaces.begin(), - cfg[v].possible_naddrspaces.end()); - } - } - { - /* MSVC 2010 doesn't like the typename here, though it accepts it - * elsewhere */ - typedef /*typename*/ boost::graph_traits::in_edge_iterator - n_iter_t; - n_iter_t n, n_end; - for (boost::tie(n, n_end) = boost::in_edges(i, cfg); n != n_end; ++n) { - vertex_t v = boost::source(*n, cfg); - cfg[i].possible_naddrspaces.insert( - cfg[v].possible_naddrspaces.begin(), - cfg[v].possible_naddrspaces.end()); - } - } - - if (oldsize != cfg[i].possible_naddrspaces.size()) - change = true; - } - } while (change); -} - -// Handle Leaf nodes in the nice tree decomposition -template -void tree_dec_naddrswitch_leaf( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - assignment_naddr a; - assignment_list_naddr_t &alist = T[t].assignments; - - a.s = 0; - a.global.resize(boost::num_vertices(G), -2); - alist.push_back(a); -} - -// Handle introduce nodes in the nice tree decomposition -template -int tree_dec_naddrswitch_introduce( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end; - assignment_list_naddr_t::iterator ai; - boost::tie(c, c_end) = adjacent_vertices(t, T); - - assignment_list_naddr_t &alist2 = T[t].assignments; - assignment_list_naddr_t &alist = T[*c].assignments; - - std::set new_inst; - 
std::set_difference(T[t].bag.begin(), T[t].bag.end(), T[*c].bag.begin(), - T[*c].bag.end(), std::inserter(new_inst, new_inst.end())); - unsigned short int i = *(new_inst.begin()); - - for (ai = alist.begin(); ai != alist.end(); ++ai) { - ai->local.insert(i); - - naddrspaceset_t::const_iterator ni, ni_end; - for (ni = G[i].possible_naddrspaces.begin(), - ni_end = G[i].possible_naddrspaces.end(); - ni != ni_end; ++ni) { - ai->global[i] = *ni; - alist2.push_back(*ai); - } - } - - alist.clear(); - - return ((int)alist2.size() <= options.max_allocs_per_node ? 0 : -1); -} - -// Handle forget nodes in the nice tree decomposition -template -void tree_dec_naddrswitch_forget( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end; - boost::tie(c, c_end) = adjacent_vertices(t, T); - - assignment_list_naddr_t &alist = T[t].assignments; - - std::swap(alist, T[*c].assignments); - - std::set old_inst; - std::set_difference(T[*c].bag.begin(), T[*c].bag.end(), T[t].bag.begin(), - T[t].bag.end(), std::inserter(old_inst, old_inst.end())); - unsigned short int i = *(old_inst.begin()); - - assignment_list_naddr_t::iterator ai, aif; - - // Restrict assignments (locally) to current variables. 
- for (ai = alist.begin(); ai != alist.end(); ++ai) { - ai->local.erase(i); - { - typedef typename boost::graph_traits::out_edge_iterator n_iter_t; - n_iter_t n, n_end; - for (boost::tie(n, n_end) = boost::out_edges(i, G); n != n_end; ++n) { - if (ai->local.find(boost::target(*n, G)) == ai->local.end() || - ai->global[boost::target(*n, G)] == -1) - continue; - if (ai->global[boost::source(*n, G)] == - ai->global[boost::target(*n, G)]) - continue; - ai->s += G[*n]; - } - } - { - typedef typename boost::graph_traits::in_edge_iterator n_iter_t; - n_iter_t n, n_end; - for (boost::tie(n, n_end) = boost::in_edges(i, G); n != n_end; ++n) { - if (ai->local.find(boost::source(*n, G)) == ai->local.end() || - ai->global[boost::target(*n, G)] == -1) - continue; - if (ai->global[boost::source(*n, G)] == - ai->global[boost::target(*n, G)]) - continue; - ai->s += G[*n]; - } - } - } - - alist.sort(); - - // Collapse (locally) identical assignments. - for (ai = alist.begin(); ai != alist.end();) { - aif = ai; - - for (++ai; - ai != alist.end() && assignments_naddr_locally_same(*aif, *ai);) { - if (aif->s > ai->s) { - alist.erase(aif); - aif = ai; - ++ai; - } else { - alist.erase(ai); - ai = aif; - ++ai; - } - } - } -} - -// Handle join nodes in the nice tree decomposition -template -void tree_dec_naddrswitch_join( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end, c2, c3; - boost::tie(c, c_end) = adjacent_vertices(t, T); - - c2 = c; - ++c; - c3 = c; - - assignment_list_naddr_t &alist1 = T[t].assignments; - assignment_list_naddr_t &alist2 = T[*c2].assignments; - assignment_list_naddr_t &alist3 = T[*c3].assignments; - - alist2.sort(); - alist3.sort(); - - assignment_list_naddr_t::iterator ai2, ai3; - for (ai2 = alist2.begin(), ai3 = alist3.begin(); - ai2 != alist2.end() && ai3 != alist3.end();) { - if (assignments_naddr_locally_same(*ai2, *ai3)) { - 
ai2->s += ai3->s; - for (size_t i = 0; i < ai2->global.size(); i++) - ai2->global[i] = - ((ai2->global[i] != -2) ? ai2->global[i] : ai3->global[i]); - alist1.push_back(*ai2); - ++ai2; - ++ai3; - } else if (*ai2 < *ai3) { - ++ai2; - continue; - } else if (*ai3 < *ai2) { - ++ai3; - continue; - } - } - - alist2.clear(); - alist3.clear(); -} - -template -int tree_dec_naddrswitch_nodes( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - - adjacency_iter_t c, c_end; - typename boost::graph_traits::vertex_descriptor c0, c1; - - boost::tie(c, c_end) = adjacent_vertices(t, T); - - switch (out_degree(t, T)) { - case 0: - tree_dec_naddrswitch_leaf(T, t, G); - break; - case 1: - c0 = *c; - tree_dec_naddrswitch_nodes(T, c0, G); - if (T[c0].bag.size() < T[t].bag.size()) { - if (tree_dec_naddrswitch_introduce(T, t, G)) - return (-1); - } else - tree_dec_naddrswitch_forget(T, t, G); - break; - case 2: - c0 = *c++; - c1 = *c; - - if (T[c0].weight < T[c1].weight) // Minimize memory consumption. - std::swap(c0, c1); - - tree_dec_naddrswitch_nodes(T, c0, G); - tree_dec_naddrswitch_nodes(T, c1, G); - tree_dec_naddrswitch_join(T, t, G); - break; - default: - std::cerr << "Not nice.\n"; - break; - } - return (0); -} - -template -static void implement_naddr_assignment( - const assignment_naddr &a, const G_t &G, - const std::map addrspaces) { - typedef typename boost::graph_traits::vertex_descriptor vertex_t; - typedef typename boost::graph_traits::edge_iterator ei_t; - ei_t e, e_end; - - for (boost::tie(e, e_end) = boost::edges(G); e != e_end; ++e) { - const vertex_t source = boost::source(*e, G); - const vertex_t target = boost::target(*e, G); - const naddrspace_t sourcespace = a.global[source]; - const naddrspace_t targetspace = a.global[target]; - - // Nothing to do if the space doesn't change, or we just forget it. 
- if (targetspace == -1 || sourcespace == targetspace) - continue; - - // This shouldn't happen with the CFGs sdcc generates and a cost function - // based on code size. - if (G[source].ic->next != G[target].ic) - std::cerr << "Trying to switch address space at weird edge in CFG."; - - switchAddressSpaceAt(G[target].ic, addrspaces.find(targetspace)->second); - } -} - -template -int tree_dec_address_switch( - T_t &T, const G_t &G, - const std::map addrspaces) { - if (tree_dec_naddrswitch_nodes(T, find_root(T), G)) - return (-1); - - const assignment_naddr &winner = *(T[find_root(T)].assignments.begin()); - -#if 0 - std::cout << "Winner: "; - for(unsigned int i = 0; i < boost::num_vertices(G); i++) - { - std::cout << "(" << i << ", " << int(winner.global[i]) << ") "; - } - std::cout << "\n"; - std::cout << "Cost: " << winner.s << "\n"; - std::cout.flush(); -#endif - - implement_naddr_assignment(winner, G, addrspaces); - - return (0); -} - -// Dump cfg, with numbered nodes, show possible address spaces at each node. -void dump_cfg_naddr(const cfg_t &cfg) { - std::ofstream dump_file((std::string(dstFileName) + ".dumpnaddrcfg" + - (currFunc ? currFunc->rname : "__global") + ".dot") - .c_str()); - - std::string *name = new std::string[num_vertices(cfg)]; - for (unsigned int i = 0; i < boost::num_vertices(cfg); i++) { - std::ostringstream os; - os << i << ", " << cfg[i].ic->key << ": "; - naddrspaceset_t::const_iterator n; - for (n = cfg[i].possible_naddrspaces.begin(); - n != cfg[i].possible_naddrspaces.end(); ++n) - os << *n << " "; - name[i] = os.str(); - } - boost::write_graphviz(dump_file, cfg, boost::make_label_writer(name)); - delete[] name; -} - -#endif diff --git a/src/SDCCralloc.c b/src/SDCCralloc.c new file mode 100644 index 000000000..992aa0437 --- /dev/null +++ b/src/SDCCralloc.c @@ -0,0 +1,1295 @@ +/* SDCCralloc.c — pure-C port of SDCCralloc.hpp. + * + * An optimal, polynomial-time register allocator (Krause 2013). 
This file
+ * implements the generic dynamic-programming algorithm over a nice tree
+ * decomposition. Port-specific cost/feasibility hooks are supplied by the
+ * backend (see backend/ralloc2.c).
+ */
+
+#include "SDCCralloc.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "common.h"
+#include "SDCCbtree.h"
+
+/* Infinity sentinel used pervasively by the algorithm. */
+static const float RA_INF = 1.0f / 0.0f;
+/* Nonzero when f equals RA_INF or when `f < RA_INF` is false; the second
+ * test also accepts values that do not compare at all, such as NaN. */
+static int ra_is_inf(float f) { return f == RA_INF || !(f < RA_INF); }
+
+/* -------------------- i_assignment_t -------------------- */
+/* Mark both slots of every one of the MAX_NUM_REGS registers as empty (-1). */
+void i_assignment_init(i_assignment_t *ia) {
+  reg_t r;
+  int i;
+  for (r = 0; r < MAX_NUM_REGS; r++)
+    for (i = 0; i < 2; i++)
+      ia->registers[r][i] = -1;
+}
+/* Record variable v in register r. If v is greater than the value in slot 1,
+ * slot 1's old value moves to slot 0 and v takes slot 1; otherwise v
+ * overwrites slot 0.
+ * NOTE(review): r indexes registers[] without a range check. */
+void i_assignment_add_var(i_assignment_t *ia, short v, signed char r) {
+  if (ia->registers[r][1] < v) {
+    ia->registers[r][0] = ia->registers[r][1];
+    ia->registers[r][1] = v;
+  } else {
+    ia->registers[r][0] = v;
+  }
+}
+/* Remove variable v from each of the first port->num_regs registers. When v
+ * sits in slot 1, slot 0's value moves up into slot 1; the vacated slot is
+ * set to -1. */
+void i_assignment_remove_var(i_assignment_t *ia, short v) {
+  reg_t r;
+  for (r = 0; r < port->num_regs; r++) {
+    if (ia->registers[r][1] == v) {
+      ia->registers[r][1] = ia->registers[r][0];
+      ia->registers[r][0] = -1;
+    } else if (ia->registers[r][0] == v) {
+      ia->registers[r][0] = -1;
+    }
+  }
+}
+
+/* -------------------- assignment_t -------------------- */
+/* Put `a` into the empty state: cost 0, empty local set, no global or
+ * i_costs storage, cleared register assignment, not marked. */
+void assignment_init(assignment_t *a) {
+  a->s = 0.0f;
+  sss_init(&a->local);
+  a->global = NULL;
+  a->global_n = 0;
+  a->i_costs = NULL;
+  a->i_costs_n = 0;
+  a->i_costs_cap = 0;
+  i_assignment_init(&a->i_assignment);
+  a->marked = 0;
+}
+/* Release the storage owned by `a` (local set, global array, i_costs array)
+ * and reset those pointers and counts. s, i_assignment and marked are left
+ * as they are. */
+void assignment_free(assignment_t *a) {
+  sss_free(&a->local);
+  free(a->global);
+  a->global = NULL;
+  a->global_n = 0;
+  free(a->i_costs);
+  a->i_costs = NULL;
+  a->i_costs_n = a->i_costs_cap = 0;
+}
+/* Deep-copy src into dst, releasing dst's previous contents first.
+ * NOTE(review): dst must not alias src, because assignment_free(dst) runs
+ * before any field of src is read. */
+void assignment_copy(assignment_t *dst, const assignment_t *src) {
+  assignment_free(dst);
+  dst->s = src->s;
+  sss_init(&dst->local);
+  sss_copy(&dst->local, &src->local);
+  dst->global_n = src->global_n;
+  if 
(src->global_n) { + dst->global = (signed char *)malloc(src->global_n * sizeof(signed char)); + memcpy(dst->global, src->global, src->global_n * sizeof(signed char)); + } else { + dst->global = NULL; + } + dst->i_costs_n = src->i_costs_n; + dst->i_costs_cap = src->i_costs_n; + if (src->i_costs_n) { + dst->i_costs = (icost_entry_t *)malloc(src->i_costs_n * sizeof(icost_entry_t)); + memcpy(dst->i_costs, src->i_costs, src->i_costs_n * sizeof(icost_entry_t)); + } else { + dst->i_costs = NULL; + } + dst->i_assignment = src->i_assignment; + dst->marked = src->marked; +} + +void assignment_move(assignment_t *dst, assignment_t *src) { + assignment_free(dst); + *dst = *src; + /* Zero out src so it can be safely free'd. */ + sss_init(&src->local); + src->global = NULL; + src->global_n = 0; + src->i_costs = NULL; + src->i_costs_n = src->i_costs_cap = 0; + i_assignment_init(&src->i_assignment); + src->marked = 0; + src->s = 0.0f; +} + +/* Binary search for key in i_costs. Returns insertion index; *found=1 if + * exact match at returned index. */ +static size_t icost_lower(const assignment_t *a, int key, int *found) { + size_t lo = 0, hi = a->i_costs_n; + while (lo < hi) { + size_t mid = lo + ((hi - lo) >> 1); + if (a->i_costs[mid].key < key) + lo = mid + 1; + else + hi = mid; + } + *found = (lo < a->i_costs_n && a->i_costs[lo].key == key); + return lo; +} + +void assignment_icost_set(assignment_t *a, int key, float val) { + int found; + size_t ix = icost_lower(a, key, &found); + if (found) { + a->i_costs[ix].val = val; + return; + } + if (a->i_costs_n == a->i_costs_cap) { + a->i_costs_cap = a->i_costs_cap ? 
a->i_costs_cap * 2 : 4; + a->i_costs = (icost_entry_t *)realloc(a->i_costs, + a->i_costs_cap * sizeof(icost_entry_t)); + } + memmove(&a->i_costs[ix + 1], &a->i_costs[ix], + (a->i_costs_n - ix) * sizeof(icost_entry_t)); + a->i_costs[ix].key = key; + a->i_costs[ix].val = val; + a->i_costs_n++; +} + +int assignment_icost_get(const assignment_t *a, int key, float *out) { + int found; + size_t ix = icost_lower(a, key, &found); + if (!found) { + /* Default: if key was never set the original std::map would insert 0. */ + if (out) *out = 0.0f; + return 0; + } + if (out) *out = a->i_costs[ix].val; + return 1; +} + +void assignment_icost_erase(assignment_t *a, int key) { + int found; + size_t ix = icost_lower(a, key, &found); + if (!found) return; + memmove(&a->i_costs[ix], &a->i_costs[ix + 1], + (a->i_costs_n - ix - 1) * sizeof(icost_entry_t)); + a->i_costs_n--; +} + +int assignment_compare(const assignment_t *x, const assignment_t *y) { + size_t i = 0, j = 0; + while (1) { + if (i == x->local.n) return (j == y->local.n) ? 0 : -1; + if (j == y->local.n) return 1; + short xi = x->local.items[i]; + short yj = y->local.items[j]; + if (xi < yj) return -1; + if (xi > yj) return 1; + signed char gx = x->global[xi]; + signed char gy = y->global[yj]; + if (gx < gy) return -1; + if (gx > gy) return 1; + i++; + j++; + } +} + +/* -------------------- cfg_ralloc_t -------------------- */ + +void cfg_ralloc_init(cfg_ralloc_t *c) { + cg_init(&c->g, CG_BIDIRECTIONAL, 0); + c->node = NULL; + c->cap = 0; +} + +void cfg_ralloc_free(cfg_ralloc_t *c) { + size_t v; + for (v = 0; v < c->g.nvertices; v++) { + free(c->node[v].operands); + sss_free(&c->node[v].alive); + sss_free(&c->node[v].dying); + } + free(c->node); + c->node = NULL; + c->cap = 0; + cg_free(&c->g); +} + +static unsigned int cfg_add_vertex(cfg_ralloc_t *c) { + if (c->g.nvertices >= c->cap) { + size_t nc = c->cap ? 
c->cap * 2 : 16; + c->node = (cfg_node_t *)realloc(c->node, nc * sizeof(cfg_node_t)); + c->cap = nc; + } + unsigned int v = cg_add_vertex(&c->g); + c->node[v].ic = NULL; + c->node[v].operands = NULL; + c->node[v].operands_n = 0; + c->node[v].operands_cap = 0; + sss_init(&c->node[v].alive); + sss_init(&c->node[v].dying); + return v; +} + +size_t cfg_operands_lower_bound(const cfg_node_t *n, int k) { + size_t lo = 0, hi = n->operands_n; + while (lo < hi) { + size_t mid = lo + ((hi - lo) >> 1); + if (n->operands[mid].key < k) + lo = mid + 1; + else + hi = mid; + } + return lo; +} + +size_t cfg_operands_equal_range(const cfg_node_t *n, int k, size_t *e_out) { + size_t s = cfg_operands_lower_bound(n, k); + size_t e = s; + while (e < n->operands_n && n->operands[e].key == k) e++; + *e_out = e; + return s; +} + +/* Insert into operand multimap, preserving sorted-by-key order. Duplicate (k,v) + * pairs retained — mimics std::multimap::insert behaviour. */ +static void cfg_operands_insert(cfg_node_t *n, int k, short v) { + if (n->operands_n == n->operands_cap) { + n->operands_cap = n->operands_cap ? n->operands_cap * 2 : 4; + n->operands = (operand_entry_t *)realloc( + n->operands, n->operands_cap * sizeof(operand_entry_t)); + } + size_t ix = cfg_operands_lower_bound(n, k); + /* Insert after existing entries with the same key (upper_bound). */ + while (ix < n->operands_n && n->operands[ix].key == k) ix++; + memmove(&n->operands[ix + 1], &n->operands[ix], + (n->operands_n - ix) * sizeof(operand_entry_t)); + n->operands[ix].key = k; + n->operands[ix].var = v; + n->operands_n++; +} + +/* Returns 1 if operand multimap has any entry with key k. 
*/ +static int cfg_operands_has_key(const cfg_node_t *n, int k) { + size_t e; + size_t s = cfg_operands_equal_range(n, k, &e); + return s < e; +} + +/* -------------------- con_t -------------------- */ + +void con_init(con_t *c) { + cg_init(&c->g, CG_UNDIRECTED, 0); + c->node = NULL; + c->cap = 0; +} + +void con_free(con_t *c) { + free(c->node); + c->node = NULL; + c->cap = 0; + cg_free(&c->g); +} + +static unsigned int con_add_vertex(con_t *c) { + if (c->g.nvertices >= c->cap) { + size_t nc = c->cap ? c->cap * 2 : 16; + c->node = (con_node_t *)realloc(c->node, nc * sizeof(con_node_t)); + c->cap = nc; + } + unsigned int v = cg_add_vertex(&c->g); + c->node[v].v = -1; + c->node[v].byte = 0; + c->node[v].size = 0; + c->node[v].name = NULL; + return v; +} + +/* -------------------- tree_dec_ralloc_t -------------------- */ + +void tree_dec_ralloc_init(tree_dec_ralloc_t *T) { + tree_dec_init(&T->td); + T->node = NULL; + T->cap = 0; +} + +static void free_alist(tree_dec_ralloc_node_t *n) { + assignment_node_t *p = n->alist_head; + while (p) { + assignment_node_t *nx = p->next; + assignment_free(&p->a); + free(p); + p = nx; + } + n->alist_head = n->alist_tail = NULL; + n->alist_n = 0; +} + +void tree_dec_ralloc_free(tree_dec_ralloc_t *T) { + size_t v; + for (v = 0; v < T->td.g.nvertices; v++) { + sss_free(&T->node[v].alive); + free_alist(&T->node[v]); + } + free(T->node); + T->node = NULL; + T->cap = 0; + tree_dec_free(&T->td); +} + +/* -------------------- assignment list helpers -------------------- */ + +static assignment_node_t *alist_push_back_move(tree_dec_ralloc_node_t *n, + assignment_t *a_to_move) { + assignment_node_t *node = (assignment_node_t *)malloc(sizeof(assignment_node_t)); + assignment_init(&node->a); + assignment_move(&node->a, a_to_move); + node->prev = n->alist_tail; + node->next = NULL; + if (n->alist_tail) n->alist_tail->next = node; else n->alist_head = node; + n->alist_tail = node; + n->alist_n++; + return node; +} + +static assignment_node_t 
*alist_push_back_copy(tree_dec_ralloc_node_t *n, + const assignment_t *a) { + assignment_node_t *node = (assignment_node_t *)malloc(sizeof(assignment_node_t)); + assignment_init(&node->a); + assignment_copy(&node->a, a); + node->prev = n->alist_tail; + node->next = NULL; + if (n->alist_tail) n->alist_tail->next = node; else n->alist_head = node; + n->alist_tail = node; + n->alist_n++; + return node; +} + +static assignment_node_t *alist_erase(tree_dec_ralloc_node_t *n, + assignment_node_t *node) { + assignment_node_t *nx = node->next; + if (node->prev) node->prev->next = node->next; else n->alist_head = node->next; + if (node->next) node->next->prev = node->prev; else n->alist_tail = node->prev; + assignment_free(&node->a); + free(node); + n->alist_n--; + return nx; +} + +static void alist_clear(tree_dec_ralloc_node_t *n) { + free_alist(n); +} + +static void alist_swap(tree_dec_ralloc_node_t *a, tree_dec_ralloc_node_t *b) { + assignment_node_t *h = a->alist_head, *t = a->alist_tail; + size_t nn = a->alist_n; + a->alist_head = b->alist_head; + a->alist_tail = b->alist_tail; + a->alist_n = b->alist_n; + b->alist_head = h; + b->alist_tail = t; + b->alist_n = nn; +} + +/* Merge-sort the doubly-linked assignment list by assignment_compare. 
*/ +static assignment_node_t *alist_merge(assignment_node_t *a, assignment_node_t *b) { + assignment_node_t head; + assignment_node_t *tail = &head; + head.next = NULL; + while (a && b) { + if (assignment_compare(&a->a, &b->a) <= 0) { + tail->next = a; a->prev = tail; a = a->next; + } else { + tail->next = b; b->prev = tail; b = b->next; + } + tail = tail->next; + } + if (a) { tail->next = a; a->prev = tail; } + else { tail->next = b; if (b) b->prev = tail; } + if (head.next) head.next->prev = NULL; + return head.next; +} + +static assignment_node_t *alist_sort_nodes(assignment_node_t *head) { + if (!head || !head->next) return head; + assignment_node_t *slow = head, *fast = head->next; + while (fast && fast->next) { slow = slow->next; fast = fast->next->next; } + assignment_node_t *mid = slow->next; + slow->next = NULL; + if (mid) mid->prev = NULL; + assignment_node_t *left = alist_sort_nodes(head); + assignment_node_t *right = alist_sort_nodes(mid); + return alist_merge(left, right); +} + +static void alist_sort(tree_dec_ralloc_node_t *n) { + n->alist_head = alist_sort_nodes(n->alist_head); + assignment_node_t *p = n->alist_head, *last = NULL; + while (p) { last = p; p = p->next; } + n->alist_tail = last; +} + +/* -------------------- add_operand_to_cfg_node helper -------------------- */ + +/* Key -> var map (OP_SYMBOL->key -> first var index). The original used a + * std::map, var_t>. We flatten to (key, byte) -> var. 
*/ +typedef struct { + int key; + int byte; + short var; +} sym_map_entry_t; + +typedef struct { + sym_map_entry_t *items; + size_t n; + size_t cap; +} sym_map_t; + +static void sym_map_init(sym_map_t *m) { m->items = NULL; m->n = m->cap = 0; } +static void sym_map_free(sym_map_t *m) { free(m->items); m->items = NULL; m->n = m->cap = 0; } + +static int sym_map_find(const sym_map_t *m, int key, int byte, short *out) { + size_t i; + for (i = 0; i < m->n; i++) + if (m->items[i].key == key && m->items[i].byte == byte) { + *out = m->items[i].var; return 1; + } + return 0; +} + +static void sym_map_put(sym_map_t *m, int key, int byte, short var) { + if (m->n == m->cap) { + m->cap = m->cap ? m->cap * 2 : 32; + m->items = (sym_map_entry_t *)realloc(m->items, m->cap * sizeof(sym_map_entry_t)); + } + m->items[m->n].key = key; + m->items[m->n].byte = byte; + m->items[m->n].var = var; + m->n++; +} + +/* Key -> cfg vertex index map. */ +typedef struct { + int *keys; + unsigned int *idxs; + size_t n, cap; +} key_idx_map_t; + +static void kim_init(key_idx_map_t *m) { m->keys = NULL; m->idxs = NULL; m->n = m->cap = 0; } +static void kim_free(key_idx_map_t *m) { free(m->keys); free(m->idxs); m->keys = NULL; m->idxs = NULL; m->n = m->cap = 0; } + +static void kim_put(key_idx_map_t *m, int key, unsigned int idx) { + if (m->n == m->cap) { + m->cap = m->cap ? 
m->cap * 2 : 32; + m->keys = (int *)realloc(m->keys, m->cap * sizeof(int)); + m->idxs = (unsigned int *)realloc(m->idxs, m->cap * sizeof(unsigned int)); + } + m->keys[m->n] = key; + m->idxs[m->n] = idx; + m->n++; +} + +static int kim_get(const key_idx_map_t *m, int key, unsigned int *out) { + size_t i; + for (i = 0; i < m->n; i++) + if (m->keys[i] == key) { *out = m->idxs[i]; return 1; } + return 0; +} + +static void add_operand_to_cfg_node(cfg_node_t *n, operand *o, const sym_map_t *sm) { + if (!o || !IS_SYMOP(o)) return; + int k0key = OP_SYMBOL_CONST(o)->key; + short v0; + if (!sym_map_find(sm, k0key, 0, &v0)) return; + if (cfg_operands_has_key(n, k0key)) return; + int nRegs = OP_SYMBOL_CONST(o)->nRegs; + int k; + for (k = 0; k < nRegs; k++) { + short v; + if (sym_map_find(sm, k0key, k, &v)) + cfg_operands_insert(n, k0key, v); + } +} + +/* -------------------- create_cfg -------------------- */ + +iCode *ralloc_create_cfg(cfg_ralloc_t *cfg, con_t *con, ebbIndex *ebbi) { + eBBlock **ebbs = ebbi->bbOrder; + iCode *start_ic, *ic; + + key_idx_map_t key_to_index; + sym_map_t sym_to_index; + kim_init(&key_to_index); + sym_map_init(&sym_to_index); + + start_ic = iCodeLabelOptimize(iCodeFromeBBlock(ebbs, ebbi->count)); + + /* Pass 1: create cfg vertices, build conflict graph vertices from live ranges. 
*/ + { + int i; + short j; + wassertl(!cg_num_vertices(&cfg->g), "CFG non-empty before creation."); + for (ic = start_ic, i = 0, j = 0; ic; ic = ic->next, i++) { + cfg_add_vertex(cfg); + kim_put(&key_to_index, ic->key, (unsigned int)i); + + if (ic->op == SEND && ic->builtinSEND) { + operand *bi_parms[MAX_BUILTIN_ARGS]; + int nbi_parms; + getBuiltinParms(ic, &nbi_parms, bi_parms); + } + + ralloc_extra_ic_generated(ic); + + cfg->node[i].ic = ic; + + if (ic->generated) continue; + + int j2; + for (j2 = 0; j2 <= operandKey; j2++) { + if (bitVectBitValue(ic->rlive, j2)) { + symbol *sym = (symbol *)(hTabItemWithKey(liveRanges, j2)); + if (!sym->for_newralloc) continue; + + short dummy; + if (sym_map_find(&sym_to_index, j2, 0, &dummy)) continue; + + int k; + for (k = 0; k < sym->nRegs; k++) { + con_add_vertex(con); + con->node[j].v = j2; + con->node[j].byte = k; + con->node[j].size = sym->nRegs; + con->node[j].name = sym->name; + sym_map_put(&sym_to_index, j2, k, j); + int l; + for (l = 0; l < k; l++) + cg_add_edge(&con->g, (unsigned int)(j - l - 1), + (unsigned int)j, 0.0f); + j++; + } + } + } + } + } + + /* Pass 2: edges + operand maps + alive sets. */ + for (ic = start_ic; ic; ic = ic->next) { + unsigned int my_idx; + if (!kim_get(&key_to_index, ic->key, &my_idx)) continue; + + if (ic->op != GOTO && ic->op != RETURN && ic->op != JUMPTABLE && ic->next) { + unsigned int nx; + if (kim_get(&key_to_index, ic->next->key, &nx)) + cg_add_edge(&cfg->g, my_idx, nx, 0.0f); + } + + if (ic->op == GOTO) { + unsigned int tgt; + if (kim_get(&key_to_index, + eBBWithEntryLabel(ebbi, ic->label)->sch->key, &tgt)) + cg_add_edge(&cfg->g, my_idx, tgt, 0.0f); + } else if (ic->op == RETURN) { + unsigned int tgt; + if (kim_get(&key_to_index, + eBBWithEntryLabel(ebbi, returnLabel)->sch->key, &tgt)) + cg_add_edge(&cfg->g, my_idx, tgt, 0.0f); + } else if (ic->op == IFX) { + symbol *lbl = IC_TRUE(ic) ? 
IC_TRUE(ic) : IC_FALSE(ic); + unsigned int tgt; + if (kim_get(&key_to_index, eBBWithEntryLabel(ebbi, lbl)->sch->key, &tgt)) + cg_add_edge(&cfg->g, my_idx, tgt, 0.0f); + } else if (ic->op == JUMPTABLE) { + symbol *lbl; + for (lbl = (symbol *)setFirstItem(IC_JTLABELS(ic)); lbl; + lbl = (symbol *)setNextItem(IC_JTLABELS(ic))) { + unsigned int tgt; + if (kim_get(&key_to_index, + eBBWithEntryLabel(ebbi, lbl)->sch->key, &tgt)) + cg_add_edge(&cfg->g, my_idx, tgt, 0.0f); + } + } + + int i; + for (i = 0; i <= operandKey; i++) { + short dummy; + if (!sym_map_find(&sym_to_index, i, 0, &dummy)) continue; + if (bitVectBitValue(ic->rlive, i)) { + symbol *isym = (symbol *)hTabItemWithKey(liveRanges, i); + int k; + for (k = 0; k < isym->nRegs; k++) { + short v; + if (sym_map_find(&sym_to_index, i, k, &v)) + sss_insert(&cfg->node[my_idx].alive, v); + } + if (isym->block) + isym->block = btree_lowest_common_ancestor(isym->block, ic->block); + else + isym->block = ic->block; + } + } + + if (ic->op == IFX) + add_operand_to_cfg_node(&cfg->node[my_idx], IC_COND(ic), &sym_to_index); + else if (ic->op == JUMPTABLE) + add_operand_to_cfg_node(&cfg->node[my_idx], IC_JTCOND(ic), &sym_to_index); + else { + add_operand_to_cfg_node(&cfg->node[my_idx], IC_RESULT(ic), &sym_to_index); + add_operand_to_cfg_node(&cfg->node[my_idx], IC_LEFT(ic), &sym_to_index); + add_operand_to_cfg_node(&cfg->node[my_idx], IC_RIGHT(ic), &sym_to_index); + } + + ralloc_add_operand_conflicts_in_node(&cfg->node[my_idx], con); + } + + /* Non-connected live ranges workaround — unchanged from original logic. + * We skip this pass if there are no con vertices. */ + { + size_t ncon = cg_num_vertices(&con->g); + size_t ncfg = cg_num_vertices(&cfg->g); + for (short ii = (short)ncon - 1; ii >= 0; ii--) { + cgraph_t cfg2; + cg_init(&cfg2, CG_UNDIRECTED, 0); + cg_copy_topology(&cfg2, &cfg->g, CG_UNDIRECTED); + /* "Remove" vertices where alive does not contain ii by clearing their edges. 
+ * In our cgraph we can't cheaply remove vertices, so we just zero out edges + * for nodes that don't contain ii. */ + unsigned int j; + for (j = 0; j < ncfg; j++) { + if (!sss_contains(&cfg->node[j].alive, ii)) { + /* Clear j's adjacency by removing each edge individually. */ + /* Copy out[j].dst into a buffer first since remove mutates. */ + size_t deg = cfg2.out[j].n; + unsigned int *buf = (unsigned int *)malloc(deg * sizeof(unsigned int)); + size_t kk; + for (kk = 0; kk < deg; kk++) buf[kk] = cfg2.out[j].dst[kk]; + for (kk = 0; kk < deg; kk++) cg_remove_edge(&cfg2, j, buf[kk]); + free(buf); + } + } + unsigned int *comp = (unsigned int *)malloc(ncfg * sizeof(unsigned int)); + size_t ncomp = cg_connected_components(&cfg2, comp); + if (ncomp > 1) { + fprintf(stderr, + "Warning: Non-connected liverange found and extended to " + "connected component of the CFG: %s. Please contact sdcc " + "authors with source code to reproduce.\n", + con->node[ii].name ? con->node[ii].name : "?"); + /* Recompute over the full (unfiltered) cfg topology. */ + cgraph_t cfg3; + cg_init(&cfg3, CG_UNDIRECTED, 0); + cg_copy_topology(&cfg3, &cfg->g, CG_UNDIRECTED); + unsigned int *comp2 = (unsigned int *)malloc(ncfg * sizeof(unsigned int)); + cg_connected_components(&cfg3, comp2); + unsigned int jj; + for (jj = 0; jj + 1 < ncfg; jj++) { + if (sss_contains(&cfg->node[jj].alive, ii)) { + unsigned int kk; + for (kk = 0; kk + 1 < ncfg; kk++) { + if (comp2[jj] == comp2[kk]) + sss_insert(&cfg->node[kk].alive, ii); + } + } + } + free(comp2); + cg_free(&cfg3); + } + free(comp); + cg_free(&cfg2); + } + } + + /* Compute dying sets. dying = alive - {variables needed by successors in + * meaningful ways; see original}. */ + { + size_t ncfg = cg_num_vertices(&cfg->g); + unsigned int i; + for (i = 0; i < ncfg; i++) { + sss_copy(&cfg->node[i].dying, &cfg->node[i].alive); + /* Walk outgoing neighbours. 
*/ + size_t k; + unsigned int nbr; + float wt; + CG_FOREACH_OUT(&cfg->g, i, k, nbr, wt) { + (void)wt; + size_t vi; + for (vi = 0; vi < cfg->node[nbr].alive.n; vi++) { + short vv = cfg->node[nbr].alive.items[vi]; + const symbol *vsym = + (symbol *)hTabItemWithKey(liveRanges, con->node[vv].v); + const operand *left = IC_LEFT(cfg->node[nbr].ic); + const operand *right = IC_RIGHT(cfg->node[nbr].ic); + const operand *result = IC_RESULT(cfg->node[nbr].ic); + if (!POINTER_SET(cfg->node[nbr].ic) && + (!left || !IS_SYMOP(left) || + OP_SYMBOL_CONST(left)->key != vsym->key) && + (!right || !IS_SYMOP(right) || + OP_SYMBOL_CONST(right)->key != vsym->key) && + result && IS_SYMOP(result) && + OP_SYMBOL_CONST(result)->key == vsym->key) + continue; + sss_erase(&cfg->node[i].dying, vv); + } + } + } + } + + /* Add conflict graph edges: pairs of surviving variables at each CFG node. */ + { + size_t ncfg = cg_num_vertices(&cfg->g); + unsigned int i; + for (i = 0; i < ncfg; i++) { + const iCode *icn = cfg->node[i].ic; + size_t vi; + for (vi = 0; vi < cfg->node[i].alive.n; vi++) { + short v = cfg->node[i].alive.items[vi]; + if (sss_contains(&cfg->node[i].dying, v)) continue; + /* Skip variables that are the "result of this instruction" — no self-conflict. 
*/ + int skip_this = 0; + if (icn->op != IFX && icn->op != JUMPTABLE && IC_RESULT(icn) && + IS_SYMOP(IC_RESULT(icn))) { + size_t oe; + size_t os = cfg_operands_equal_range( + &cfg->node[i], OP_SYMBOL_CONST(IC_RESULT(icn))->key, &oe); + size_t oi; + for (oi = os; oi < oe; oi++) + if (cfg->node[i].operands[oi].var == v) { skip_this = 1; break; } + } + if (skip_this) continue; + + size_t vi2; + for (vi2 = 0; vi2 < cfg->node[i].alive.n; vi2++) { + short v2 = cfg->node[i].alive.items[vi2]; + if (v == v2) continue; + if (sss_contains(&cfg->node[i].dying, v2)) continue; + if (!cg_has_edge(&con->g, (unsigned int)v, (unsigned int)v2)) + cg_add_edge(&con->g, (unsigned int)v, (unsigned int)v2, 0.0f); + } + } + } + } + + kim_free(&key_to_index); + sym_map_free(&sym_to_index); + return start_ic; +} + +/* -------------------- alive_tree_dec -------------------- */ + +void ralloc_alive_tree_dec(tree_dec_ralloc_t *T, const cfg_ralloc_t *G) { + size_t nt = cg_num_vertices(&T->td.g); + unsigned int i; + for (i = 0; i < nt; i++) { + sss_clear(&T->node[i].alive); + size_t bi; + for (bi = 0; bi < T->td.bag[i].n; bi++) { + unsigned int v = T->td.bag[i].items[bi]; + size_t k; + for (k = 0; k < G->node[v].alive.n; k++) + sss_insert(&T->node[i].alive, G->node[v].alive.items[k]); + } + } +} + +/* -------------------- assignment helpers for DP -------------------- */ + +static int assignment_conflict(const assignment_t *a, const con_t *I, short v, + reg_t r) { + size_t i; + for (i = 0; i < a->local.n; i++) { + short w = a->local.items[i]; + if (a->global[w] != r) continue; + if (cg_has_edge(&I->g, (unsigned int)w, (unsigned int)v)) return 1; + } + return 0; +} + +static int assignments_locally_same(const assignment_t *a1, const assignment_t *a2) { + if (!sss_equal(&a1->local, &a2->local)) return 0; + size_t i; + for (i = 0; i < a1->local.n; i++) { + short v = a1->local.items[i]; + if (a1->global[v] != a2->global[v]) return 0; + } + return 1; +} + +/* Compute set-intersection (a.local ∩ 
G[i].alive) into out (caller-init'd). */ +static void sss_intersect_into(sssset_t *out, const sssset_t *a, const sssset_t *b) { + sss_clear(out); + size_t i = 0, j = 0; + while (i < a->n && j < b->n) { + if (a->items[i] < b->items[j]) i++; + else if (a->items[i] > b->items[j]) j++; + else { sss_insert(out, a->items[i]); i++; j++; } + } +} + +static void assignments_introduce_instruction(tree_dec_ralloc_node_t *n, + unsigned short i, + const cfg_ralloc_t *G) { + assignment_node_t *an; + sssset_t tmp; + sss_init(&tmp); + for (an = n->alist_head; an; an = an->next) { + sss_intersect_into(&tmp, &an->a.local, &G->node[i].alive); + i_assignment_t ia; + i_assignment_init(&ia); + size_t k; + for (k = 0; k < tmp.n; k++) { + short v = tmp.items[k]; + if (an->a.global[v] >= 0) + i_assignment_add_var(&ia, v, an->a.global[v]); + } + an->a.i_assignment = ia; + } + sss_free(&tmp); +} + +static void assignments_introduce_variable(tree_dec_ralloc_node_t *n, + unsigned short i, short v, + const cfg_ralloc_t *G, const con_t *I) { + /* Take a snapshot of the current list head/tail; new entries appended during + * the loop must not be revisited. 
*/ + size_t orig_n = n->alist_n; + assignment_node_t *an = n->alist_head; + size_t seen = 0; + int a_initialized; + assignment_t a; + assignment_init(&a); + + while (an && seen < orig_n) { + a_initialized = 0; + reg_t r; + for (r = 0; r < port->num_regs; r++) { + if (!assignment_conflict(&an->a, I, v, r)) { + if (!a_initialized) { + assignment_copy(&a, &an->a); + an->a.marked = 1; + a.marked = 0; + sss_insert(&a.local, v); + a_initialized = 1; + } + a.global[v] = r; + i_assignment_add_var(&a.i_assignment, v, r); + if (!ralloc_assignment_hopeless(&a, i, G, I, v)) + alist_push_back_copy(n, &a); + i_assignment_remove_var(&a.i_assignment, v); + } + } + an = an->next; + seen++; + } + assignment_free(&a); +} + +/* -------------------- drop_worst_assignments -------------------- */ + +typedef struct { + assignment_node_t *node; + float s; +} drop_rep_t; + +static float compability_cost(const assignment_t *a, const assignment_t *ac, + const con_t *I) { + float c = 0.0f; + size_t vi; + (void)I; + for (vi = 0; vi < ac->local.n; vi++) { + short v = ac->local.items[vi]; + if (a->global[v] != ac->global[v]) { c += 1000.0f; continue; } + } + return c; +} + +static int drop_rep_cmp(const void *x, const void *y) { + float a = ((const drop_rep_t *)x)->s; + float b = ((const drop_rep_t *)y)->s; + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +static void drop_worst_assignments(tree_dec_ralloc_node_t *n, unsigned short i, + const cfg_ralloc_t *G, const con_t *I, + const assignment_t *ac, + int *assignment_optimal) { + size_t alist_size = n->alist_n; + if (alist_size * (size_t)port->num_regs <= (size_t)options.max_allocs_per_node || + alist_size <= 1) + return; + + *assignment_optimal = 0; + + drop_rep_t *arep = (drop_rep_t *)malloc(alist_size * sizeof(drop_rep_t)); + size_t idx = 0; + assignment_node_t *an; + for (an = n->alist_head; an; an = an->next) { + arep[idx].node = an; + arep[idx].s = an->a.s + ralloc_rough_cost_estimate(&an->a, i, G, I) + + 
compability_cost(&an->a, ac, I); + idx++; + } + + /* Keep the best `keep` entries (excluding the very first one, which we + * preserve always — matches the std::nth_element with `arep + 1`). */ + size_t keep = (size_t)options.max_allocs_per_node / (size_t)port->num_regs; + if (keep == 0) keep = 1; + /* Sort the suffix [1..alist_size) by score and drop the tail. */ + qsort(arep + 1, alist_size - 1, sizeof(drop_rep_t), drop_rep_cmp); + + size_t k; + for (k = keep + 1; k < alist_size; k++) { + alist_erase(n, arep[k].node); + } + free(arep); +} + +/* -------------------- tree_dec_ralloc_leaf -------------------- */ + +static void tree_dec_ralloc_leaf(tree_dec_ralloc_t *T, unsigned int t, + const cfg_ralloc_t *G, const con_t *I) { + (void)G; + assignment_t a; + assignment_init(&a); + a.s = 0.0f; + a.global_n = cg_num_vertices(&I->g); + a.global = (signed char *)malloc(a.global_n * sizeof(signed char)); + size_t k; + for (k = 0; k < a.global_n; k++) a.global[k] = -1; + alist_push_back_move(&T->node[t], &a); + assignment_free(&a); +} + +/* -------------------- tree_dec_ralloc_introduce -------------------- */ + +static unsigned int first_out_neighbour(const cgraph_t *g, unsigned int v) { + return g->out[v].dst[0]; +} + +static void tree_dec_ralloc_introduce(tree_dec_ralloc_t *T, unsigned int t, + const cfg_ralloc_t *G, const con_t *I, + const assignment_t *ac, + int *assignment_optimal) { + unsigned int c = first_out_neighbour(&T->td.g, t); + + alist_swap(&T->node[t], &T->node[c]); + + sssset_t new_vars; + sss_init(&new_vars); + { + /* set_difference: T[t].alive - T[c].alive where alive is sssset. 
*/ + size_t i = 0, j = 0; + while (i < T->node[t].alive.n && j < T->node[c].alive.n) { + short a = T->node[t].alive.items[i]; + short b = T->node[c].alive.items[j]; + if (a < b) { sss_insert(&new_vars, a); i++; } + else if (a > b) j++; + else { i++; j++; } + } + while (i < T->node[t].alive.n) { + sss_insert(&new_vars, T->node[t].alive.items[i]); i++; + } + } + + /* set_difference over bag (unsigned int). */ + unsigned int new_i; + { + size_t i = 0, j = 0; + new_i = 0; + int found = 0; + while (i < T->td.bag[t].n && j < T->td.bag[c].n) { + unsigned int a = T->td.bag[t].items[i]; + unsigned int b = T->td.bag[c].items[j]; + if (a < b) { new_i = a; found = 1; break; } + else if (a > b) j++; + else { i++; j++; } + } + if (!found && i < T->td.bag[t].n) new_i = T->td.bag[t].items[i]; + } + unsigned short i_idx = (unsigned short)new_i; + + assignments_introduce_instruction(&T->node[t], i_idx, G); + + size_t vi; + for (vi = 0; vi < new_vars.n; vi++) { + drop_worst_assignments(&T->node[t], i_idx, G, I, ac, assignment_optimal); + assignments_introduce_variable(&T->node[t], i_idx, new_vars.items[vi], G, I); + } + + /* Accumulate instruction cost; erase assignments with infinite cost. */ + { + assignment_node_t *an = T->node[t].alist_head; + while (an) { + float c_inst = ralloc_instruction_cost(&an->a, i_idx, G, I); + assignment_icost_set(&an->a, (int)i_idx, c_inst); + an->a.s += c_inst; + if (ra_is_inf(an->a.s)) { + an = alist_erase(&T->node[t], an); + } else { + an = an->next; + } + } + } + + sss_free(&new_vars); +} + +/* -------------------- tree_dec_ralloc_forget -------------------- */ + +static void tree_dec_ralloc_forget(tree_dec_ralloc_t *T, unsigned int t, + const cfg_ralloc_t *G, const con_t *I) { + (void)G; (void)I; + unsigned int c = first_out_neighbour(&T->td.g, t); + + alist_swap(&T->node[t], &T->node[c]); + + /* old_inst: T[c].bag - T[t].bag. Take first element. 
*/ + unsigned int old_i; + int old_i_found = 0; + { + size_t i = 0, j = 0; + while (i < T->td.bag[c].n && j < T->td.bag[t].n) { + unsigned int a = T->td.bag[c].items[i]; + unsigned int b = T->td.bag[t].items[j]; + if (a < b) { old_i = a; old_i_found = 1; break; } + else if (a > b) j++; + else { i++; j++; } + } + if (!old_i_found && i < T->td.bag[c].n) { old_i = T->td.bag[c].items[i]; old_i_found = 1; } + } + + /* old_vars: T[c].alive - T[t].alive. */ + sssset_t old_vars; + sss_init(&old_vars); + { + size_t i = 0, j = 0; + while (i < T->node[c].alive.n && j < T->node[t].alive.n) { + short a = T->node[c].alive.items[i]; + short b = T->node[t].alive.items[j]; + if (a < b) { sss_insert(&old_vars, a); i++; } + else if (a > b) j++; + else { i++; j++; } + } + while (i < T->node[c].alive.n) { + sss_insert(&old_vars, T->node[c].alive.items[i]); i++; + } + } + + /* Restrict each assignment's local to current. */ + { + assignment_node_t *an; + for (an = T->node[t].alist_head; an; an = an->next) { + size_t k; + for (k = 0; k < old_vars.n; k++) + sss_erase(&an->a.local, old_vars.items[k]); + if (old_i_found) + assignment_icost_erase(&an->a, (int)old_i); + } + } + + alist_sort(&T->node[t]); + + /* Collapse locally-identical assignments, keeping the minimum-cost one. 
*/ + { + assignment_node_t *ai = T->node[t].alist_head; + while (ai) { + assignment_node_t *aif = ai; + ai = ai->next; + while (ai && assignments_locally_same(&aif->a, &ai->a)) { + if (aif->a.s > ai->a.s) { + alist_erase(&T->node[t], aif); + aif = ai; + ai = ai->next; + } else { + ai = alist_erase(&T->node[t], ai); + } + } + } + } + + sss_free(&old_vars); +} + +/* -------------------- tree_dec_ralloc_join -------------------- */ + +static void tree_dec_ralloc_join(tree_dec_ralloc_t *T, unsigned int t, + const cfg_ralloc_t *G, const con_t *I) { + (void)G; (void)I; + unsigned int c2 = T->td.g.out[t].dst[0]; + unsigned int c3 = T->td.g.out[t].dst[1]; + + tree_dec_ralloc_node_t *alist1 = &T->node[t]; + tree_dec_ralloc_node_t *alist2 = &T->node[c2]; + tree_dec_ralloc_node_t *alist3 = &T->node[c3]; + + alist_sort(alist2); + alist_sort(alist3); + + assignment_node_t *ai2 = alist2->alist_head; + assignment_node_t *ai3 = alist3->alist_head; + while (ai2 && ai3) { + if (assignments_locally_same(&ai2->a, &ai3->a)) { + ai2->a.s += ai3->a.s; + /* Avoid double-counting instruction costs in shared bag. 
*/ + size_t bi; + for (bi = 0; bi < T->td.bag[t].n; bi++) { + int key = (int)T->td.bag[t].items[bi]; + float v; + if (assignment_icost_get(&ai2->a, key, &v)) + ai2->a.s -= v; + } + size_t k; + for (k = 0; k < ai2->a.global_n; k++) + if (ai2->a.global[k] == -1) ai2->a.global[k] = ai3->a.global[k]; + alist_push_back_copy(alist1, &ai2->a); + ai2 = ai2->next; + ai3 = ai3->next; + } else { + int cmp = assignment_compare(&ai2->a, &ai3->a); + if (cmp < 0) ai2 = ai2->next; + else if (cmp > 0) ai3 = ai3->next; + else { /* same by cmp but not "locally same" should be impossible; guard */ + ai2 = ai2->next; ai3 = ai3->next; + } + } + } + + alist_clear(alist2); + alist_clear(alist3); +} + +/* -------------------- tree_dec_ralloc_nodes -------------------- */ + +static void tree_dec_ralloc_nodes(tree_dec_ralloc_t *T, unsigned int t, + const cfg_ralloc_t *G, const con_t *I, + const assignment_t *ac, + int *assignment_optimal) { + size_t od = cg_out_degree(&T->td.g, t); + switch (od) { + case 0: + tree_dec_ralloc_leaf(T, t, G, I); + break; + case 1: { + unsigned int c0 = T->td.g.out[t].dst[0]; + tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal); + if (T->td.bag[c0].n < T->td.bag[t].n) + tree_dec_ralloc_introduce(T, t, G, I, ac, assignment_optimal); + else + tree_dec_ralloc_forget(T, t, G, I); + break; + } + case 2: { + unsigned int c0 = T->td.g.out[t].dst[0]; + unsigned int c1 = T->td.g.out[t].dst[1]; + tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal); + { + assignment_t ac2; + assignment_init(&ac2); + ralloc_get_best_local_assignment_biased(&ac2, c0, T); + tree_dec_ralloc_nodes(T, c1, G, I, &ac2, assignment_optimal); + assignment_free(&ac2); + } + tree_dec_ralloc_join(T, t, G, I); + break; + } + default: + fprintf(stderr, "Not nice.\n"); + break; + } +} + +/* -------------------- re_root / good_re_root -------------------- */ + +/* (vertex, size) pair used by find_best_root. 
*/ +typedef struct { unsigned int v; size_t s; } vs_pair_t; + +static vs_pair_t find_best_root(const tree_dec_ralloc_t *T, unsigned int t, + size_t t_s, unsigned int t_old, size_t t_old_s) { /* Searches the subtree below t and returns the leaf with the largest carried size, or (t_old, t_old_s) if no leaf is strictly larger. The size carried into a child is the child's alive-set size, or the parent's carried size when that set is empty. */ + size_t od = cg_out_degree(&T->td.g, t); + vs_pair_t r; + switch (od) { + case 0: + if (t_s > t_old_s) { r.v = t; r.s = t_s; } + else { r.v = t_old; r.s = t_old_s; } + return r; + case 1: { + unsigned int c = T->td.g.out[t].dst[0]; + size_t cs = T->node[c].alive.n ? T->node[c].alive.n : t_s; + return find_best_root(T, c, cs, t_old, t_old_s); + } + case 2: { + unsigned int c0 = T->td.g.out[t].dst[0]; + unsigned int c1 = T->td.g.out[t].dst[1]; + size_t c0s = T->node[c0].alive.n ? T->node[c0].alive.n : t_s; + vs_pair_t t0 = find_best_root(T, c0, c0s, t_old, t_old_s); + size_t c1s = T->node[c1].alive.n ? T->node[c1].alive.n : t_s; + unsigned int new_old = t0.s > t_old_s ? t0.v : t_old; /* best candidate so far after the first subtree */ + size_t new_old_s = t0.s > t_old_s ? t0.s : t_old_s; + return find_best_root(T, c1, c1s, new_old, new_old_s); + } + default: + fprintf(stderr, "Not nice.\n"); + r.v = t_old; r.s = t_old_s; return r; + } +} + +static void re_root(tree_dec_ralloc_t *T, unsigned int t) { /* Makes t the root: follows first in-edges from t up to the current root and reverses every edge on that path (re-added with weight 0.0f). Does nothing if t has no in-edge. 
*/ + if (T->td.g.in[t].n == 0) return; + unsigned int s0 = t; + unsigned int s1 = T->td.g.in[t].dst[0]; + while (T->td.g.in[s1].n > 0) { + unsigned int s2 = T->td.g.in[s1].dst[0]; /* read the next parent before the edge s1 -> s0 is reversed */ + cg_remove_edge(&T->td.g, s1, s0); + cg_add_edge(&T->td.g, s0, s1, 0.0f); + s0 = s1; + s1 = s2; + } + cg_remove_edge(&T->td.g, s1, s0); + cg_add_edge(&T->td.g, s0, s1, 0.0f); +} + +void ralloc_good_re_root(tree_dec_ralloc_t *T) { /* Starts at the current root, skips down through first children with empty alive sets, asks find_best_root for a candidate and re-roots there. If the candidate's alive set is non-empty it only prints an error and returns. */ + unsigned int t = tree_dec_find_root(&T->td); + + /* Walk down while the first child has empty alive. */ + while (cg_out_degree(&T->td.g, t) > 0) { + unsigned int c = T->td.g.out[t].dst[0]; + if (T->node[c].alive.n) break; + t = c; + } + + size_t t_s = (cg_out_degree(&T->td.g, t) > 0 ? 
+ T->node[T->td.g.out[t].dst[0]].alive.n : 0); + vs_pair_t best = find_best_root(T, t, t_s, t, t_s); + t = best.v; + + if (T->node[t].alive.n) { + fprintf(stderr, "Error: Invalid root.\n"); + return; + } + + re_root(T, t); +} + +/* ralloc_tree_dec_ralloc: runs tree_dec_ralloc_nodes from the root, copies the first assignment of the root's list into winner_out and returns !assignment_optimal (0 = optimal). Exits the process when the root has no assignment or when the winner's global_n differs from the conflict graph's vertex count; the two cases print different messages. */ + +int ralloc_tree_dec_ralloc(tree_dec_ralloc_t *T, const cfg_ralloc_t *G, + const con_t *I, assignment_t *winner_out) { + int assignment_optimal = 1; + assignment_t ac; + assignment_init(&ac); + + unsigned int root = tree_dec_find_root(&T->td); + tree_dec_ralloc_nodes(T, root, G, I, &ac, &assignment_optimal); + + /* Winner = first assignment in root's list (should be the only/best one). */ + if (!T->node[root].alist_head) { + fprintf(stderr, "ERROR: No Assignments at root\n"); + exit(-1); + } + assignment_copy(winner_out, &T->node[root].alist_head->a); + + if (winner_out->global_n != cg_num_vertices(&I->g)) { + fprintf(stderr, "ERROR: Root assignment size does not match conflict graph\n"); + exit(-1); + } + + assignment_free(&ac); + return !assignment_optimal; +} diff --git a/src/SDCCralloc.h b/src/SDCCralloc.h new file mode 100644 index 000000000..428d4bd89 --- /dev/null +++ b/src/SDCCralloc.h @@ -0,0 +1,205 @@ +/* SDCCralloc.h — public API for the C port of SDCCralloc.hpp. + * + * An optimal, polynomial-time register allocator (Krause 2013). The generic + * DP algorithm lives in SDCCralloc.c; the six port-specific customization + * hooks are defined by the backend (see backend/ralloc2.c for Z80). 
+ */ +#ifndef KCC_SDCCRALLOC_H +#define KCC_SDCCRALLOC_H + +#include + +#include "SDCCtree_dec.h" +#include "util/cgraph.h" +#include "util/uiset.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct iCode; /* from common.h / SDCCicode.h */ +struct ebbIndex; /* from SDCCBBlock.h */ + +/* A signed-short variable index; matches var_t in the original. */ +typedef short var_t; +typedef signed char reg_t; + +/* Integer upper bound on port->num_regs; used to size fixed per-instruction + * state. 
*/ +#define MAX_NUM_REGS 9 + +/* -------------------- per-instruction assignment state -------------------- */ + +typedef struct { + short registers[MAX_NUM_REGS][2]; /* two variable slots per register; NOTE(review): the C++ original initialised every slot to -1 - confirm i_assignment_init does the same */ +} i_assignment_t; + +void i_assignment_init(i_assignment_t *ia); +void i_assignment_add_var(i_assignment_t *ia, short v, signed char r); +void i_assignment_remove_var(i_assignment_t *ia, short v); + +/* -------------------- icost map entry (key -> val); element type of assignment_t.i_costs -------------------- */ + +typedef struct { + int key; + float val; +} icost_entry_t; + +/* -------------------- assignment (DP state) -------------------- */ + +typedef struct { + float s; /* accumulated cost of this assignment */ + sssset_t local; /* set of var_t entries (varset_t in the C++ original) */ + signed char *global; /* global[var] = reg (-1 if none); length global_n */ + size_t global_n; + + icost_entry_t *i_costs; /* sorted by key */ + size_t i_costs_n; + size_t i_costs_cap; + + i_assignment_t i_assignment; + int marked; +} assignment_t; + +void assignment_init(assignment_t *a); +void assignment_free(assignment_t *a); +/* Deep copy src->dst. Frees dst first. */ +void assignment_copy(assignment_t *dst, const assignment_t *src); +/* Move src -> dst (dst takes ownership; src becomes empty). */ +void assignment_move(assignment_t *dst, assignment_t *src); + +/* icost map ops on a->i_costs. The join step treats a nonzero return from assignment_icost_get as "found" and then reads *out. */ +void assignment_icost_set(assignment_t *a, int key, float val); +int assignment_icost_get(const assignment_t *a, int key, float *out); +void assignment_icost_erase(assignment_t *a, int key); + +/* Lexicographic compare for the sorted-merge at join nodes (same semantics + * as the original `operator<`). Returns -1/0/1. 
*/ +int assignment_compare(const assignment_t *x, const assignment_t *y); + +/* -------------------- cfg node -------------------- */ + +/* Multimap entry: key (SDCC symbol key) -> var; multiple entries may share + * key (one per byte). Kept sorted by key. 
*/ +typedef struct { + int key; + short var; +} operand_entry_t; + +typedef struct { + struct iCode *ic; + operand_entry_t *operands; + size_t operands_n; + size_t operands_cap; + sssset_t alive; + sssset_t dying; +} cfg_node_t; + +typedef struct { + cgraph_t g; /* CG_BIDIRECTIONAL */ + cfg_node_t *node; + size_t cap; +} cfg_ralloc_t; + +void cfg_ralloc_init(cfg_ralloc_t *c); +void cfg_ralloc_free(cfg_ralloc_t *c); + +/* Return first index with key >= k (lower_bound in operands array). */ +size_t cfg_operands_lower_bound(const cfg_node_t *n, int k); +/* Return the start index of the entries with operand key k; the end index is stored in *e_out. */ +size_t cfg_operands_equal_range(const cfg_node_t *n, int k, size_t *e_out); + +/* -------------------- conflict graph -------------------- */ + +typedef struct { + int v; /* NOTE(review): in the C++ original v was the SDCC live-range key, byte the byte index within the symbol, size the symbol's nRegs and name the symbol's name - confirm ralloc_create_cfg fills them the same way */ + int byte; + int size; + char *name; +} con_node_t; + +typedef struct { + cgraph_t g; /* CG_UNDIRECTED */ + con_node_t *node; + size_t cap; +} con_t; + +void con_init(con_t *c); +void con_free(con_t *c); + +/* -------------------- tree decomposition node data -------------------- */ + +typedef struct assignment_node { /* node of the doubly linked assignment list kept per decomposition vertex */ + assignment_t a; + struct assignment_node *prev; + struct assignment_node *next; +} assignment_node_t; + +typedef struct { + sssset_t alive; + assignment_node_t *alist_head; + assignment_node_t *alist_tail; + size_t alist_n; +} tree_dec_ralloc_node_t; + +typedef struct { + tree_dec_t td; + tree_dec_ralloc_node_t *node; /* indexed by tree-decomposition vertex (T->node[t]) */ + size_t cap; +} tree_dec_ralloc_t; + +void tree_dec_ralloc_init(tree_dec_ralloc_t *T); +void tree_dec_ralloc_free(tree_dec_ralloc_t *T); + +/* -------------------- generic algorithm entry points 
-------------------- */ + +/* Build the CFG + conflict graph from SDCC iCodes. Returns the first ic. */ +struct iCode *ralloc_create_cfg(cfg_ralloc_t *cfg, con_t *conflict_graph, + struct ebbIndex *ebbi); + +/* Populate tree_dec.alive sets from the CFG. 
*/ +void ralloc_alive_tree_dec(tree_dec_ralloc_t *T, const cfg_ralloc_t *G); + +/* Re-root tree decomposition to improve the assignment-removal heuristic. */ +void ralloc_good_re_root(tree_dec_ralloc_t *T); + +/* Top-level allocator. Returns 0 if assignment is optimal, 1 if pruning kicked + * in. Writes the best assignment to the winner out-parameter. Caller owns the + * returned assignment's internal buffers and must assignment_free() it. winner_out must already be initialised (assignment_init): it is filled with assignment_copy, which frees the destination first. Exits the process if the root ends up without a usable assignment. */ +int ralloc_tree_dec_ralloc(tree_dec_ralloc_t *T, const cfg_ralloc_t *G, + const con_t *I, assignment_t *winner_out); + +/* -------------------- port-specific customization hooks -------------------- */ + +/* Computed cost of running instruction i under assignment a. */ +float ralloc_instruction_cost(const assignment_t *a, unsigned int i, + const cfg_ralloc_t *G, const con_t *I); + +/* Early-prune: presumably returns nonzero when the partial assignment cannot be extended to a valid one (the C++ original only kept assignments for which this was false) - confirm against the backend. */ +int ralloc_assignment_hopeless(const assignment_t *a, unsigned int i, + const cfg_ralloc_t *G, const con_t *I, + short lastvar); + +/* Rough cost estimate, used for dropping worst assignments. */ +float ralloc_rough_cost_estimate(const assignment_t *a, unsigned int i, + const cfg_ralloc_t *G, const con_t *I); + +/* Add conflict-graph edges due to operand-usage patterns at this cfg node. */ +void ralloc_add_operand_conflicts_in_node(const cfg_node_t *n, con_t *I); + +/* Like get_best_local_assignment but biased to avoid "risky" registers. Writes its result to out; t is the tree-decomposition vertex whose list is consulted. 
*/ +void ralloc_get_best_local_assignment_biased(assignment_t *out, + unsigned int t, + const tree_dec_ralloc_t *T); + +/* Mark iCodes whose code gets generated as a side effect of another. */ +void ralloc_extra_ic_generated(struct iCode *ic); + +/* Public entry point used by ralloc.c. 
*/ +struct iCode *z80_ralloc2_cc(struct ebbIndex *ebbi); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/SDCCralloc.hpp b/src/SDCCralloc.hpp deleted file mode 100644 index b0f9105fa..000000000 --- a/src/SDCCralloc.hpp +++ /dev/null @@ -1,1267 +0,0 @@ -// Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2010 -// - 2013 -// -// (c) 2010-2012 Goethe-Universität Frankfurt -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// An optimal, polynomial-time register allocator. -// -// For details, see: -// -// Philipp Klaus Krause, -// "Optimal Register Allocation in Polynomial Time", -// Compiler Construction - 22nd International Conference, CC 2013, Held as Part -// of the European Joint Conferences on Theory and Practice of Software, ETAPS -// 2013. Proceedings, Lecture Notes in Computer Science, volume 7791, pp. 1-20. -// Springer, -// 2013. -// -// To use this from a port do the following: -// -// 1) Supply a cost function -// template -// float instruction_cost(const assignment &a, unsigned short int i, const G_t -// &G, const I_t &I); Which can range from simple, e.g. cost 1 for each byte -// accessed in a register, cost 4 for each byte accessed in memory to quite -// involved, e.g. the number of bytes of code the code generator would generate. 
-// -// 2) Call -// create_cfg(), thorup_tree_decomposition(), nicify(), alive_tree_dec(), -// tree_dec_ralloc_nodes(). -// -// The Z80 port can serve as an example, see z80_ralloc2_cc() in z80/ralloc2.cc. - -#ifndef SDCCRALLOC_HH -#define SDCCRALLOC_HH 1 - -#include -#include -#include -#include -#include - -#include -#if BOOST_VERSION >= 106000 -// Workaround for https://svn.boost.org/trac/boost/ticket/11880 -#include -#endif -#include -#include -#include - -#include "SDCCtree_dec.hpp" -#include "common.h" - -#ifdef HAVE_STX_BTREE_SET_H -#include -#endif -#ifdef HAVE_STX_BTREE_MAP_H -#include -#endif - -extern "C" { -#include "SDCCbtree.h" -} - -typedef short int var_t; -typedef signed char reg_t; - -// Integer constant upper bound on port->num_regs -#define MAX_NUM_REGS 9 - -// Assignment at an instruction -struct i_assignment_t { - var_t registers[MAX_NUM_REGS][2]; - - i_assignment_t(void) { - for (reg_t r = 0; r < MAX_NUM_REGS; r++) - for (unsigned int i = 0; i < 2; i++) - registers[r][i] = -1; - } - -#if 0 - bool operator<(const i_assignment_t &i_a) const - { - for (reg_t r = 0; r < port->num_regs; r++) - for (unsigned int i = 0; i < 2; i++) - { - if (registers[r][i] < i_a.registers[r][i]) - return(true); - else if (registers[r][i] > i_a.registers[r][i]) - return(false); - } - return(false); - } -#endif - - void add_var(var_t v, reg_t r) { - if (registers[r][1] < v) { - registers[r][0] = registers[r][1]; - registers[r][1] = v; - } else - registers[r][0] = v; - } - - void remove_var(var_t v) { - for (reg_t r = 0; r < port->num_regs; r++) { - if (registers[r][1] == v) { - registers[r][1] = registers[r][0]; - registers[r][0] = -1; - } else if (registers[r][0] == v) - registers[r][0] = -1; - } - } -}; - -#ifdef HAVE_STX_BTREE_SET_H -typedef stx::btree_set varset_t; // Faster than std::set -#else -typedef std::set varset_t; -#endif -// typedef std::set, boost::fast_pool_allocator > -// varset_t; // Slower than ordinary std::set - -#ifdef HAVE_STX_BTREE_MAP_H 
-typedef stx::btree_map icosts_t; // Faster than std::map -#else -typedef std::map icosts_t; -#endif -// typedef std::tr1::unordered_set varset_t; // Speed about the same as -// std::set - -struct assignment { - float s; - - varset_t local; // Entries: var - std::vector - global; // Entries: global[var] = reg (-1 if no reg assigned) - icosts_t i_costs; // Costs for all instructions in bag (needed to avoid double - // counting costs at join nodes) - i_assignment_t i_assignment; // Assignment at the instruction currently being - // added in an introduce node; - - bool marked; - - bool operator<(const assignment &a) const { - varset_t::const_iterator i, ai, i_end, ai_end; - - i_end = local.end(); - ai_end = a.local.end(); - - for (i = local.begin(), ai = a.local.begin();; ++i, ++ai) { - if (i == i_end) - return (true); - if (ai == ai_end) - return (false); - - if (*i < *ai) - return (true); - if (*i > *ai) - return (false); - - if (global[*i] < a.global[*ai]) - return (true); - if (global[*i] > a.global[*ai]) - return (false); - } - } -}; - -typedef std::list assignment_list_t; -// typedef std::vector assignment_list_t; // Probably faster, but -// would require some code reorganization. - -struct tree_dec_node { - std::set bag; - std::set alive; - assignment_list_t assignments; - unsigned weight; // The weight is the number of nodes at which intermediate - // results need to be remembered. In general, to minimize - // memory consumption, at join nodes the child with maximum - // weight should be processed first. -}; - -typedef std::multimap operand_map_t; -// typedef stx::btree_multimap operand_map_t; // Slightly slower -// than std::multimap. - -struct cfg_node { - iCode *ic; - operand_map_t operands; - std::set alive; - std::set dying; - -#ifdef DEBUG_SEGV - cfg_node(void); -#endif -}; - -#ifdef DEBUG_SEGV -// This only exists to track down #3506333 and #3475617. 
-bool default_constructor_of_cfg_node_called; -cfg_node::cfg_node(void) { default_constructor_of_cfg_node_called = true; } -#endif - -struct con_node { - int v; - int byte; - int size; - char *name; -}; - -typedef boost::adjacency_list - tree_dec_t; -typedef boost::adjacency_list - con_t; -typedef boost::adjacency_matrix con2_t; -typedef boost::adjacency_list - cfg_t; -typedef boost::adjacency_list - cfg_sym_t; - -// Cost function. Port-specific. -template -static float instruction_cost(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I); - -// For early removel of assignments that cannot be extended to valid -// assignments. Port-specific. -template -static bool assignment_hopeless(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I, - const var_t lastvar); - -// Rough cost estimate. Port-specific. -template -static float rough_cost_estimate(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I); - -// Avoid overwriting operands that are still needed by the result. -// Port-specific. -template -static void add_operand_conflicts_in_node(const cfg_node &n, I_t &I); - -// Port-specific -template -static void get_best_local_assignment_biased( - assignment &a, typename boost::graph_traits::vertex_descriptor t, - const T_t &T); - -// Code for another ic is generated when generating this one. Mark the other as -// generated. Port-specific. -static void extra_ic_generated(iCode *ic); - -inline void -add_operand_to_cfg_node(cfg_node &n, operand *o, - std::map, var_t> &sym_to_index) { - reg_t k; - if (o && IS_SYMOP(o) && - sym_to_index.find(std::pair(OP_SYMBOL_CONST(o)->key, 0)) != - sym_to_index.end()) { - if (n.operands.find(OP_SYMBOL_CONST(o)->key) == n.operands.end()) - for (k = 0; k < OP_SYMBOL_CONST(o)->nRegs; k++) - n.operands.insert(std::pair( - OP_SYMBOL_CONST(o)->key, - sym_to_index[std::pair(OP_SYMBOL_CONST(o)->key, k)])); - } -} - -// A quick-and-dirty function to get the CFG from sdcc. 
-static inline iCode *create_cfg(cfg_t &cfg, con_t &con, ebbIndex *ebbi) { - eBBlock **ebbs = ebbi->bbOrder; - iCode *start_ic; - iCode *ic; - - std::map key_to_index; - std::map, var_t> sym_to_index; - - start_ic = iCodeLabelOptimize(iCodeFromeBBlock(ebbs, ebbi->count)); - { - int i; - var_t j; - wassertl(!boost::num_vertices(cfg), "CFG non-empty before creation."); - for (ic = start_ic, i = 0, j = 0; ic; ic = ic->next, i++) { -#ifdef DEBUG_SEGV - default_constructor_of_cfg_node_called = false; -#endif - boost::add_vertex(cfg); - -#ifdef DEBUG_SEGV - wassertl(default_constructor_of_cfg_node_called, - "add_vertex failed to call default constructor of cfg_node!"); -#endif - wassertl(cfg[i].alive.empty(), "Alive set non-empty upon creation."); - key_to_index[ic->key] = i; - - if (ic->op == SEND && ic->builtinSEND) // Ensure that only the very first - // send iCode is active. - { - operand *bi_parms[MAX_BUILTIN_ARGS]; - int nbi_parms; - getBuiltinParms(ic, &nbi_parms, bi_parms); - } - - extra_ic_generated(ic); - - cfg[i].ic = ic; - - if (ic->generated) - continue; - - for (int j2 = 0; j2 <= operandKey; j2++) { - if (bitVectBitValue(ic->rlive, j2)) { - symbol *sym = (symbol *)(hTabItemWithKey(liveRanges, j2)); - - if (!sym->for_newralloc) - continue; - - // Add node to conflict graph: - if (sym_to_index.find(std::pair(j2, 0)) != - sym_to_index.end()) - continue; - - // Other parts of the allocator may rely on the variables - // corresponding to bytes from the same sdcc variable to have - // subsequent numbers. - for (reg_t k = 0; k < sym->nRegs; k++) { - boost::add_vertex(con); - con[j].v = j2; - con[j].byte = k; - con[j].size = sym->nRegs; - con[j].name = sym->name; - sym_to_index[std::pair(j2, k)] = j; - for (reg_t l = 0; l < k; l++) - boost::add_edge(j - l - 1, j, con); - j++; - } - } - } - } - } - - // Get control flow graph from sdcc. 
- for (ic = start_ic; ic; ic = ic->next) { - wassertl(key_to_index[ic->key] < boost::num_vertices(cfg), - "Node not in CFG."); - - if (ic->op != GOTO && ic->op != RETURN && ic->op != JUMPTABLE && ic->next) { - wassertl(key_to_index[ic->next->key] < boost::num_vertices(cfg), - "Next node not in CFG."); - boost::add_edge(key_to_index[ic->key], key_to_index[ic->next->key], cfg); - } - - if (ic->op == GOTO) { - wassertl(key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key] < - boost::num_vertices(cfg), - "GOTO target not in CFG."); - boost::add_edge( - key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, ic->label)->sch->key], cfg); - } else if (ic->op == RETURN) { - wassertl(key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key] < - boost::num_vertices(cfg), - "RETURN target not in CFG."); - boost::add_edge( - key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, returnLabel)->sch->key], cfg); - } else if (ic->op == IFX) { - wassertl(key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? IC_TRUE(ic) - : IC_FALSE(ic)) - ->sch->key] < boost::num_vertices(cfg), - "IFX target not in CFG."); - boost::add_edge( - key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, IC_TRUE(ic) ? 
IC_TRUE(ic) - : IC_FALSE(ic)) - ->sch->key], - cfg); - } else if (ic->op == JUMPTABLE) - for (symbol *lbl = (symbol *)(setFirstItem(IC_JTLABELS(ic))); lbl; - lbl = (symbol *)(setNextItem(IC_JTLABELS(ic)))) { - wassertl(key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key] < - boost::num_vertices(cfg), - "GOTO target not in CFG."); - boost::add_edge(key_to_index[ic->key], - key_to_index[eBBWithEntryLabel(ebbi, lbl)->sch->key], - cfg); - } - - for (int i = 0; i <= operandKey; i++) { - if (sym_to_index.find(std::pair(i, 0)) == sym_to_index.end()) - continue; - - if (bitVectBitValue(ic->rlive, i)) { - symbol *isym = (symbol *)(hTabItemWithKey(liveRanges, i)); - for (reg_t k = 0; k < isym->nRegs; k++) { - wassert(key_to_index.find(ic->key) != key_to_index.end()); - wassert(sym_to_index.find(std::pair(i, k)) != - sym_to_index.end()); - wassertl(key_to_index[ic->key] < boost::num_vertices(cfg), - "Node not in CFG."); - cfg[key_to_index[ic->key]].alive.insert( - sym_to_index[std::pair(i, k)]); - } - - // TODO: Move this to a place where it also works when using the old - // allocator! - if (isym->block) - isym->block = btree_lowest_common_ancestor(isym->block, ic->block); - else - isym->block = ic->block; - } - } - - if (ic->op == IFX) - add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_COND(ic), - sym_to_index); - else if (ic->op == JUMPTABLE) - add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_JTCOND(ic), - sym_to_index); - else { - add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_RESULT(ic), - sym_to_index); - add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_LEFT(ic), - sym_to_index); - add_operand_to_cfg_node(cfg[key_to_index[ic->key]], IC_RIGHT(ic), - sym_to_index); - } - - // TODO: Extend live-ranges of returns of built-in function calls back to - // first SEND. 
- - add_operand_conflicts_in_node(cfg[key_to_index[ic->key]], con); - } - -#if 0 - // Get conflict graph from sdcc - for (var_t i = 0; static_cast::vertices_size_type>(i) < num_vertices(con); i++) - { - symbol *isym = (symbol *)(hTabItemWithKey(liveRanges, con[i].v)); - for (int j = 0; j <= operandKey; j++) - if (bitVectBitValue(isym->clashes, j)) - { - symbol *jsym = (symbol *)(hTabItemWithKey(liveRanges, j)); - if (sym_to_index.find(std::pair(j, 0)) == sym_to_index.end()) - continue; - for (reg_t k = 0; k < jsym->nRegs; k++) - boost::add_edge(i, sym_to_index[std::pair(j, k)], con); - } - } -#endif - - // Check for unconnected live ranges, some might have survived dead code - // elimination. This is essentially a workaround for broken dead code - // alimination. Todo: Improve efficiency, e.g. using subgraph or - // filtered_graph. Todo: Split live ranges instead? - for (var_t i = boost::num_vertices(con) - 1; i >= 0; i--) { - cfg_sym_t cfg2; - boost::copy_graph( - cfg, cfg2, - boost::vertex_copy(forget_properties()).edge_copy(forget_properties())); - for (int j = boost::num_vertices(cfg) - 1; j >= 0; j--) { - if (cfg[j].alive.find(i) == cfg[j].alive.end()) { - boost::clear_vertex(j, cfg2); - boost::remove_vertex(j, cfg2); - } - } - std::vector::vertices_size_type> component( - num_vertices(cfg2)); - if (boost::connected_components(cfg2, &component[0]) > 1) { - // Non-connected CFGs are created by at least GCSE and lospre. We now have - // a live-range splitter that fixes them, so this should no longer be - // necessary, but we leave this code here for now, so in case one gets - // through, we can still generate correct code. - std::cerr - << "Warning: Non-connected liverange found and extended to connected " - "component of the CFG:" - << con[i].name - << ". 
Please contact sdcc authors with source code to reproduce.\n"; - - cfg_sym_t cfg2; - boost::copy_graph(cfg, cfg2, - boost::vertex_copy(forget_properties()) - .edge_copy(forget_properties())); - std::vector::vertices_size_type> component( - num_vertices(cfg2)); - boost::connected_components(cfg2, &component[0]); - - for (boost::graph_traits::vertices_size_type j = 0; - j < boost::num_vertices(cfg) - 1; j++) { - if (cfg[j].alive.find(i) != cfg[j].alive.end()) { - for (boost::graph_traits::vertices_size_type k = 0; - k < boost::num_vertices(cfg) - 1; k++) { - if (component[j] == component[k]) - cfg[k].alive.insert(i); - } - } - } - } - } - - for (boost::graph_traits::vertices_size_type i = 0; - i < num_vertices(cfg); i++) { - cfg[i].dying = cfg[i].alive; - typedef boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t j, j_end; - for (boost::tie(j, j_end) = adjacent_vertices(i, cfg); j != j_end; ++j) { - std::set::const_iterator v, v_end; - for (v = cfg[*j].alive.begin(), v_end = cfg[*j].alive.end(); v != v_end; - ++v) { - const symbol *const vsym = - (symbol *)(hTabItemWithKey(liveRanges, con[*v].v)); - - const operand *const left = IC_LEFT(cfg[*j].ic); - const operand *const right = IC_RIGHT(cfg[*j].ic); - const operand *const result = IC_RESULT(cfg[*j].ic); - - if (!POINTER_SET(cfg[*j].ic) && - (!left || !IS_SYMOP(left) || - OP_SYMBOL_CONST(left)->key != vsym->key) && - (!right || !IS_SYMOP(right) || - OP_SYMBOL_CONST(right)->key != vsym->key) && - result && IS_SYMOP(result) && - OP_SYMBOL_CONST(result)->key == vsym->key) - continue; - - cfg[i].dying.erase(*v); - } - } - } - - // Construct conflict graph - for (boost::graph_traits::vertices_size_type i = 0; - i < num_vertices(cfg); i++) { - std::set::const_iterator v, v_end; - const iCode *ic = cfg[i].ic; - - for (v = cfg[i].alive.begin(), v_end = cfg[i].alive.end(); v != v_end; - ++v) { - std::set::const_iterator v2, v2_end; - - // Conflict between operands are handled by - // 
add_operand_conflicts_in_node(). - if (cfg[i].dying.find(*v) != cfg[i].dying.end()) - continue; - if (ic->op != IFX && ic->op != JUMPTABLE && IC_RESULT(ic) && - IS_SYMOP(IC_RESULT(ic))) { - operand_map_t::const_iterator oi, oi_end; - for (boost::tie(oi, oi_end) = cfg[i].operands.equal_range( - OP_SYMBOL_CONST(IC_RESULT(ic))->key); - oi != oi_end; ++oi) - if (oi->second == *v) - goto next_var; - } - - // Here, v is a variable that survives cfg[i]. - // TODO: Check if we can use v, ++v2 instead of cfg[i].alive.begin() to - // speed things up. - for (v2 = cfg[i].alive.begin(), v2_end = cfg[i].alive.end(); v2 != v2_end; - ++v2) { - if (*v == *v2) - continue; - if (cfg[i].dying.find(*v2) != cfg[i].dying.end()) - continue; - - boost::add_edge(*v, *v2, con); - } - - next_var:; - } - } - - return (start_ic); -} - -// Computes live ranges for tree decomposition from live ranges from cfg. -inline void alive_tree_dec(tree_dec_t &tree_dec, const cfg_t &cfg) { - for (unsigned int i = 0; i < num_vertices(tree_dec); i++) { - std::set::const_iterator v; - for (v = tree_dec[i].bag.begin(); v != tree_dec[i].bag.end(); ++v) - tree_dec[i].alive.insert(cfg[*v].alive.begin(), cfg[*v].alive.end()); - } -} - -#if defined(DEBUG_RALLOC_DEC) || defined(DEBUG_RALLOC_DEC_ASS) -static void print_assignment(const assignment &a) { - varset_t::const_iterator i; - std::cout << "["; - for (i = a.local.begin(); i != a.local.end(); ++i) - std::cout << "(" << int(*i) << ", " << int(a.global[*i]) << "), "; - std::cout << "c: " << a.s << "]"; -} -#endif - -template -bool assignment_conflict(const assignment &a, const I_t &I, var_t v, reg_t r) { - varset_t::const_iterator i, i_end; - - for (i = a.local.begin(), i_end = a.local.end(); i != i_end; ++i) { - if (a.global[*i] != r) - continue; - if (boost::edge(*i, v, I).second) - return (true); - } - - return (false); -} - -template -void assignments_introduce_instruction(assignment_list_t &alist, - unsigned short int i, const G_t &G) { - 
assignment_list_t::iterator ai, ai_end; - - for (ai = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai) { - std::set i_variables; - - std::set_intersection(ai->local.begin(), ai->local.end(), - G[i].alive.begin(), G[i].alive.end(), - std::inserter(i_variables, i_variables.end())); - - i_assignment_t ia; - - std::set::const_iterator v, v_end; - for (v = i_variables.begin(), v_end = i_variables.end(); v != v_end; ++v) - if (ai->global[*v] >= 0) - ia.add_var(*v, ai->global[*v]); - - ai->i_assignment = ia; - } -} - -template -static void assignments_introduce_variable(assignment_list_t &alist, - unsigned short int i, short int v, - const G_t &G, const I_t &I) { - assignment_list_t::iterator ai; - bool a_initialized; - assignment a; - size_t c, c_end; - - for (ai = alist.begin(), c = 0, c_end = alist.size(); c < c_end; c++, ai++) { - a_initialized = false; - - for (reg_t r = 0; r < port->num_regs; r++) { - if (!assignment_conflict(*ai, I, v, r)) { - if (!a_initialized) { - a = *ai; - ai->marked = true; - a.marked = false; - a.local.insert(v); - } - a.global[v] = r; - a.i_assignment.add_var(v, r); - if (!assignment_hopeless(a, i, G, I, v)) - alist.push_back(a); - a.i_assignment.remove_var(v); - } - } - } -} - -struct assignment_rep { - assignment_list_t::iterator i; - float s; - - bool operator<(const assignment_rep &a) const { return (s < a.s); } -}; - -template -float compability_cost(const assignment &a, const assignment &ac, - const I_t &I) { - float c = 0.0f; - - varset_t::const_iterator vi, vi_end; - - for (vi = ac.local.begin(), vi_end = ac.local.end(); vi != vi_end; ++vi) { - const var_t v = *vi; - if (a.global[v] != ac.global[v]) { - c += 1000.0f; - continue; - } -#if 0 // This improves the quality of assignments, but it has a big runtime - // overhead for some cases. 
- adjacency_iter_t j, j_end; - for (boost::tie(j, j_end) = adjacent_vertices(v, I); j != j_end; ++j) - if(ac.global[v] != -1 && a.global[*j] == ac.global[v]) - { - c += 1000.0f; - break; - } -#endif - } - - return (c); -} - -// Ensure that we never get more than options.max_allocs_per_node assignments at -// a single node of the tree decomposition. Tries to drop the worst ones first -// (but never drop the empty assignment, as it's the only one guaranteed to be -// always valid). -template -static void drop_worst_assignments(assignment_list_t &alist, - unsigned short int i, const G_t &G, - const I_t &I, const assignment &ac, - bool *const assignment_optimal) { - unsigned int n; - size_t alist_size; - assignment_list_t::iterator ai, an; - - if ((alist_size = alist.size()) * port->num_regs <= - static_cast(options.max_allocs_per_node) || - alist_size <= 1) - return; - - *assignment_optimal = false; - -#ifdef DEBUG_RALLOC_DEC - std::cout << "Too many assignments here (" << i << "):" << alist_size << " > " - << options.max_allocs_per_node / port->num_regs - << ". Dropping some.\n"; - std::cout.flush(); -#endif - - assignment_rep *arep = new assignment_rep[alist_size]; - - for (n = 0, ai = alist.begin(); n < alist_size; ++ai, n++) { - arep[n].i = ai; - arep[n].s = ai->s + rough_cost_estimate(*ai, i, G, I) + - compability_cost(*ai, ac, I); - } - - std::nth_element(arep + 1, - arep + options.max_allocs_per_node / port->num_regs, - arep + alist_size); - - // std::cout << "nth elem. est. 
cost: " << arep[options.max_allocs_per_node / - // port->num_regs].s << "\n"; std::cout.flush(); - - for (n = options.max_allocs_per_node / port->num_regs + 1; n < alist_size; - n++) - alist.erase(arep[n].i); - - delete[] arep; -} - -// Handle Leaf nodes in the nice tree decomposition -template -static void -tree_dec_ralloc_leaf(T_t &T, - typename boost::graph_traits::vertex_descriptor t, - const G_t &G, const I_t &I) { -#ifdef DEBUG_RALLOC_DEC - std::cout << "Leaf (" << t << "):\n"; - std::cout.flush(); -#endif - - assignment a; - assignment_list_t &alist = T[t].assignments; - - a.s = 0; - a.global.resize(boost::num_vertices(I), -1); - alist.push_back(a); - -#ifdef DEBUG_RALLOC_DEC_ASS - assignment_list_t::iterator ai; - for (ai = alist.begin(); ai != alist.end(); ++ai) { - print_assignment(*ai); - std::cout << "\n"; - } - assignment best; - get_best_local_assignment(best, t, T); - std::cout << "Best: "; - print_assignment(best); - std::cout << "\n"; -#endif -} - -// Handle introduce nodes in the nice tree decomposition -template -static void tree_dec_ralloc_introduce( - T_t &T, typename boost::graph_traits::vertex_descriptor t, - const G_t &G, const I_t &I, const assignment &ac, - bool *const assignment_optimal) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end; - assignment_list_t::iterator ai; - boost::tie(c, c_end) = adjacent_vertices(t, T); - -#ifdef DEBUG_RALLOC_DEC - std::cout << "Introduce (" << t << "):\n"; - std::cout.flush(); - std::cout << "ac: "; - print_assignment(ac); - std::cout << "\n"; -#endif - - assignment_list_t &alist = T[t].assignments; - - std::swap(alist, T[*c].assignments); - - std::set new_vars; - std::set_difference(T[t].alive.begin(), T[t].alive.end(), T[*c].alive.begin(), - T[*c].alive.end(), - std::inserter(new_vars, new_vars.end())); - - std::set new_inst; - std::set_difference(T[t].bag.begin(), T[t].bag.end(), T[*c].bag.begin(), - T[*c].bag.end(), std::inserter(new_inst, 
new_inst.end())); - unsigned short int i = *(new_inst.begin()); - - // Extend to new instruction. - assignments_introduce_instruction(alist, i, G); - - std::set::const_iterator v; - for (v = new_vars.begin(); v != new_vars.end(); ++v) { - drop_worst_assignments(alist, i, G, I, ac, assignment_optimal); - assignments_introduce_variable(alist, i, *v, G, I); - } - - // Summation of costs and early removal of assignments. - for (ai = alist.begin(); ai != alist.end();) { - if ((ai->s += (ai->i_costs[i] = instruction_cost(*ai, i, G, I))) == - std::numeric_limits::infinity()) - ai = alist.erase(ai); - else - ++ai; - } - - // Free memory in the std::set > that live - // in the assignments in the list. - // boost::singleton_pool::release_memory(); - -#ifdef DEBUG_RALLOC_DEC_ASS - for (ai = alist.begin(); ai != alist.end(); ++ai) { - print_assignment(*ai); - std::cout << "\n"; - } - - assignment best; - get_best_local_assignment(best, t, T); - std::cout << "Best: "; - print_assignment(best); - std::cout << "\n"; -#endif -} - -static bool assignments_locally_same(const assignment &a1, - const assignment &a2) { - if (a1.local != a2.local) - return (false); - - varset_t::const_iterator i, i_end; - for (i = a1.local.begin(), i_end = a1.local.end(); i != i_end; ++i) - if (a1.global[*i] != a2.global[*i]) - return (false); - - return (true); -} - -// Handle forget nodes in the nice tree decomposition -template -static void -tree_dec_ralloc_forget(T_t &T, - typename boost::graph_traits::vertex_descriptor t, - const G_t &G, const I_t &I) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end; - boost::tie(c, c_end) = adjacent_vertices(t, T); - -#ifdef DEBUG_RALLOC_DEC - std::cout << "Forget (" << t << "):\n"; - std::cout.flush(); -#endif - - assignment_list_t &alist = T[t].assignments; - - std::swap(alist, T[*c].assignments); - - std::set old_inst; - std::set_difference(T[*c].bag.begin(), T[*c].bag.end(), T[t].bag.begin(), - 
T[t].bag.end(), std::inserter(old_inst, old_inst.end())); - unsigned short int i = *(old_inst.begin()); - - std::set old_vars; - std::set_difference(T[*c].alive.begin(), T[*c].alive.end(), - T[t].alive.begin(), T[t].alive.end(), - std::inserter(old_vars, old_vars.end())); - - assignment_list_t::iterator ai, aif; - - // Restrict assignments (locally) to current variables. - for (ai = alist.begin(); ai != alist.end(); ++ai) { - // Erasing by iterators doesn't work with B-Trees, and erasing by value - // invalidates iterators. - std::set::const_iterator oi, oi_end; - for (oi = old_vars.begin(), oi_end = old_vars.end(); oi != oi_end; ++oi) - ai->local.erase(*oi); - - ai->i_costs.erase(i); - } - - alist.sort(); - // std::sort(alist.begin(), alist.end()); - - // Collapse (locally) identical assignments. - for (ai = alist.begin(); ai != alist.end();) { - aif = ai; - - for (++ai; ai != alist.end() && assignments_locally_same(*aif, *ai);) { - if (aif->s > ai->s) { - alist.erase(aif); - aif = ai; - ++ai; - } else { - alist.erase(ai); - ai = aif; - ++ai; - } - } - } - - // Free memory in the std::set > that live - // in the assignments in the list. 
- // boost::singleton_pool::release_memory(); - -#ifdef DEBUG_RALLOC_DEC - std::cout << "Remaining assignments: " << alist.size() << "\n"; - std::cout.flush(); -#endif - -#ifdef DEBUG_RALLOC_DEC_ASS - for (ai = alist.begin(); ai != alist.end(); ++ai) { - print_assignment(*ai); - std::cout << "\n"; - } - - assignment best; - get_best_local_assignment(best, t, T); - std::cout << "Best: "; - print_assignment(best); - std::cout << "\n"; -#endif -} - -// Handle join nodes in the nice tree decomposition -template -static void -tree_dec_ralloc_join(T_t &T, - typename boost::graph_traits::vertex_descriptor t, - const G_t &G, const I_t &I) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end, c2, c3; - boost::tie(c, c_end) = adjacent_vertices(t, T); - -#ifdef DEBUG_RALLOC_DEC - std::cout << "Join (" << t << "):\n"; - std::cout.flush(); -#endif - - c2 = c; - ++c; - c3 = c; - - assignment_list_t &alist1 = T[t].assignments; - assignment_list_t &alist2 = T[*c2].assignments; - assignment_list_t &alist3 = T[*c3].assignments; - - alist2.sort(); - // std::sort(alist2.begin(), alist2.end()); - alist3.sort(); - // std::sort(alist3.begin(), alist3.end()); - - assignment_list_t::iterator ai2, ai3; - for (ai2 = alist2.begin(), ai3 = alist3.begin(); - ai2 != alist2.end() && ai3 != alist3.end();) { - if (assignments_locally_same(*ai2, *ai3)) { - ai2->s += ai3->s; - // Avoid double-counting instruction costs. - std::set::iterator bi; - for (bi = T[t].bag.begin(); bi != T[t].bag.end(); ++bi) - ai2->s -= ai2->i_costs[*bi]; - for (size_t i = 0; i < ai2->global.size(); i++) - ai2->global[i] = - ((ai2->global[i] != -1) ? 
ai2->global[i] : ai3->global[i]); - alist1.push_back(*ai2); - ++ai2; - ++ai3; - } else if (*ai2 < *ai3) { - ++ai2; - continue; - } else if (*ai3 < *ai2) { - ++ai3; - continue; - } - } - - alist2.clear(); - alist3.clear(); - -#ifdef DEBUG_RALLOC_DEC - std::cout << "Remaining assignments: " << alist1.size() << "\n"; - std::cout.flush(); -#endif - -#ifdef DEBUG_RALLOC_DEC_ASS - std::list::iterator ai; - for (ai = alist1.begin(); ai != alist1.end(); ++ai) { - print_assignment(*ai); - std::cout << "\n"; - } -#endif -} - -template -void get_best_local_assignment( - assignment &a, typename boost::graph_traits::vertex_descriptor t, - const T_t &T) { - const assignment_list_t &alist = T[t].assignments; - - assignment_list_t::const_iterator ai, ai_end, ai_best; - for (ai = ai_best = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai) - if (ai->s < ai_best->s) - ai_best = ai; - - a = *ai_best; -} - -// Handle nodes in the tree decomposition, by detecting their type and calling -// the appropriate function. Recurses. -template -static void -tree_dec_ralloc_nodes(T_t &T, - typename boost::graph_traits::vertex_descriptor t, - const G_t &G, const I_t &I, const assignment &ac, - bool *const assignment_optimal) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - - adjacency_iter_t c, c_end; - typename boost::graph_traits::vertex_descriptor c0, c1; - - boost::tie(c, c_end) = adjacent_vertices(t, T); - - switch (out_degree(t, T)) { - case 0: - tree_dec_ralloc_leaf(T, t, G, I); - break; - case 1: - c0 = *c; - tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal); - T[c0].bag.size() < T[t].bag.size() - ? tree_dec_ralloc_introduce(T, t, G, I, ac, assignment_optimal) - : tree_dec_ralloc_forget(T, t, G, I); - break; - case 2: - c0 = *c++; - c1 = *c; - - if (T[c0].weight < T[c1].weight) // Minimize memory consumption. - { - /*std::swap (c0, c1)*/ /* Causes code size regressions - don't know why. 
- */ - } - - tree_dec_ralloc_nodes(T, c0, G, I, ac, assignment_optimal); - { - assignment *ac2 = new assignment; - get_best_local_assignment_biased(*ac2, c0, T); - tree_dec_ralloc_nodes(T, c1, G, I, *ac2, assignment_optimal); - delete ac2; - } - tree_dec_ralloc_join(T, t, G, I); - break; - default: - std::cerr << "Not nice.\n"; - break; - } -} - -// Find the best root selecting from t_old and the leafs under t. -template -static std::pair::vertex_descriptor, size_t> -find_best_root(const T_t &T, - typename boost::graph_traits::vertex_descriptor t, - size_t t_s, - typename boost::graph_traits::vertex_descriptor t_old, - size_t t_old_s) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end; - typename boost::graph_traits::vertex_descriptor c0, c1, t0; - size_t t0_s; - - boost::tie(c, c_end) = adjacent_vertices(t, T); - - switch (out_degree(t, T)) { - case 0: - return ( - t_s > t_old_s - ? std::pair::vertex_descriptor, - size_t>(t, t_s) - : std::pair::vertex_descriptor, - size_t>(t_old, t_old_s)); - case 1: - return (find_best_root(T, *c, T[*c].alive.size() ? T[*c].alive.size() : t_s, - t_old, t_old_s)); - case 2: - c0 = *c++; - c1 = *c; - boost::tie(t0, t0_s) = find_best_root( - T, c0, T[c0].alive.size() ? T[c0].alive.size() : t_s, t_old, t_old_s); - return (find_best_root(T, c1, T[c1].alive.size() ? T[c1].alive.size() : t_s, - t0_s > t_old_s ? t0 : t_old, - t0_s > t_old_s ? t0_s : t_old_s)); - break; - default: - std::cerr << "Not nice.\n"; - break; - } - - return ( - std::pair::vertex_descriptor, size_t>( - t_old, t_old_s)); -} - -// Change the root to t. 
-template -static void re_root(T_t &T, - typename boost::graph_traits::vertex_descriptor t) { - typename boost::graph_traits::vertex_descriptor s0, s1, s2; - typename boost::graph_traits::in_edge_iterator e, e_end; - - boost::tie(e, e_end) = boost::in_edges(t, T); - if (e == e_end) - return; - - s0 = t; - s1 = boost::source(*e, T); - - for (boost::tie(e, e_end) = boost::in_edges(s1, T); e != e_end; - boost::tie(e, e_end) = boost::in_edges(s1, T)) { - s2 = boost::source(*e, T); - boost::remove_edge(s1, s0, T); - boost::add_edge(s0, s1, T); - s0 = s1; - s1 = s2; - } - boost::remove_edge(s1, s0, T); - boost::add_edge(s0, s1, T); -} - -// Change the root to improve the assignment removal heuristic. -template static void good_re_root(T_t &T) { - typename boost::graph_traits::vertex_descriptor t; - - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - adjacency_iter_t c, c_end; - - t = find_root(T); - - for (boost::tie(c, c_end) = boost::adjacent_vertices(t, T); - c != c_end && !T[*c].alive.size();) - boost::tie(c, c_end) = boost::adjacent_vertices(*c, T); - - size_t t_s = (c != c_end ? T[*c].alive.size() : 0); - t = find_best_root(T, t, t_s, t, t_s).first; - - if (T[t].alive.size()) { - std::cerr << "Error: Invalid root.\n"; - return; - } - - re_root(T, t); -} - -// Dump conflict graph, with numbered nodes, show live variables at each node. -static void dump_con(const con_t &con) { - std::ofstream dump_file( - (std::string(dstFileName) + ".dumpcon" + currFunc->rname + ".dot") - .c_str()); - - std::string *name = new std::string[num_vertices(con)]; - for (var_t i = 0; static_cast::vertices_size_type>( - i) < boost::num_vertices(con); - i++) { - std::ostringstream os; - os << i; - if (con[i].name) - os << " : " << con[i].name << ":" << con[i].byte; - name[i] = os.str(); - } - boost::write_graphviz(dump_file, con, boost::make_label_writer(name)); - delete[] name; -} - -// Dump cfg, with numbered nodes, show live variables at each node. 
-static void dump_cfg(const cfg_t &cfg) { - std::ofstream dump_file( - (std::string(dstFileName) + ".dumpcfg" + currFunc->rname + ".dot") - .c_str()); - - std::string *name = new std::string[num_vertices(cfg)]; - for (unsigned int i = 0; i < boost::num_vertices(cfg); i++) { - std::ostringstream os; - os << i << ", " << cfg[i].ic->key << ": "; - std::set::const_iterator v; - for (v = cfg[i].alive.begin(); v != cfg[i].alive.end(); ++v) - os << *v << " "; - name[i] = os.str(); - } - boost::write_graphviz(dump_file, cfg, boost::make_label_writer(name)); - delete[] name; -} - -// Dump tree decomposition, show bag and live variables at each node. -static void dump_tree_decomposition(const tree_dec_t &tree_dec) { - std::ofstream dump_file( - (std::string(dstFileName) + ".dumpdec" + currFunc->rname + ".dot") - .c_str()); - - unsigned int w = 0; - - std::string *name = new std::string[num_vertices(tree_dec)]; - for (unsigned int i = 0; i < boost::num_vertices(tree_dec); i++) { - if (tree_dec[i].bag.size() > w) - w = tree_dec[i].bag.size(); - std::ostringstream os; - std::set::const_iterator v1; - os << i << " | "; - for (v1 = tree_dec[i].bag.begin(); v1 != tree_dec[i].bag.end(); ++v1) - os << *v1 << " "; - os << ": "; - std::set::const_iterator v2; - for (v2 = tree_dec[i].alive.begin(); v2 != tree_dec[i].alive.end(); ++v2) - os << *v2 << " "; - name[i] = os.str(); - } - boost::write_graphviz(dump_file, tree_dec, boost::make_label_writer(name)); - delete[] name; - -#ifdef D_RALLOC_DEC - std::cout << "Width: " << (w - 1) << "(" << currFunc->name << ")\n"; -#endif -} - -#endif diff --git a/src/SDCCtree_dec.c b/src/SDCCtree_dec.c new file mode 100644 index 000000000..70407a273 --- /dev/null +++ b/src/SDCCtree_dec.c @@ -0,0 +1,577 @@ +/* Tree decomposition — C port of SDCCtree_dec.hpp. + * + * See the original header for references. 
The algorithms are Thorup's + * heuristic elimination-ordering tree decomposition (D and E), plus the + * three nicify passes that produce a nice tree decomposition. */ + +#include "SDCCtree_dec.h" + +#include +#include +#include + +/* ---------- tree_dec lifecycle ---------- */ + +void tree_dec_init(tree_dec_t *t) { + cg_init(&t->g, CG_BIDIRECTIONAL, 0); + t->bag = NULL; + t->weight = NULL; + t->cap = 0; +} + +void tree_dec_free(tree_dec_t *t) { + for (size_t v = 0; v < t->g.nvertices; v++) uiset_free(&t->bag[v]); + free(t->bag); + free(t->weight); + cg_free(&t->g); + t->bag = NULL; + t->weight = NULL; + t->cap = 0; +} + +static void tree_dec_grow(tree_dec_t *t) { + if (t->g.nvertices < t->cap) return; + size_t nc = t->cap ? t->cap * 2 : 8; + t->bag = (uiset_t *)realloc(t->bag, nc * sizeof(uiset_t)); + t->weight = (unsigned *)realloc(t->weight, nc * sizeof(unsigned)); + t->cap = nc; +} + +unsigned int tree_dec_add_vertex(tree_dec_t *t) { + tree_dec_grow(t); + unsigned int v = cg_add_vertex(&t->g); + uiset_init(&t->bag[v]); + t->weight[v] = 0; + return v; +} + +/* ---------- multimap helper ---------- */ + +typedef struct { + unsigned int k, v; +} mm_pair_t; + +typedef struct { + mm_pair_t *items; + size_t n, cap; + int sorted; +} uu_mmap_t; + +static void mm_init(uu_mmap_t *m) { + m->items = NULL; m->n = 0; m->cap = 0; m->sorted = 1; +} + +static void mm_free(uu_mmap_t *m) { + free(m->items); m->items = NULL; m->n = 0; m->cap = 0; +} + +static void mm_insert(uu_mmap_t *m, unsigned int k, unsigned int v) { + if (m->n == m->cap) { + m->cap = m->cap ? m->cap * 2 : 8; + m->items = (mm_pair_t *)realloc(m->items, m->cap * sizeof(mm_pair_t)); + } + m->items[m->n].k = k; m->items[m->n].v = v; m->n++; + m->sorted = 0; +} + +static int mm_cmp(const void *a, const void *b) { + const mm_pair_t *x = (const mm_pair_t *)a; + const mm_pair_t *y = (const mm_pair_t *)b; + if (x->k != y->k) return x->k < y->k ? -1 : 1; + if (x->v != y->v) return x->v < y->v ? 
-1 : 1; + return 0; +} + +static void mm_sort(uu_mmap_t *m) { + if (m->sorted) return; + qsort(m->items, m->n, sizeof(mm_pair_t), mm_cmp); + m->sorted = 1; +} + +/* Returns the start index of elements with key k; stores end in *end_out. */ +static size_t mm_equal_range(uu_mmap_t *m, unsigned int k, size_t *end_out) { + mm_sort(m); + size_t lo = 0, hi = m->n; + while (lo < hi) { + size_t mid = lo + ((hi - lo) >> 1); + if (m->items[mid].k < k) lo = mid + 1; else hi = mid; + } + size_t start = lo; + hi = m->n; + while (lo < hi) { + size_t mid = lo + ((hi - lo) >> 1); + if (m->items[mid].k <= k) lo = mid + 1; else hi = mid; + } + *end_out = lo; + return start; +} + +/* ---------- ordered list helper (thorup ordering) ---------- + * Used as elimination ordering and then iterated in reverse. Array is + * sufficient. */ +typedef struct { + unsigned int *items; + size_t n, cap; +} ui_list_t; + +static void ui_list_init(ui_list_t *l) { l->items = NULL; l->n = 0; l->cap = 0; } +static void ui_list_free(ui_list_t *l) { free(l->items); l->items = NULL; l->n = 0; l->cap = 0; } +static void ui_list_clear(ui_list_t *l) { l->n = 0; } +static void ui_list_push(ui_list_t *l, unsigned int v) { + if (l->n == l->cap) { + l->cap = l->cap ? l->cap * 2 : 8; + l->items = (unsigned int *)realloc(l->items, l->cap * sizeof(unsigned int)); + } + l->items[l->n++] = v; +} + +/* ---------- Thorup D ---------- */ +/* See SDCCtree_dec.hpp for the algorithm. 
*/ +static void thorup_D(ui_list_t *l, + uu_mmap_t *MJ, + uu_mmap_t *MS, + unsigned int n) { + /* m: unsigned int -> unsigned int, sorted by key */ + typedef struct { unsigned int k, v; } kv_t; + kv_t *m = (kv_t *)malloc(n * sizeof(kv_t)); + char *has = (char *)calloc(n, 1); + unsigned int *slot = (unsigned int *)malloc(n * sizeof(unsigned int)); + size_t m_n = 0; + unsigned int i = 0; + + ui_list_clear(l); + + for (unsigned int j = n; j > 0;) { + j--; + if (!has[j]) { + has[j] = 1; + slot[j] = i; + m[m_n].k = j; m[m_n].v = i; m_n++; + i++; + } + + size_t e; + size_t a = mm_equal_range(MS, j, &e); + for (size_t k = a; k < e; k++) { + unsigned int kv = MS->items[k].v; + if (!has[kv]) { + has[kv] = 1; + slot[kv] = i; + m[m_n].k = kv; m[m_n].v = i; m_n++; + i++; + } + } + + a = mm_equal_range(MJ, j, &e); + for (size_t k = a; k < e; k++) { + unsigned int kv = MJ->items[k].v; + if (!has[kv]) { + has[kv] = 1; + slot[kv] = i; + m[m_n].k = kv; m[m_n].v = i; m_n++; + i++; + } + } + } + + /* build v[i] = key by inverting m: we have slot[k] = i */ + unsigned int *inv = (unsigned int *)malloc(n * sizeof(unsigned int)); + for (size_t mi = 0; mi < m_n; mi++) inv[m[mi].v] = m[mi].k; + + for (unsigned int k = 0; k < n; k++) ui_list_push(l, inv[k]); + + free(m); free(has); free(slot); free(inv); +} + +/* ---------- Thorup E ---------- */ +/* Build M from graph I. We iterate i = 0..n-1; j = max(i, max neighbour + * index). Use a stack of (first, second) pairs. */ +typedef struct { int first; unsigned int second; } e_pair_t; + +static void thorup_E(uu_mmap_t *M, const cgraph_t *I) { + size_t n = cg_num_vertices(I); + e_pair_t *stk = (e_pair_t *)malloc((n + 2) * sizeof(e_pair_t)); + size_t sp = 0; + + /* M.clear() is implied by caller; we just append. 
*/ + stk[sp].first = -1; + stk[sp].second = (unsigned int)n; + sp++; + + for (unsigned int i = 0; i < n; i++) { + unsigned int j = i; + for (size_t k = 0; k < I->out[i].n; k++) { + unsigned int nb = I->out[i].dst[k]; + if (nb > j) j = nb; + } + if (j == i) continue; + + while (stk[sp - 1].second <= i) { + /* M.insert(pair(second, first)) */ + mm_insert(M, stk[sp - 1].second, (unsigned int)stk[sp - 1].first); + sp--; + } + + unsigned int i2 = i; + while (j >= stk[sp - 1].second && stk[sp - 1].second > i2) { + i2 = (unsigned int)stk[sp - 1].first; + sp--; + } + + stk[sp].first = (int)i2; + stk[sp].second = j; + sp++; + } + + /* Flush stack, except the initial sentinel. */ + while (sp > 1) { + mm_insert(M, stk[sp - 1].second, (unsigned int)stk[sp - 1].first); + sp--; + } + + free(stk); +} + +/* ---------- thorup_elimination_ordering ---------- */ + +static void thorup_elimination_ordering(ui_list_t *l, const cgraph_t *G) { + /* J: directed copy of G with sequential edges (i, i+1) removed. */ + cgraph_t J, S; + cg_init(&J, CG_DIRECTED, 0); + cg_init(&S, CG_UNDIRECTED, 0); + + cg_copy_topology(&J, G, CG_DIRECTED); + size_t nJ = cg_num_vertices(&J); + if (nJ > 0) { + for (unsigned int i = 0; i + 1 < nJ; i++) + cg_remove_edge(&J, i, i + 1); + } + + /* S: undirected copy of J. */ + cg_copy_topology(&S, &J, CG_UNDIRECTED); + + uu_mmap_t MJ, MS; + mm_init(&MJ); + mm_init(&MS); + + thorup_E(&MJ, &J); + thorup_E(&MS, &S); + + thorup_D(l, &MJ, &MS, (unsigned int)nJ); + + mm_free(&MJ); + mm_free(&MS); + cg_free(&J); + cg_free(&S); +} + +/* ---------- find_bag ---------- */ +/* Linear scan over tree vertices for the LAST (newest) bag that includes + * all elements of X. Returns (size_t)-1 if not found. 
*/ +static size_t find_bag_index(const uiset_t *X, const tree_dec_t *T) { + size_t found = (size_t)-1; + for (size_t t = 0; t < T->g.nvertices; t++) { + if (uiset_includes(&T->bag[t], X)) found = t; + } + if (found == (size_t)-1) { + fprintf(stderr, "find_bag() failed.\n"); + fflush(stderr); + } + return found; +} + +/* ---------- make_clique ---------- */ +static void make_clique(const uiset_t *X, cgraph_t *G) { + for (size_t i = 0; i < X->n; i++) + for (size_t j = i + 1; j < X->n; j++) + cg_add_edge(G, X->items[i], X->items[j], 0.0f); +} + +/* ---------- add_vertices_to_tree_decomposition (iterative) ---------- * + * The original is tail-recursive over the elimination ordering. We flatten it + * to an iterative loop to avoid blowing the C stack on long elim orderings. + */ +static void add_vertices_to_tree_decomposition(tree_dec_t *T, + const unsigned int *order, + size_t n_order, + cgraph_t *G, + char *active) { + /* Recursion unrolled: we first recurse on the tail with active[v]=false, + * THEN attach the current bag. To flatten, we must simulate the same + * order: emit the deepest recursion's base case (empty bag) first, + * then walk back up attaching bags. + * + * Process order[n_order-1], order[n_order-2], ...: for each v we compute + * its neighbours (in the current, reduced G), mark v inactive, then add + * its clique edges. At the BASE of recursion we emit a vertex. Then, on + * unwinding, we attach (neighbours ∪ {v}) as a new bag connected to the + * bag found via find_bag(neighbours). + * + * Stack per step: stored (v, neighbours) from forward pass, replayed + * backwards on unwind. */ + + if (n_order == 0) { + tree_dec_add_vertex(T); /* base case: empty bag */ + return; + } + + /* Forward pass: collect per-step neighbour sets and mark-inactive ops. 
*/ + uiset_t *nbrs = (uiset_t *)malloc(n_order * sizeof(uiset_t)); + unsigned int *vs = (unsigned int *)malloc(n_order * sizeof(unsigned int)); + + for (size_t step = 0; step < n_order; step++) { + /* Original recursion: v = order[step], then recurse on step+1. + * active[neighbour] guarded; compute neighbours of v. */ + unsigned int v = order[step]; + vs[step] = v; + uiset_init(&nbrs[step]); + for (size_t k = 0; k < G->out[v].n; k++) { + unsigned int nb = G->out[v].dst[k]; + if (active[nb]) uiset_insert(&nbrs[step], nb); + } + active[v] = 0; + make_clique(&nbrs[step], G); + } + + /* Base case of deepest recursion. */ + tree_dec_add_vertex(T); + + /* Unwind: attach bags from innermost to outermost (reverse of step). */ + for (size_t si = n_order; si > 0; si--) { + size_t step = si - 1; + size_t t = find_bag_index(&nbrs[step], T); + unsigned int s = tree_dec_add_vertex(T); + if (t != (size_t)-1) cg_add_edge(&T->g, (unsigned int)t, s, 0.0f); + /* Bag of s = neighbours ∪ {v}. */ + uiset_copy(&T->bag[s], &nbrs[step]); + uiset_insert(&T->bag[s], vs[step]); + } + + for (size_t step = 0; step < n_order; step++) uiset_free(&nbrs[step]); + free(nbrs); + free(vs); +} + +/* ---------- tree_decomposition_from_elimination_ordering ---------- */ +static void tree_decomposition_from_elimination_ordering(tree_dec_t *T, + const ui_list_t *l, + const cgraph_t *G) { + /* Build an undirected symmetrization of G. */ + cgraph_t G_sym; + cg_init(&G_sym, CG_UNDIRECTED, 0); + cg_copy_topology(&G_sym, G, CG_UNDIRECTED); + + size_t n = cg_num_vertices(G); + char *active = (char *)calloc(n, 1); + for (size_t i = 0; i < n; i++) active[i] = 1; + + /* The original iterates the list in REVERSE. 
*/ + unsigned int *order = (unsigned int *)malloc(l->n * sizeof(unsigned int)); + for (size_t i = 0; i < l->n; i++) order[i] = l->items[l->n - 1 - i]; + + add_vertices_to_tree_decomposition(T, order, l->n, &G_sym, active); + + free(order); + free(active); + cg_free(&G_sym); +} + +/* ---------- thorup_tree_decomposition ---------- */ + +void tree_dec_thorup(tree_dec_t *out, const cgraph_t *cfg) { + ui_list_t order; + ui_list_init(&order); + thorup_elimination_ordering(&order, cfg); + tree_decomposition_from_elimination_ordering(out, &order, cfg); + ui_list_free(&order); +} + +/* ---------- find_root ---------- */ +unsigned int tree_dec_find_root(const tree_dec_t *t) { + unsigned int v = 0; + /* Walk up via in-edges until we find a source. */ + while (t->g.in[v].n > 0) { + v = t->g.in[v].dst[0]; + } + return v; +} + +/* ---------- nicify_joins ---------- * + * Each join node (two children) must share the bag of its children. Each + * single-child or leaf node is recursed into. If a node has >2 children, + * we introduce a shim node between it and two of its children and recurse. + * + * The recursion is at most O(nvertices) deep in the worst case — matches + * the original which is also recursive. */ +static void nicify_joins(tree_dec_t *T, unsigned int t) { + for (;;) { + size_t od = cg_out_degree(&T->g, t); + if (od == 0) return; + if (od == 1) { + unsigned int c = T->g.out[t].dst[0]; + t = c; + continue; + } + if (od == 2) { + unsigned int c0 = T->g.out[t].dst[0]; + unsigned int c1 = T->g.out[t].dst[1]; + nicify_joins(T, c0); + if (!uiset_equal(&T->bag[t], &T->bag[c0])) { + unsigned int d = tree_dec_add_vertex(T); + uiset_copy(&T->bag[d], &T->bag[t]); + /* Re-resolve c0 and c1 — add_vertex may have grown arrays + * but cg edges haven't moved. We stored them before. 
*/ + cg_add_edge(&T->g, d, c0, 0.0f); + cg_remove_edge(&T->g, t, c0); + cg_add_edge(&T->g, t, d, 0.0f); + } + nicify_joins(T, c1); + if (!uiset_equal(&T->bag[t], &T->bag[c1])) { + unsigned int d = tree_dec_add_vertex(T); + uiset_copy(&T->bag[d], &T->bag[t]); + cg_add_edge(&T->g, d, c1, 0.0f); + cg_remove_edge(&T->g, t, c1); + cg_add_edge(&T->g, t, d, 0.0f); + } + return; + } + /* od >= 3: introduce a shim combining two children, then retry. */ + unsigned int c0 = T->g.out[t].dst[0]; + unsigned int c1 = T->g.out[t].dst[1]; + unsigned int d = tree_dec_add_vertex(T); + uiset_copy(&T->bag[d], &T->bag[t]); + cg_add_edge(&T->g, d, c0, 0.0f); + cg_add_edge(&T->g, d, c1, 0.0f); + cg_remove_edge(&T->g, t, c0); + cg_remove_edge(&T->g, t, c1); + cg_add_edge(&T->g, t, d, 0.0f); + /* loop — same t */ + } +} + +/* ---------- nicify_diffs ---------- * + * Each non-leaf node with a single child must either contain the child's + * bag or be contained in it. If not, insert a shim with the intersection. */ +static void nicify_diffs(tree_dec_t *T, unsigned int t) { + size_t od = cg_out_degree(&T->g, t); + if (od == 0) { + if (T->bag[t].n > 0) { + unsigned int d = tree_dec_add_vertex(T); + cg_add_edge(&T->g, t, d, 0.0f); + /* bag[d] is empty already */ + } + return; + } + if (od == 2) { + unsigned int c0 = T->g.out[t].dst[0]; + unsigned int c1 = T->g.out[t].dst[1]; + nicify_diffs(T, c0); + nicify_diffs(T, c1); + return; + } + if (od != 1) { + fprintf(stderr, "nicify_diffs error.\n"); + return; + } + + unsigned int c0 = T->g.out[t].dst[0]; + nicify_diffs(T, c0); + + if (uiset_includes(&T->bag[t], &T->bag[c0]) || + uiset_includes(&T->bag[c0], &T->bag[t])) + return; + + unsigned int d = tree_dec_add_vertex(T); + /* Edges first — we already have c0 captured. 
*/ + cg_add_edge(&T->g, d, c0, 0.0f); + cg_remove_edge(&T->g, t, c0); + uiset_intersection(&T->bag[t], &T->bag[c0], &T->bag[d]); + cg_add_edge(&T->g, t, d, 0.0f); +} + +/* ---------- nicify_diffs_more ---------- * + * Ensure consecutive bags differ by at most one element. Also assigns + * weight[v] for each node. */ +static void nicify_diffs_more(tree_dec_t *T, unsigned int t) { + for (;;) { + size_t od = cg_out_degree(&T->g, t); + if (od == 0) { + if (T->bag[t].n > 1) { + unsigned int d = tree_dec_add_vertex(T); + uiset_copy(&T->bag[d], &T->bag[t]); + /* erase smallest (equivalent to erasing begin()). */ + memmove(&T->bag[d].items[0], &T->bag[d].items[1], + (T->bag[d].n - 1) * sizeof(unsigned int)); + T->bag[d].n--; + T->weight[d] = 0; + cg_add_edge(&T->g, t, d, 0.0f); + /* retry with same t */ + continue; + } + T->weight[t] = 0; + return; + } + if (od == 2) { + unsigned int c0 = T->g.out[t].dst[0]; + unsigned int c1 = T->g.out[t].dst[1]; + nicify_diffs_more(T, c0); + nicify_diffs_more(T, c1); + unsigned wmin = T->weight[c0] < T->weight[c1] ? T->weight[c0] : T->weight[c1]; + T->weight[t] = wmin + 1; + return; + } + if (od != 1) { + fprintf(stderr, "nicify_diffs_more error.\n"); + return; + } + + unsigned int c0 = T->g.out[t].dst[0]; + size_t ts = T->bag[t].n; + size_t c0s = T->bag[c0].n; + + if (ts <= c0s + 1 && ts + 1 >= c0s) { + nicify_diffs_more(T, c0); + T->weight[t] = T->weight[c0]; + return; + } + + unsigned int d = tree_dec_add_vertex(T); + cg_add_edge(&T->g, d, c0, 0.0f); + cg_remove_edge(&T->g, t, c0); + + /* Copy larger bag into d, then erase the first element not in the + * smaller bag. */ + const uiset_t *bigger = ts > c0s ? &T->bag[t] : &T->bag[c0]; + const uiset_t *smaller = ts < c0s ? 
&T->bag[t] : &T->bag[c0]; + uiset_copy(&T->bag[d], bigger); + + size_t i; + for (i = 0; i < T->bag[d].n; i++) { + if (!uiset_contains(smaller, T->bag[d].items[i])) break; + } + /* erase element at index i */ + if (i < T->bag[d].n) { + memmove(&T->bag[d].items[i], &T->bag[d].items[i + 1], + (T->bag[d].n - i - 1) * sizeof(unsigned int)); + T->bag[d].n--; + } + cg_add_edge(&T->g, t, d, 0.0f); + /* retry with same t */ + } +} + +/* ---------- nicify ---------- */ +void tree_dec_nicify(tree_dec_t *T) { + if (T->g.nvertices == 0) return; + unsigned int t = tree_dec_find_root(T); + + if (T->bag[t].n > 0) { + unsigned int d = t; + t = tree_dec_add_vertex(T); + cg_add_edge(&T->g, t, d, 0.0f); + } + + nicify_joins(T, t); + nicify_diffs(T, t); + nicify_diffs_more(T, t); +} diff --git a/src/SDCCtree_dec.h b/src/SDCCtree_dec.h new file mode 100644 index 000000000..837cc0c40 --- /dev/null +++ b/src/SDCCtree_dec.h @@ -0,0 +1,41 @@ +/* Tree decomposition — C port of SDCCtree_dec.hpp. + * + * A tree decomposition of a graph G is a tree T whose vertices (bags) are + * subsets of V(G) satisfying the tree-decomposition properties. We use the + * Thorup heuristic to build T from an elimination ordering, then "nicify" + * it to produce a nice tree decomposition suitable for dynamic-programming + * register allocation and related optimizations. + * + * See the original header for the algorithms' references (Thorup 1998). 
+ */ +#ifndef KCC_SDCCTREE_DEC_H +#define KCC_SDCCTREE_DEC_H + +#include + +#include "util/cgraph.h" +#include "util/uiset.h" + +typedef struct tree_dec { + cgraph_t g; /* bidirectional tree, no edge weights */ + uiset_t *bag; /* bag[v] — set of input-graph vertex indices */ + unsigned *weight; /* weight[v] — filled by nicify_diffs_more */ + size_t cap; /* capacity of bag[]/weight[] */ +} tree_dec_t; + +void tree_dec_init(tree_dec_t *t); +void tree_dec_free(tree_dec_t *t); +unsigned int tree_dec_add_vertex(tree_dec_t *t); + +/* Build a tree decomposition of cfg using Thorup's heuristic. */ +void tree_dec_thorup(tree_dec_t *out, const cgraph_t *cfg); + +/* Transform T into a nice tree decomposition. T must already be rooted-like: + * our build produces an in-tree with exactly one source per component. */ +void tree_dec_nicify(tree_dec_t *t); + +/* Find the (single) root of the tree T (walks up in_edges until there are + * none). */ +unsigned int tree_dec_find_root(const tree_dec_t *t); + +#endif diff --git a/src/SDCCtree_dec.hpp b/src/SDCCtree_dec.hpp deleted file mode 100644 index ca4274137..000000000 --- a/src/SDCCtree_dec.hpp +++ /dev/null @@ -1,502 +0,0 @@ -// Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2010 -// - 2011 -// -// (c) 2010-2011 Goethe-Universität Frankfurt -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// -// Some routines for tree-decompositions. -// -// A tree decomposition is a graph that has a set of vertex indices as bundled -// property, e.g.: -// -// struct tree_dec_node -// { -// std::set bag; -// }; -// typedef boost::adjacency_list tree_dec_t; -// -// The following are the routines that are most likely to be interesting for -// outside use: -// -// void nicify(T_t &T) -// Transforms a tree decomposition T into a nice tree decomposition -// -// void thorup_tree_decomposition(T_t &tree_decomposition, const G_t &cfg) -// Creates a tree decomposition T from a graph cfg using Thorup's heuristic. -// -// void tree_decomposition_from_elimination_ordering(T_t &T, std::list& l, const G_t &G) Creates a tree decomposition T of a graph G from an -// elimination ordering l. -// -// void thorup_elimination_ordering(l_t &l, const J_t &J) -// Creates an elimination ordering l of a graph J using Thorup's heuristic. - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -struct forget_properties { - template void operator()(const T1 &, const T2 &) const {} -}; - -// Thorup algorithm D. -// The use of the multimap makes the complexity of this O(|I|log|I|), which -// could be reduced to O(|I|). 
-template -void thorup_D(l_t &l, const std::multimap &MJ, - const std::multimap &MS, - const unsigned int n) { - std::map m; - - l.clear(); - - unsigned int i = 0; - for (unsigned int j = n; j > 0;) { - j--; - if (m.find(j) == m.end()) - m[j] = i++; - - std::multimap::const_iterator k, k_end; - - for (boost::tie(k, k_end) = MS.equal_range(j); k != k_end; ++k) - if (m.find(k->second) == m.end()) - m[k->second] = i++; - - for (boost::tie(k, k_end) = MJ.equal_range(j); k != k_end; ++k) - if (m.find(k->second) == m.end()) - m[k->second] = i++; - } - - std::vector v(n); - - std::map::iterator mi; - - for (mi = m.begin(); mi != m.end(); ++mi) - v[mi->second] = mi->first; - - for (i = 0; i < n; i++) - l.push_back(v[i]); -} - -// Thorup algorithm E. -// The use of the multimap makes the complexity of this O(|I|log|I|), which -// could be reduced to O(|I|). -template -void thorup_E(std::multimap &M, const I_t &I) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - typedef - typename boost::property_map::type index_map; - index_map index = boost::get(boost::vertex_index, I); - - std::stack> s; - - M.clear(); - - s.push(std::pair(-1, boost::num_vertices(I))); - - for (unsigned int i = 0; i < boost::num_vertices(I); i++) { - unsigned int j = i; - adjacency_iter_t j_curr, j_end; - - for (boost::tie(j_curr, j_end) = boost::adjacent_vertices(i, I); - j_curr != j_end; ++j_curr) - if (index[*j_curr] > j) - j = index[*j_curr]; - - if (j == i) - continue; - - while (s.top().second <= i) { - M.insert( - std::pair(s.top().second, s.top().first)); - s.pop(); - } - - unsigned int i2 = i; - while (j >= s.top().second && s.top().second > i2) { - i2 = s.top().first; - s.pop(); - } - - s.push(std::pair(i2, j)); - } - - // Not in Thorup's paper, but without this the algorithm gives incorrect - // results. 
- while (s.size() > 1) { - M.insert( - std::pair(s.top().second, s.top().first)); - s.pop(); - } -} - -// Heuristically give an elimination ordering for a directed graph. -// For a description of this, including algorithms D and E, see -// Mikkel Thorup, "All Structured Programs have Small Tree-Width and Good -// Register Allocation", Appendix A. The use of the multimap makes the -// complexity of this O(|I|log|I|), could be reduced to O(|I|). -template -void thorup_elimination_ordering(l_t &l, const G_t &G) { - // Should we do this? Or just use G as J? The Thorup paper seems unclear, it - // speaks of statements that contain jumps to other statements, but does it - // count as a jump, when they're just subsequent? - boost::adjacency_list J; - boost::copy_graph( - G, J, - boost::vertex_copy(forget_properties()).edge_copy(forget_properties())); - for (unsigned int i = 0; i < boost::num_vertices(J) - 1; i++) - remove_edge(i, i + 1, J); - - // Todo: Implement a graph adaptor for boost that allows to treat directed - // graphs as undirected graphs. - boost::adjacency_list S; - boost::copy_graph(J, S); - - std::multimap MJ, MS; - - thorup_E(MJ, J); - - thorup_E(MS, S); - - thorup_D(l, MJ, MS, num_vertices(J)); -} - -// Finds a (the newest) bag that contains all vertices in X in the tree -// decomposition T. -template -typename boost::graph_traits::vertex_iterator -find_bag(const std::set &X, const T_t &T) { - typedef typename boost::graph_traits::vertex_iterator T_vertex_iter_t; - typedef typename std::set::const_iterator vertex_index_iter_t; - - T_vertex_iter_t t, t_end, t_found; - vertex_index_iter_t v; - - for (boost::tie(t, t_end) = vertices(T), t_found = t_end; t != t_end; ++t) { - for (v = X.begin(); v != X.end(); ++v) - if (T[*t].bag.find(*v) == T[*t].bag.end()) - break; - - if (v == X.end()) - t_found = t; - } - - if (t_found == t_end) // Todo: Better error handling (throw exception?) 
- { - std::cerr << "find_bag() failed.\n"; - std::cerr.flush(); - } - - return (t_found); -} - -// Add edges to make the vertices in X a clique in G. -template void make_clique(const std::set &X, G_t &G) { - std::set::const_iterator n1, n2; - for (n1 = X.begin(); n1 != X.end(); n1++) - for (n2 = n1, ++n2; n2 != X.end(); ++n2) - add_edge(*n1, *n2, G); -} - -template -void add_vertices_to_tree_decomposition(T_t &T, const v_t v, const v_t v_end, - G_t &G, std::vector &active) { - // Base case: Empty graph. Create an empty bag. - if (v == v_end) { - boost::add_vertex(T); - return; - } - - // Todo: A more elegant solution, e.g. using subgraphs or filtered graphs. - - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - typedef - typename boost::property_map::type index_map; - index_map index = boost::get(boost::vertex_index, G); - - // Get the neigbours - adjacency_iter_t n, n_end; - std::set neighbours; - for (boost::tie(n, n_end) = boost::adjacent_vertices(*v, G); n != n_end; ++n) - if (active[index[*n]]) - neighbours.insert(index[*n]); - - // Recurse - active[*v] = false; - make_clique(neighbours, G); - v_t v_next = v; - add_vertices_to_tree_decomposition(T, ++v_next, v_end, G, active); - - // Add new bag - typename boost::graph_traits::vertex_iterator t; - typename boost::graph_traits::vertex_descriptor s; - t = find_bag(neighbours, T); - s = boost::add_vertex(T); - boost::add_edge(*t, s, T); - T[s].bag = neighbours; - T[s].bag.insert(*v); -} - -// Create a tree decomposition from en elimination ordering. -template -void tree_decomposition_from_elimination_ordering( - T_t &T, const std::list &l, const G_t &G) { - std::list::const_reverse_iterator v, v_end; - v = l.rbegin(), v_end = l.rend(); - - // Todo: Implement a graph adaptor for boost that allows to treat directed - // graphs as undirected graphs. 
- boost::adjacency_list G_sym; - boost::copy_graph( - G, G_sym, - boost::vertex_copy(forget_properties()).edge_copy(forget_properties())); - - std::vector active(boost::num_vertices(G), true); - - add_vertices_to_tree_decomposition(T, v, v_end, G_sym, active); -} - -template -void thorup_tree_decomposition(T_t &tree_decomposition, const G_t &cfg) { - std::list elimination_ordering; - - thorup_elimination_ordering(elimination_ordering, cfg); - - tree_decomposition_from_elimination_ordering(tree_decomposition, - elimination_ordering, cfg); -} - -// Ensure that all joins are at proper join nodes: Each node that has two -// children has the same bag as its children. Complexity: Linear in the number -// of vertices of T. -template -void nicify_joins(T_t &T, - typename boost::graph_traits::vertex_descriptor t) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - - adjacency_iter_t c, c_end; - typename boost::graph_traits::vertex_descriptor c0, c1; - - boost::tie(c, c_end) = boost::adjacent_vertices(t, T); - - switch (out_degree(t, T)) { - case 0: - return; - case 1: - nicify_joins(T, *c); - return; - case 2: - break; - default: - c0 = *c++; - c1 = *c; - typename boost::graph_traits::vertex_descriptor d; - d = boost::add_vertex(T); - add_edge(d, c0, T); - add_edge(d, c1, T); - boost::remove_edge(t, c0, T); - boost::remove_edge(t, c1, T); - T[d].bag = T[t].bag; - boost::add_edge(t, d, T); - nicify_joins(T, t); - return; - } - - c0 = *c++; - c1 = *c; - nicify_joins(T, c0); - if (T[t].bag != T[c0].bag) { - typename boost::graph_traits::vertex_descriptor d; - d = boost::add_vertex(T); - boost::add_edge(d, c0, T); - boost::remove_edge(t, c0, T); - T[d].bag = T[t].bag; - boost::add_edge(t, d, T); - } - nicify_joins(T, c1); - if (T[t].bag != T[c1].bag) { - typename boost::graph_traits::vertex_descriptor d = - boost::add_vertex(T); - boost::add_edge(d, c1, T); - boost::remove_edge(t, c1, T); - T[d].bag = T[t].bag; - boost::add_edge(t, d, T); - } -} - 
-// Ensure that all nodes' bags are either a subset or a superset of their -// successors'. Complexity: Linear in the number of vertices of T. -template -void nicify_diffs(T_t &T, - typename boost::graph_traits::vertex_descriptor t) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - - adjacency_iter_t c, c_end; - typename boost::graph_traits::vertex_descriptor c0, c1; - - boost::tie(c, c_end) = adjacent_vertices(t, T); - - switch (boost::out_degree(t, T)) { - case 0: - if (T[t].bag.size()) - boost::add_edge(t, boost::add_vertex(T), T); - return; - case 1: - break; - case 2: - c0 = *c++; - c1 = *c; - nicify_diffs(T, c0); - nicify_diffs(T, c1); - return; - default: - std::cerr << "nicify_diffs error.\n"; - return; - } - - c0 = *c; - nicify_diffs(T, c0); - - if (std::includes(T[t].bag.begin(), T[t].bag.end(), T[c0].bag.begin(), - T[c0].bag.end()) || - std::includes(T[c0].bag.begin(), T[c0].bag.end(), T[t].bag.begin(), - T[t].bag.end())) - return; - - typename boost::graph_traits::vertex_descriptor d = boost::add_vertex(T); - - boost::add_edge(d, c0, T); - boost::remove_edge(t, c0, T); - std::set_intersection(T[t].bag.begin(), T[t].bag.end(), T[c0].bag.begin(), - T[c0].bag.end(), - std::inserter(T[d].bag, T[d].bag.begin())); - boost::add_edge(t, d, T); -} - -// // Ensure that all nodes' bags' sizes differ by at most one to their -// successors'. 
-template -void nicify_diffs_more(T_t &T, - typename boost::graph_traits::vertex_descriptor t) { - typedef - typename boost::graph_traits::adjacency_iterator adjacency_iter_t; - - adjacency_iter_t c, c_end; - typename boost::graph_traits::vertex_descriptor c0, c1; - - boost::tie(c, c_end) = adjacent_vertices(t, T); - - switch (boost::out_degree(t, T)) { - case 0: - if (T[t].bag.size() > 1) { - typename boost::graph_traits::vertex_descriptor d = - boost::add_vertex(T); - T[d].bag = T[t].bag; - T[d].bag.erase(T[d].bag.begin()); - T[d].weight = 0; - boost::add_edge(t, d, T); - nicify_diffs_more(T, t); - } else - T[t].weight = 0; - return; - case 1: - break; - case 2: - c0 = *c++; - c1 = *c; - nicify_diffs_more(T, c0); - nicify_diffs_more(T, c1); - T[t].weight = std::min(T[c0].weight, T[c1].weight) + 1; - return; - default: - std::cerr << "nicify_diffs_more error.\n"; - return; - } - - c0 = *c; - - size_t c0_size, t_size; - t_size = T[t].bag.size(); - c0_size = T[c0].bag.size(); - - if (t_size <= c0_size + 1 && t_size + 1 >= c0_size) { - nicify_diffs_more(T, c0); - T[t].weight = T[c0].weight; - return; - } - - typename boost::graph_traits::vertex_descriptor d = add_vertex(T); - boost::add_edge(d, c0, T); - boost::remove_edge(t, c0, T); - T[d].bag = T[t_size > c0_size ? t : c0].bag; - std::set::iterator i; - for (i = T[d].bag.begin(); T[t_size < c0_size ? t : c0].bag.find(*i) != - T[t_size < c0_size ? t : c0].bag.end(); - ++i) - ; - T[d].bag.erase(i); - boost::add_edge(t, d, T); - - nicify_diffs_more(T, t); -} - -// Find a root of an acyclic graph T -// Complexity: Linear in the number of vertices of T. 
-template -typename boost::graph_traits::vertex_descriptor find_root(T_t &T) { - typename boost::graph_traits::vertex_descriptor t; - typename boost::graph_traits::in_edge_iterator e, e_end; - - t = *(boost::vertices(T).first); - - for (boost::tie(e, e_end) = boost::in_edges(t, T); e != e_end; - boost::tie(e, e_end) = boost::in_edges(t, T)) - t = boost::source(*e, T); - - return (t); -} - -// Transform a tree decomposition into a nice tree decomposition. -template void nicify(T_t &T) { - typename boost::graph_traits::vertex_descriptor t; - - t = find_root(T); - - // Ensure we have an empty bag at the root. - if (T[t].bag.size()) { - typename boost::graph_traits::vertex_descriptor d = t; - t = add_vertex(T); - boost::add_edge(t, d, T); - } - - nicify_joins(T, t); - nicify_diffs(T, t); - nicify_diffs_more(T, t); -} diff --git a/src/backend/ralloc2.c b/src/backend/ralloc2.c new file mode 100644 index 000000000..e276c0366 --- /dev/null +++ b/src/backend/ralloc2.c @@ -0,0 +1,1322 @@ +/* backend/ralloc2.c — pure-C port of backend/ralloc2.cc. + * + * Z80 port-specific instruction-cost function and public entry point for the + * optimal register allocator (Krause 2013). The generic DP lives in + * SDCCralloc.c; this file supplies the six customization hooks plus + * z80_ralloc2_cc(). + */ + +#include +#include +#include +#include + +#include "common.h" + +#include "SDCCralloc.h" +#include "z80.h" +#include "gen.h" +#include "ralloc.h" + +/* dryZ80iCode lives in gen.c; not declared in any public header. 
*/ +unsigned char dryZ80iCode(iCode *ic); + +bool z80_assignment_optimal; +bool should_omit_frame_ptr; + +#define REG_C 0 +#define REG_B 1 +#define REG_E 2 +#define REG_D 3 +#define REG_L 4 +#define REG_H 5 +#define REG_IYL 6 +#define REG_IYH 7 +#define REG_A (port->num_regs - 1) + +static const float RA_INF = 1.0f / 0.0f; +static int ra_is_inf(float f) { return f == RA_INF || !(f < RA_INF); } /* also true for NaN, since !(NaN < RA_INF) holds */ + +/* -------------------- default_operand_cost -------------------- */ /* Cost of operand o at node i under assignment a. Returns 0 for a non-symbol operand or one with no variables at this node. If the first byte is in a->local (in registers): 1.0 per byte, RA_INF for any later byte that is not, 2.0 extra for each byte pair of a 2- or 4-byte operand that is not in consecutive registers starting at C, E or L, RA_INF for a multi-byte operand with a byte in A, and small adjustments for A, L and IY. Otherwise (spilt): 1.5 if rematerialisable else 4.0, plus 4.0 per further byte, RA_INF if a later byte is in a register. */ + +static float default_operand_cost(const operand *o, const assignment_t *a, + unsigned short i, const cfg_ralloc_t *G, + const con_t *I) { + float c = 0.0f; + short byteregs[4] = {-1, -1, -1, -1}; /* -1 = no register recorded for this byte; avoids reading indeterminate slots below */ + unsigned short size; + + if (!o || !IS_SYMOP(o)) return 0.0f; + + size_t oe; + size_t os = cfg_operands_equal_range(&G->node[i], OP_SYMBOL_CONST(o)->key, &oe); + if (os >= oe) return 0.0f; + + short v = G->node[i].operands[os].var; + if (sss_contains(&a->local, v)) { + /* In registers. */ + c += 1.0f; + byteregs[I->node[v].byte] = a->global[v]; + size = 1; + + size_t p; + for (p = os + 1; p < oe; p++) { + v = G->node[i].operands[p].var; + c += (sss_contains(&a->local, v) ? 1.0f : RA_INF); + byteregs[I->node[v].byte] = a->global[v]; + size++; + } + + if ((size == 2 || size == 4) && + (byteregs[1] != byteregs[0] + 1 || + (byteregs[0] != REG_C && byteregs[0] != REG_E && byteregs[0] != REG_L))) + c += 2.0f; + if (size == 4 && + (byteregs[3] != byteregs[2] + 1 || + (byteregs[2] != REG_C && byteregs[2] != REG_E && byteregs[2] != REG_L))) /* upper pair is judged by byte 2, mirroring the low-pair test on byte 0 */ + c += 2.0f; + + /* Code generator cannot handle variables only partially in A. */ + if (size > 1) { + unsigned short k; + for (k = 0; k < size; k++) + if (byteregs[k] == REG_A) c += RA_INF; + } + + if (byteregs[0] == REG_A) c -= 0.4f; + else if (OPTRALLOC_HL && byteregs[0] == REG_L) c -= 0.1f; + else if ((OPTRALLOC_IY && byteregs[0] == REG_IYL) || byteregs[0] == REG_IYH) + c += 0.1f; + } else { + /* Spilt. */ + c += OP_SYMBOL_CONST(o)->remat ?
1.5f : 4.0f; + size_t p; + for (p = os + 1; p < oe; p++) { + v = G->node[i].operands[p].var; + c += (sss_contains(&a->local, v) ? RA_INF : 4.0f); + } + } + + return c; +} + +static int operand_sane(const operand *o, const assignment_t *a, + unsigned short i, const cfg_ralloc_t *G, + const con_t *I) { + (void)I; + if (!o || !IS_SYMOP(o)) return 1; + size_t oe; + size_t os = cfg_operands_equal_range(&G->node[i], OP_SYMBOL_CONST(o)->key, &oe); + if (os >= oe) return 1; + int first_in = sss_contains(&a->local, G->node[i].operands[os].var); + size_t p; + for (p = os + 1; p < oe; p++) { + int p_in = sss_contains(&a->local, G->node[i].operands[p].var); + if (first_in && !p_in) return 0; /* first byte in a->local but byte p is not */ + if (!first_in && p_in) return 0; /* byte p in a->local but first byte is not */ + } + return 1; /* all bytes agree: every one in a->local, or none */ +} + +static float default_instruction_cost(const assignment_t *a, unsigned short i, + const cfg_ralloc_t *G, const con_t *I) { + const iCode *ic = G->node[i].ic; + float c = 0.0f; + c += default_operand_cost(IC_RESULT(ic), a, i, G, I); + c += default_operand_cost(IC_LEFT(ic), a, i, G, I); + c += default_operand_cost(IC_RIGHT(ic), a, i, G, I); + return c; /* sum of the three operand costs */ +} + +static int inst_sane(const assignment_t *a, unsigned short i, + const cfg_ralloc_t *G, const con_t *I) { + const iCode *ic = G->node[i].ic; + if (ic->op == SEND && ic->builtinSEND && ic->next && ic->next->op == SEND) { + unsigned int nbr = G->g.out[i].dst[0]; /* first out-edge of node i; assumes one exists here - TODO confirm */ + if (!inst_sane(a, (unsigned short)nbr, G, I)) return 0; + } + return operand_sane(IC_RESULT(ic), a, i, G, I) && + operand_sane(IC_LEFT(ic), a, i, G, I) && + operand_sane(IC_RIGHT(ic), a, i, G, I); +} + +/* -------------------- assign_cost (coalescing) -------------------- */ + +static float assign_cost(const assignment_t *a, unsigned short i, + const cfg_ralloc_t *G, const con_t *I) { + float c = 0.0f; + const iCode *ic = G->node[i].ic; + const operand *right = IC_RIGHT(ic); + const operand *result = IC_RESULT(ic); + + if (!right || !IS_SYMOP(right) || !result || !IS_SYMOP(result) || + POINTER_GET(ic) || POINTER_SET(ic)) +
return default_instruction_cost(a, i, G, I); + + reg_t byteregs[4] = {-1, -1, -1, -1}; + int size1 = 0, size2 = 0; + size_t oe; + size_t os = cfg_operands_equal_range(&G->node[i], OP_SYMBOL_CONST(right)->key, &oe); + if (os < oe) { + short v = G->node[i].operands[os].var; + if (!sss_contains(&a->local, v)) + return default_instruction_cost(a, i, G, I); + c += 1.0f; + byteregs[I->node[v].byte] = a->global[v]; + size1 = 1; + size_t p; + for (p = os + 1; p < oe; p++) { + v = G->node[i].operands[p].var; + c += (sss_contains(&a->local, v) ? 1.0f : RA_INF); + byteregs[I->node[v].byte] = a->global[v]; + size1++; + } + if (size1 > 1) { + int k; + for (k = 0; k < size1; k++) + if (byteregs[k] == REG_A) c += RA_INF; + } + if (byteregs[0] == REG_A) c -= 0.4f; + else if ((OPTRALLOC_IY && byteregs[0] == REG_IYL) || byteregs[0] == REG_IYH) + c += 0.1f; + } + if (!size1) return default_instruction_cost(a, i, G, I); + + os = cfg_operands_equal_range(&G->node[i], OP_SYMBOL_CONST(result)->key, &oe); + if (os < oe) { + short v = G->node[i].operands[os].var; + if (!sss_contains(&a->local, v)) + return default_instruction_cost(a, i, G, I); + c += 1.0f; + if (byteregs[I->node[v].byte] == a->global[v]) c -= 2.0f; /* result byte shares the register of the matching source byte */ + size2 = 1; + size_t p; + for (p = os + 1; p < oe; p++) { + v = G->node[i].operands[p].var; + c += (sss_contains(&a->local, v) ?
1.0f : RA_INF); + if (byteregs[I->node[v].byte] == a->global[v]) c -= 2.0f; + size2++; + } + if (byteregs[0] == REG_A) c -= 0.4f; /* byteregs still holds the registers of `right` here */ + else if ((OPTRALLOC_IY && byteregs[0] == REG_IYL) || byteregs[0] == REG_IYH) + c += 0.1f; + } + if (!size2) return default_instruction_cost(a, i, G, I); + + return c; +} + +/* -------------------- add_operand_conflicts_in_node -------------------- */ + +void ralloc_add_operand_conflicts_in_node(const cfg_node_t *n, con_t *I) { + const iCode *ic = n->ic; + const operand *result = IC_RESULT(ic); + const operand *left = IC_LEFT(ic); + const operand *right = IC_RIGHT(ic); + + if (!result || !IS_SYMOP(result)) return; + if (!(ic->op == '~' || ic->op == UNARYMINUS || ic->op == '+' || + ic->op == '-' || ic->op == '^' || ic->op == '|' || + ic->op == BITWISEAND)) + return; + + size_t re, rs = cfg_operands_equal_range(n, OP_SYMBOL_CONST(result)->key, &re); + if (rs >= re) return; + + if (left && IS_SYMOP(left)) { + size_t oe, os = cfg_operands_equal_range(n, OP_SYMBOL_CONST(left)->key, &oe); + size_t p, q; + for (p = os; p < oe; p++) { + for (q = rs; q < re; q++) { + short rvar = n->operands[q].var; + short ovar = n->operands[p].var; + if (I->node[rvar].byte < I->node[ovar].byte) { /* only a lower result byte against a higher operand byte gets an edge */ + if (!cg_has_edge(&I->g, (unsigned int)rvar, (unsigned int)ovar)) + cg_add_edge(&I->g, (unsigned int)rvar, (unsigned int)ovar, 0.0f); + } + } + } + } + + if (right && IS_SYMOP(right)) { + size_t oe, os = cfg_operands_equal_range(n, OP_SYMBOL_CONST(right)->key, &oe); + size_t p, q; + for (p = os; p < oe; p++) { + for (q = rs; q < re; q++) { + short rvar = n->operands[q].var; + short ovar = n->operands[p].var; + if (I->node[rvar].byte < I->node[ovar].byte) { + if (!cg_has_edge(&I->g, (unsigned int)rvar, (unsigned int)ovar)) + cg_add_edge(&I->g, (unsigned int)rvar, (unsigned int)ovar, 0.0f); + } + } + } + } +} + +/* -------------------- operand_in_reg / operand_on_stack / operand_is_pair -------------------- */ + +static int operand_in_reg_r(const operand *o, reg_t r,
const i_assignment_t *ia, + unsigned short i, const cfg_ralloc_t *G) { + if (!o || !IS_SYMOP(o)) return 0; + if (r >= port->num_regs) return 0; + size_t oe, os = cfg_operands_equal_range(&G->node[i], OP_SYMBOL_CONST(o)->key, &oe); + size_t p; + for (p = os; p < oe; p++) { + short v = G->node[i].operands[p].var; + if (v == ia->registers[r][1] || v == ia->registers[r][0]) return 1; /* either slot recorded for register r matches */ + } + return 0; +} + +static int operand_in_reg_any(const operand *o, const i_assignment_t *ia, + unsigned short i, const cfg_ralloc_t *G) { + if (!o || !IS_SYMOP(o)) return 0; + size_t oe, os = cfg_operands_equal_range(&G->node[i], OP_SYMBOL_CONST(o)->key, &oe); + size_t p; + for (p = os; p < oe; p++) { + short v = G->node[i].operands[p].var; + reg_t r; + for (r = 0; r < port->num_regs; r++) + if (v == ia->registers[r][1] || v == ia->registers[r][0]) return 1; + } + return 0; +} + +static int operand_on_stack(const operand *o, const assignment_t *a, + unsigned short i, const cfg_ralloc_t *G) { + if (!o || !IS_SYMOP(o)) return 0; + if (OP_SYMBOL_CONST(o)->remat) return 0; + if (OP_SYMBOL_CONST(o)->_isparm && !IS_REGPARM(OP_SYMBOL_CONST(o)->etype)) + return 1; + size_t oe, os = cfg_operands_equal_range(&G->node[i], OP_SYMBOL_CONST(o)->key, &oe); + size_t p; + for (p = os; p < oe; p++) { + short v = G->node[i].operands[p].var; + if (a->global[v] < 0) return 1; /* a negative entry means no register was assigned */ + } + return 0; +} + +static int operand_is_pair(const operand *o, const assignment_t *a, + unsigned short i, const cfg_ralloc_t *G) { + if (!o || !IS_SYMOP(o)) return 0; + size_t oe, os = cfg_operands_equal_range(&G->node[i], OP_SYMBOL_CONST(o)->key, &oe); + if (os >= oe) return 0; + if (os + 1 >= oe) return 0; /* need exactly 2 bytes */ + if (os + 2 < oe) return 0; /* but not more */ + short v0 = G->node[i].operands[os].var; + short v1 = G->node[i].operands[os + 1].var; + if (a->global[v0] % 2) return 0; /* first byte must be in an even-numbered register (C, E, L or IYL per the defines) */ + if (a->global[v0] + 1 != a->global[v1]) return 0; + return 1; +} + +/* -------------------- Ainst_ok -------------------- */ +
+static int Ainst_ok(const assignment_t *a, unsigned short i, + const cfg_ralloc_t *G, const con_t *I) { + const iCode *ic = G->node[i].ic; + const i_assignment_t *ia = &a->i_assignment; + const operand *left = IC_LEFT(ic); + const operand *right = IC_RIGHT(ic); + const operand *result = IC_RESULT(ic); + + if (ia->registers[REG_A][1] < 0) return 1; /* A not in use */ + + bool exstk = (should_omit_frame_ptr || (currFunc && currFunc->stack > 127) || IS_GB); + + if (I->node[ia->registers[REG_A][1]].size > 1 || + (ia->registers[REG_A][0] >= 0 && I->node[ia->registers[REG_A][0]].size > 1)) + return 0; /* a variable recorded for A is wider than one byte */ + + int result_in_A = operand_in_reg_r(result, REG_A, ia, i, G); + int input_in_A; + switch (ic->op) { + case IFX: input_in_A = operand_in_reg_r(IC_COND(ic), REG_A, ia, i, G); break; + case JUMPTABLE: input_in_A = operand_in_reg_r(IC_JTCOND(ic), REG_A, ia, i, G); break; + default: + input_in_A = operand_in_reg_r(left, REG_A, ia, i, G) || + operand_in_reg_r(right, REG_A, ia, i, G); + break; + } + + /* Bit instructions don't disturb A. */ + if (ic->op == BITWISEAND && ifxForOp(IC_RESULT(ic), ic) && + ((IS_OP_LITERAL(left) && + (!((IS_GB && IS_TRUE_SYMOP(right)) || + (exstk && operand_on_stack(right, a, i, G))) || + (operand_in_reg_any(right, ia, i, G) && + !operand_in_reg_r(right, REG_IYL, ia, i, G) && + !operand_in_reg_r(right, REG_IYH, ia, i, G)))) || + (IS_OP_LITERAL(right) && + (!((IS_GB && IS_TRUE_SYMOP(left)) || + (exstk && operand_on_stack(left, a, i, G))) || + (operand_in_reg_any(left, ia, i, G) && + !operand_in_reg_r(left, REG_IYL, ia, i, G) && + !operand_in_reg_r(left, REG_IYH, ia, i, G)))))) { + operand *litop = IS_OP_LITERAL(left) ?
IC_LEFT(ic) : IC_RIGHT(ic); + unsigned int ix; + int ok = 1; + for (ix = 0; ix < getSize(operandType(result)); ix++) { + unsigned char byte = (ulFromVal(OP_VALUE(litop)) >> (ix * 8)) & 0xff; + if (byte != 0x00 && byte != 0x01 && byte != 0x02 && byte != 0x04 && + byte != 0x08 && byte != 0x10 && byte != 0x20 && byte != 0x40 && + byte != 0x80) { ok = 0; break; } /* each mask byte must be zero or a single set bit */ + } + if (ok) return 1; + } + + const sssset_t *dying = &G->node[i].dying; + + if (ic->op == GET_VALUE_AT_ADDRESS) + return (result_in_A || !IS_BITVAR(getSpec(operandType(result)))); + if (ic->op == '=' && POINTER_SET(ic)) + return (sss_contains(dying, ia->registers[REG_A][1]) || + sss_contains(dying, ia->registers[REG_A][0]) || + !(IS_BITVAR(getSpec(operandType(result))) || + IS_BITVAR(getSpec(operandType(right))))); + + /* Variable in A is not used by this instruction. */ + if (ic->op == '+' && IS_ITEMP(IC_LEFT(ic)) && IS_ITEMP(IC_RESULT(ic)) && + IS_OP_LITERAL(right) && ulFromVal(OP_VALUE(IC_RIGHT(ic))) == 1 && + OP_KEY(IC_RESULT(ic)) == OP_KEY(IC_LEFT(ic))) + return 1; + + if ((ic->op == '=' || ic->op == CAST) && !POINTER_SET(ic) && + isOperandEqual(result, right)) + return 1; + + if ((ic->op == '=' || ic->op == CAST) && !POINTER_SET(ic) && + !(ic->op == CAST && IS_BOOL(operandType(result))) && + (operand_in_reg_r(right, REG_A, ia, i, G) || + operand_in_reg_r(right, REG_B, ia, i, G) || + operand_in_reg_r(right, REG_C, ia, i, G) || + operand_in_reg_r(right, REG_D, ia, i, G) || + operand_in_reg_r(right, REG_E, ia, i, G) || + operand_in_reg_r(right, REG_H, ia, i, G) || + operand_in_reg_r(right, REG_L, ia, i, G)) && + (operand_in_reg_r(right, REG_A, ia, i, G) || + operand_in_reg_r(result, REG_B, ia, i, G) || + operand_in_reg_r(result, REG_C, ia, i, G) || + operand_in_reg_r(result, REG_D, ia, i, G) || + operand_in_reg_r(result, REG_E, ia, i, G) || + operand_in_reg_r(right, REG_H, ia, i, G) || + operand_in_reg_r(right, REG_L, ia, i, G))) + return 1; /* NOTE(review): the second group tests `right` for A, H and L but `result` for B, C, D, E; looks like a copy-paste slip, left unchanged - confirm against the code generator */ + + if (ic->op == GOTO || ic->op == LABEL) return 1; + + if
(ic->op == IPUSH && getSize(operandType(IC_LEFT(ic))) <= 2 && + (operand_in_reg_r(left, REG_A, ia, i, G) || + (operand_in_reg_r(left, REG_B, ia, i, G) && + (getSize(operandType(left)) < 2 || + (operand_in_reg_r(left, REG_C, ia, i, G) && + I->node[ia->registers[REG_C][1]].byte == 0))) || + (operand_in_reg_r(left, REG_D, ia, i, G) && + (getSize(operandType(left)) < 2 || + (operand_in_reg_r(left, REG_E, ia, i, G) && + I->node[ia->registers[REG_E][1]].byte == 0))) || + (operand_in_reg_r(left, REG_H, ia, i, G) && + (getSize(operandType(left)) < 2 || + (operand_in_reg_r(left, REG_L, ia, i, G) && + I->node[ia->registers[REG_L][1]].byte == 0))) || + (operand_in_reg_r(left, REG_IYL, ia, i, G) && + I->node[ia->registers[REG_IYL][1]].byte == 0 && + (getSize(operandType(left)) < 2 || + operand_in_reg_r(left, REG_IYH, ia, i, G))))) + return 1; + + if (!result_in_A && !input_in_A) return 0; + + /* Last use of operand in A. */ + if (input_in_A && + (result_in_A || sss_contains(dying, ia->registers[REG_A][1]) || + sss_contains(dying, ia->registers[REG_A][0]))) { + if (ic->op != IFX && ic->op != RETURN && + !((ic->op == RIGHT_OP || ic->op == LEFT_OP) && + (IS_OP_LITERAL(right) || operand_in_reg_r(right, REG_A, ia, i, G))) && + !((ic->op == '=' || ic->op == CAST) && + !(IY_RESERVED && POINTER_SET(ic))) && + !IS_BITWISE_OP(ic) && !(ic->op == '~') && + !(ic->op == '*' && + (IS_ITEMP(IC_LEFT(ic)) || IS_OP_LITERAL(IC_LEFT(ic))) && + (IS_ITEMP(IC_RIGHT(ic)) || IS_OP_LITERAL(IC_RIGHT(ic)))) && + !((ic->op == '-' || ic->op == '+' || ic->op == EQ_OP) && + IS_OP_LITERAL(IC_RIGHT(ic)))) + return 0; + } else if (input_in_A && ic->op != IFX && ic->op != JUMPTABLE) { + return 0; + } + + if (result_in_A && !POINTER_GET(ic) && ic->op != '+' && ic->op != '-' && + (ic->op != '*' || (!IS_OP_LITERAL(IC_LEFT(ic)) && !IS_OP_LITERAL(right))) && + !IS_BITWISE_OP(ic) && ic->op != GET_VALUE_AT_ADDRESS && ic->op != '=' && + ic->op != EQ_OP && ic->op != '<' && ic->op != '>' && ic->op != CAST && + ic->op !=
CALL && ic->op != PCALL && ic->op != GETHBIT && + !((ic->op == LEFT_OP || ic->op == RIGHT_OP) && IS_OP_LITERAL(right))) + return 0; + + return 1; +} + +/* -------------------- HLinst_ok -------------------- */ /* Returns 1 at once when neither L nor H holds a variable in a->i_assignment; otherwise returns 1 or 0 depending on the op and on where the operands live. Presumably this answers whether the instruction can be generated with HL occupied - confirm against the code generator. */ + +static int HLinst_ok(const assignment_t *a, unsigned short i, + const cfg_ralloc_t *G, const con_t *I) { + const iCode *ic = G->node[i].ic; + bool exstk = (should_omit_frame_ptr || (currFunc && currFunc->stack > 127) || IS_GB); + const i_assignment_t *ia = &a->i_assignment; + bool unused_L = (ia->registers[REG_L][1] < 0); + bool unused_H = (ia->registers[REG_H][1] < 0); + if (unused_L && unused_H) return 1; + + const operand *left = IC_LEFT(ic); + const operand *right = IC_RIGHT(ic); + const operand *result = IC_RESULT(ic); + + bool result_in_L = operand_in_reg_r(result, REG_L, ia, i, G); + bool result_in_H = operand_in_reg_r(result, REG_H, ia, i, G); + bool result_in_HL = result_in_L || result_in_H; + + bool input_in_L, input_in_H; + switch (ic->op) { + case IFX: + input_in_L = operand_in_reg_r(IC_COND(ic), REG_L, ia, i, G); + input_in_H = operand_in_reg_r(IC_COND(ic), REG_H, ia, i, G); /* H, not L: matches the default case below */ + break; + case JUMPTABLE: + input_in_L = operand_in_reg_r(IC_JTCOND(ic), REG_L, ia, i, G); + input_in_H = operand_in_reg_r(IC_JTCOND(ic), REG_H, ia, i, G); /* H, not L: matches the default case below */ + break; + default: + input_in_L = operand_in_reg_r(left, REG_L, ia, i, G) || + operand_in_reg_r(right, REG_L, ia, i, G); + input_in_H = operand_in_reg_r(left, REG_H, ia, i, G) || + operand_in_reg_r(right, REG_H, ia, i, G); + break; + } + bool input_in_HL = input_in_L || input_in_H; + + const sssset_t *dying = &G->node[i].dying; + bool dying_L = result_in_L || + sss_contains(dying, ia->registers[REG_L][1]) || + sss_contains(dying, ia->registers[REG_L][0]); + bool dying_H = result_in_H || + sss_contains(dying, ia->registers[REG_H][1]) || + sss_contains(dying, ia->registers[REG_H][0]); + bool result_only_HL = (result_in_L || unused_L || dying_L) && + (result_in_H || unused_H || dying_H); + + if (ic->op == RETURN ||
ic->op == SEND) return 1; + + if ((IS_GB || IY_RESERVED) && (IS_TRUE_SYMOP(left) || IS_TRUE_SYMOP(right))) + return 0; + if ((IS_GB || IY_RESERVED) && IS_TRUE_SYMOP(result) && + getSize(operandType(IC_RESULT(ic))) > 2) + return 0; + + if (result_only_HL && ic->op == PCALL) return 1; + + if (exstk && + (operand_on_stack(result, a, i, G) + operand_on_stack(left, a, i, G) + + operand_on_stack(right, a, i, G) >= 2) && + ((result && IS_SYMOP(result) && getSize(operandType(result)) >= 2) || + !result_only_HL)) + return 0; + if (exstk && (operand_on_stack(left, a, i, G) || operand_on_stack(right, a, i, G)) && + (ic->op == '>' || ic->op == '<')) + return 0; + + if (ic->op == '+' && getSize(operandType(result)) == 2 && + ((IS_OP_LITERAL(right) && ulFromVal(OP_VALUE(IC_RIGHT(ic))) <= 3) || + (IS_OP_LITERAL(left) && ulFromVal(OP_VALUE(IC_LEFT(ic))) <= 3)) && + (operand_in_reg_r(result, REG_L, ia, i, G) && + I->node[ia->registers[REG_L][1]].byte == 0 && + operand_in_reg_r(result, REG_H, ia, i, G))) + return 1; + + if (ic->op == '+' && getSize(operandType(result)) == 2 && + !IS_TRUE_SYMOP(result) && + (result_only_HL || + (operand_in_reg_r(result, REG_IYL, ia, i, G) && + operand_in_reg_r(result, REG_IYH, ia, i, G))) && + ((ia->registers[REG_C][1] < 0 && ia->registers[REG_B][1] < 0) || + (ia->registers[REG_E][1] < 0 && ia->registers[REG_D][1] < 0))) + return 1; + + if (ic->op == '+' && getSize(operandType(result)) >= 2 && + ((IS_TRUE_SYMOP(result) && !operand_on_stack(result, a, i, G)) || + (operand_on_stack(left, a, i, G) ? exstk : IS_TRUE_SYMOP(left)) || + (operand_on_stack(right, a, i, G) ? exstk : IS_TRUE_SYMOP(right)))) + return 0; + + if (ic->op == '+' && input_in_HL && + (operand_on_stack(result, a, i, G) ?
exstk : IS_TRUE_SYMOP(result))) + return 0; + + if (result_only_HL && !POINTER_SET(ic) && + (ic->op == ADDRESS_OF || ic->op == GET_VALUE_AT_ADDRESS || + ic->op == '+' || ic->op == '*' || ic->op == '=' || ic->op == CAST)) + return 1; + + if (!exstk && !isOperandInDirSpace(IC_LEFT(ic)) && + !isOperandInDirSpace(IC_RIGHT(ic)) && !isOperandInDirSpace(IC_RESULT(ic)) && + (ic->op == '-' || ic->op == '<' || ic->op == '>')) + return 1; + + if (ic->op == LEFT_OP && getSize(operandType(result)) <= 2 && + IS_OP_LITERAL(right) && result_only_HL) + return 1; + if ((ic->op == LEFT_OP || ic->op == RIGHT_OP) && + (!exstk || + ((!operand_on_stack(left, a, i, G) || (!input_in_HL && result_only_HL)) && + (!operand_on_stack(right, a, i, G) || (!input_in_HL && result_only_HL)) && + !operand_on_stack(result, a, i, G)))) + return 1; + + if (result && IS_SYMOP(result) && isOperandInDirSpace(IC_RESULT(ic))) return 0; + if ((input_in_HL || !result_only_HL) && left && IS_SYMOP(left) && + isOperandInDirSpace(IC_LEFT(ic))) return 0; + if ((input_in_HL || !result_only_HL) && right && IS_SYMOP(right) && + isOperandInDirSpace(IC_RIGHT(ic))) return 0; + + if (ic->op == IFX) return 1; + if (SKIP_IC2(ic)) return 1; + if (ic->op == IPUSH && input_in_H && + (getSize(operandType(IC_LEFT(ic))) <= 2 || + (ia->registers[REG_L][1] > 0 && + I->node[ia->registers[REG_L][1]].byte == 2 && + ia->registers[REG_H][1] > 0 && + I->node[ia->registers[REG_H][1]].byte == 3))) + return 1; + if (ic->op == IPUSH && ic->next && ic->next->op == CALL) return 1; + if (ic->op == IPUSH && getSize(operandType(left)) == 2 && + ((ia->registers[REG_C][1] < 0 && ia->registers[REG_B][1] < 0) || + (ia->registers[REG_E][1] < 0 && ia->registers[REG_D][1] < 0))) + return 1; + if (ic->op == IPUSH && getSize(operandType(left)) <= 2 && + ((operand_in_reg_r(left, REG_C, ia, i, G) && + I->node[ia->registers[REG_C][1]].byte == 0 && + (getSize(operandType(left)) < 2 || + operand_in_reg_r(left, REG_B, ia, i, G))) || + (operand_in_reg_r(left,
REG_E, ia, i, G) && + I->node[ia->registers[REG_E][1]].byte == 0 && + (getSize(operandType(left)) < 2 || + operand_in_reg_r(left, REG_D, ia, i, G))) || + (operand_in_reg_r(left, REG_IYL, ia, i, G) && + I->node[ia->registers[REG_IYL][1]].byte == 0 && + (getSize(operandType(left)) < 2 || + operand_in_reg_r(left, REG_IYH, ia, i, G))))) + return 1; + if (POINTER_GET(ic) && input_in_L && input_in_H && + (getSize(operandType(IC_RESULT(ic))) == 1 || !result_in_HL)) + return 1; + if (!IS_GB && ic->op == ADDRESS_OF && + ((operand_in_reg_r(result, REG_IYL, ia, i, G) && + ia->registers[REG_IYL][1] > 0 && + I->node[ia->registers[REG_IYL][1]].byte == 0 && + operand_in_reg_r(result, REG_IYH, ia, i, G)) || + (!OP_SYMBOL_CONST(left)->onStack && + operand_in_reg_r(result, REG_C, ia, i, G) && + ia->registers[REG_C][1] > 0 && + I->node[ia->registers[REG_C][1]].byte == 0 && + operand_in_reg_r(result, REG_B, ia, i, G)) || + (!OP_SYMBOL_CONST(left)->onStack && + operand_in_reg_r(result, REG_E, ia, i, G) && + ia->registers[REG_E][1] > 0 && + I->node[ia->registers[REG_E][1]].byte == 0 && + operand_in_reg_r(result, REG_D, ia, i, G)))) + return 1; + + if (ic->op == LEFT_OP && isOperandLiteral(IC_RIGHT(ic))) return 1; + + if (exstk && !result_only_HL && + (operand_on_stack(left, a, i, G) || operand_on_stack(right, a, i, G) || + operand_on_stack(result, a, i, G)) && + ic->op == '+') + return 0; + + if ((!POINTER_SET(ic) && !POINTER_GET(ic) && + (ic->op == '=' || ic->op == CAST || ic->op == UNARYMINUS || + ic->op == RIGHT_OP || IS_BITWISE_OP(ic) || + (ic->op == '+' && getSize(operandType(IC_RESULT(ic))) == 1) || + (ic->op == '+' && getSize(operandType(IC_RESULT(ic))) <= 2 && + (result_only_HL || !IS_GB))))) + return 1; + + if ((ic->op == '<' || ic->op == '>') && + (IS_ITEMP(left) || IS_OP_LITERAL(left) || IS_ITEMP(right) || + IS_OP_LITERAL(right))) + return 1; + + if (ic->op == EQ_OP && IS_VALOP(right)) return 1; + + if (ic->op == CALL) return 1; + + if (result_only_HL && ic->op == PCALL)
return 1; + + if (POINTER_GET(ic) && getSize(operandType(IC_RESULT(ic))) == 1 && + !IS_BITVAR(getSpec(operandType(result))) && + ((operand_in_reg_r(right, REG_C, ia, i, G) && + I->node[ia->registers[REG_C][1]].byte == 0 && + operand_in_reg_r(right, REG_B, ia, i, G)) || + (operand_in_reg_r(right, REG_E, ia, i, G) && + I->node[ia->registers[REG_E][1]].byte == 0 && + operand_in_reg_r(right, REG_D, ia, i, G)) || + (operand_in_reg_r(right, REG_IYL, ia, i, G) && + I->node[ia->registers[REG_IYL][1]].byte == 0 && + operand_in_reg_r(right, REG_IYH, ia, i, G)))) + return 1; + + if ((ic->op == '=') && POINTER_SET(ic) && + operand_in_reg_r(result, REG_IYL, ia, i, G) && + I->node[ia->registers[REG_IYL][1]].byte == 0 && + operand_in_reg_r(result, REG_IYH, ia, i, G)) + return 1; + + if ((ic->op == '=' || ic->op == CAST) && POINTER_SET(ic) && !result_only_HL) + return 0; + + if ((ic->op == '=' || ic->op == CAST) && !POINTER_GET(ic) && !input_in_HL) + return 1; + + return 0; +} + +/* -------------------- IYinst_ok -------------------- */ + +static int IYinst_ok(const assignment_t *a, unsigned short i, + const cfg_ralloc_t *G, const con_t *I) { + const iCode *ic = G->node[i].ic; + const i_assignment_t *ia = &a->i_assignment; + bool exstk = (should_omit_frame_ptr || (currFunc && currFunc->stack > 127)); + bool unused_IYL = (ia->registers[REG_IYL][1] < 0); + bool unused_IYH = (ia->registers[REG_IYH][1] < 0); + const operand *left = IC_LEFT(ic); + const operand *right = IC_RIGHT(ic); + const operand *result = IC_RESULT(ic); + + bool result_in_IYL = operand_in_reg_r(result, REG_IYL, ia, i, G); + bool result_in_IYH = operand_in_reg_r(result, REG_IYH, ia, i, G); + bool result_in_IY = result_in_IYL || result_in_IYH; + + bool input_in_IYL, input_in_IYH; + switch (ic->op) { + case IFX: + input_in_IYL = operand_in_reg_r(IC_COND(ic), REG_IYL, ia, i, G); + input_in_IYH = operand_in_reg_r(IC_COND(ic), REG_IYL, ia, i, G); + break; + case JUMPTABLE: + input_in_IYL = operand_in_reg_r(IC_JTCOND(ic),
REG_IYL, ia, i, G); + input_in_IYH = operand_in_reg_r(IC_JTCOND(ic), REG_IYL, ia, i, G); + break; + default: + input_in_IYL = operand_in_reg_r(left, REG_IYL, ia, i, G) || + operand_in_reg_r(right, REG_IYL, ia, i, G); + input_in_IYH = operand_in_reg_r(left, REG_IYH, ia, i, G) || + operand_in_reg_r(right, REG_IYH, ia, i, G); + break; + } + bool input_in_IY = input_in_IYL || input_in_IYH; + + if (unused_IYL && unused_IYH) return 1; + + if (exstk && (operand_on_stack(result, a, i, G) || + operand_on_stack(left, a, i, G) || + operand_on_stack(right, a, i, G))) + return 0; + + if (unused_IYL ^ unused_IYH) return 0; + if ((!unused_IYL && I->node[ia->registers[REG_IYL][1]].size != 2) || + (!unused_IYH && I->node[ia->registers[REG_IYH][1]].size != 2) || + (ia->registers[REG_IYL][0] >= 0 && I->node[ia->registers[REG_IYL][0]].size != 2) || + (ia->registers[REG_IYH][0] >= 0 && I->node[ia->registers[REG_IYH][0]].size != 2)) + return 0; + if (ia->registers[REG_IYL][1] >= 0 && + (ia->registers[REG_IYH][1] <= 0 || + I->node[ia->registers[REG_IYL][1]].v != I->node[ia->registers[REG_IYH][1]].v)) + return 0; + if (ia->registers[REG_IYH][1] >= 0 && + (ia->registers[REG_IYL][1] <= 0 || + I->node[ia->registers[REG_IYH][1]].v != I->node[ia->registers[REG_IYL][1]].v)) + return 0; + if (ia->registers[REG_IYL][0] >= 0 && + (ia->registers[REG_IYH][0] <= 0 || + I->node[ia->registers[REG_IYL][0]].v != I->node[ia->registers[REG_IYH][0]].v)) + return 0; + if (ia->registers[REG_IYH][0] >= 0 && + (ia->registers[REG_IYL][0] <= 0 || + I->node[ia->registers[REG_IYH][0]].v != I->node[ia->registers[REG_IYL][0]].v)) + return 0; + if (I->node[ia->registers[REG_IYL][1]].byte != 0 || + I->node[ia->registers[REG_IYH][1]].byte != 1) + return 0; + if ((ia->registers[REG_IYL][0] >= 0 && I->node[ia->registers[REG_IYL][0]].byte != 0) || + (ia->registers[REG_IYH][0] >= 0 && I->node[ia->registers[REG_IYH][0]].byte != 1)) + return 0; + + if (result_in_IY && + (ic->op == '=' || + (ic->op == CAST && 
getSize(operandType(IC_RESULT(ic))) <= + getSize(operandType(IC_RIGHT(ic)))) || + ic->op == '+')) + return 1; + + if (ic->op == LEFT_OP && result_in_IY && input_in_IY && + IS_VALOP(IC_RIGHT(ic)) && operandLitValue(IC_RIGHT(ic)) < 8) + return 1; + + if (ic->op == '-' && result_in_IY && input_in_IY && IS_VALOP(IC_RIGHT(ic)) && + operandLitValue(IC_RIGHT(ic)) < 4) + return 1; + + if (SKIP_IC2(ic)) return 1; + + if (!result_in_IY && !input_in_IY && + !(IC_RESULT(ic) && isOperandInDirSpace(IC_RESULT(ic))) && + !(IC_RIGHT(ic) && IS_TRUE_SYMOP(IC_RIGHT(ic))) && + !(IC_LEFT(ic) && IS_TRUE_SYMOP(IC_LEFT(ic)))) + return 1; + + if (!result_in_IY && !input_in_IY && + (ic->op == '=' || + (ic->op == CAST && getSize(operandType(IC_RIGHT(ic))) >= 2 && + (getSize(operandType(IC_RESULT(ic))) <= getSize(operandType(IC_RIGHT(ic))) || + !IS_SPEC(operandType(IC_RIGHT(ic))) || SPEC_USIGN(operandType(IC_RIGHT(ic)))))) && + operand_is_pair(IC_RESULT(ic), a, i, G)) + return 1; + + if (ic->op == IPUSH) return 1; + + if (ic->op == GET_VALUE_AT_ADDRESS && isOperandInDirSpace(IC_RESULT(ic))) + return 0; + + if (input_in_IY && !result_in_IY && + ((ic->op == '=' && !POINTER_SET(ic)) || + (ic->op == CAST && getSize(operandType(IC_RESULT(ic))) <= + getSize(operandType(IC_RIGHT(ic)))) || + ic->op == GET_VALUE_AT_ADDRESS)) + return 1; + + return 0; +} + +/* -------------------- DEinst_ok -------------------- */ + +static int DEinst_ok(const assignment_t *a, unsigned short i, + const cfg_ralloc_t *G, const con_t *I) { + (void)I; + if (!IS_GB) return 1; + const i_assignment_t *ia = &a->i_assignment; + bool unused_E = (ia->registers[REG_E][1] < 0); + bool unused_D = (ia->registers[REG_D][1] < 0); + if (unused_E && unused_D) return 1; + + const iCode *ic = G->node[i].ic; + const operand *left = IC_LEFT(ic); + const operand *right = IC_RIGHT(ic); + const operand *result = IC_RESULT(ic); + + if (ic->op == PCALL) return 0; + if (ic->op == GET_VALUE_AT_ADDRESS && + (getSize(operandType(result)) >= 2 || 
!operand_is_pair(left, a, i, G))) + return 0; + if (ic->op == '=' && POINTER_SET(ic) && !operand_is_pair(result, a, i, G)) + return 0; + if ((ic->op == '=' || ic->op == CAST) && getSize(operandType(result)) >= 2 && + (operand_on_stack(right, a, i, G) || + operand_in_reg_r(right, REG_L, ia, i, G) || + operand_in_reg_r(right, REG_H, ia, i, G)) && + (operand_on_stack(result, a, i, G) || + operand_in_reg_r(result, REG_L, ia, i, G) || + operand_in_reg_r(result, REG_H, ia, i, G))) + return 0; + if (ic->op == '+' && getSize(operandType(result)) >= 2) return 0; + if (ic->op == UNARYMINUS || ic->op == '-' || ic->op == '*') return 0; + if (ic->op == '>' || ic->op == '<') return 0; + return 1; +} + +/* -------------------- set_surviving_regs / unset -------------------- */ + +static void set_surviving_regs(const assignment_t *a, unsigned short i, + const cfg_ralloc_t *G, const con_t *I) { + iCode *ic = G->node[i].ic; + ic->rMask = newBitVect(port->num_regs); + ic->rSurv = newBitVect(port->num_regs); + + size_t vi; + for (vi = 0; vi < G->node[i].alive.n; vi++) { + short v = G->node[i].alive.items[vi]; + if (a->global[v] < 0) continue; + ic->rMask = bitVectSetBit(ic->rMask, a->global[v]); + if (!sss_contains(&G->node[i].dying, v)) { + int owned_result = + IC_RESULT(ic) && !POINTER_SET(ic) && IS_SYMOP(IC_RESULT(ic)) && + OP_SYMBOL_CONST(IC_RESULT(ic))->key == I->node[v].v; + if (!owned_result) + ic->rSurv = bitVectSetBit(ic->rSurv, a->global[v]); + } + } +} + +static void unset_surviving_regs(unsigned short i, const cfg_ralloc_t *G) { + iCode *ic = G->node[i].ic; + freeBitVect(ic->rSurv); + freeBitVect(ic->rMask); +} + +/* -------------------- assign_operand_for_cost -------------------- */ + +static void assign_operand_for_cost(operand *o, const assignment_t *a, + unsigned short i, const cfg_ralloc_t *G, + const con_t *I) { + if (!o || !IS_SYMOP(o)) return; + symbol *sym = OP_SYMBOL(o); + size_t oe, os = cfg_operands_equal_range(&G->node[i], OP_SYMBOL_CONST(o)->key, &oe); + 
size_t p; + for (p = os; p < oe; p++) { + short v = G->node[i].operands[p].var; + if (a->global[v] >= 0) { + if (a->global[v] != REG_A && + ((a->global[v] != REG_IYL && a->global[v] != REG_IYH) || !OPTRALLOC_IY)) { + sym->regs[I->node[v].byte] = regsZ80 + a->global[v]; + sym->accuse = 0; + sym->isspilt = false; + sym->nRegs = I->node[v].size; + } else if (a->global[v] == REG_A) { + sym->accuse = ACCUSE_A; + sym->isspilt = false; + sym->nRegs = 0; + sym->regs[I->node[v].byte] = 0; + } else { + sym->accuse = ACCUSE_IY; + sym->isspilt = false; + sym->nRegs = 0; + sym->regs[I->node[v].byte] = 0; + } + } else { + sym->isspilt = true; + sym->accuse = 0; + sym->nRegs = I->node[v].size; + sym->regs[I->node[v].byte] = 0; + } + } +} + +static void assign_operands_for_cost(const assignment_t *a, unsigned short i, + const cfg_ralloc_t *G, const con_t *I) { + const iCode *ic = G->node[i].ic; + if (ic->op == IFX) + assign_operand_for_cost(IC_COND(ic), a, i, G, I); + else if (ic->op == JUMPTABLE) + assign_operand_for_cost(IC_JTCOND(ic), a, i, G, I); + else { + assign_operand_for_cost(IC_LEFT(ic), a, i, G, I); + assign_operand_for_cost(IC_RIGHT(ic), a, i, G, I); + assign_operand_for_cost(IC_RESULT(ic), a, i, G, I); + } + if (ic->op == SEND && ic->builtinSEND) { + unsigned int nx = G->g.out[i].dst[0]; + assign_operands_for_cost(a, (unsigned short)nx, G, I); + } +} + +/* -------------------- ralloc_instruction_cost (public hook) -------------------- */ + +float ralloc_instruction_cost(const assignment_t *a, unsigned int i, + const cfg_ralloc_t *G, const con_t *I) { + iCode *ic = G->node[i].ic; + float c; + + if (!inst_sane(a, (unsigned short)i, G, I)) return RA_INF; + if (ic->generated) return 0.0f; + if (!Ainst_ok(a, (unsigned short)i, G, I)) return RA_INF; + if (OPTRALLOC_HL && !HLinst_ok(a, (unsigned short)i, G, I)) return RA_INF; + if (!DEinst_ok(a, (unsigned short)i, G, I)) return RA_INF; + if (OPTRALLOC_IY && !IYinst_ok(a, (unsigned short)i, G, I)) return RA_INF; + + switch 
(ic->op) { + case FUNCTION: + case ENDFUNCTION: + case LABEL: + case GOTO: + case INLINEASM: + return 0.0f; + case '!': case '~': case UNARYMINUS: + case '+': case '-': case '^': case '|': case BITWISEAND: + case IPUSH: case CALL: case PCALL: case RETURN: + case '*': case '>': case '<': + case EQ_OP: case AND_OP: case OR_OP: + case GETHBIT: case LEFT_OP: case RIGHT_OP: + case GET_VALUE_AT_ADDRESS: case '=': case IFX: + case ADDRESS_OF: case JUMPTABLE: case CAST: + case SEND: case DUMMY_READ_VOLATILE: + case CRITICAL: case ENDCRITICAL: + assign_operands_for_cost(a, (unsigned short)i, G, I); + set_surviving_regs(a, (unsigned short)i, G, I); + c = dryZ80iCode(ic); + unset_surviving_regs((unsigned short)i, G); + ic->generated = false; + return c; + default: + return default_instruction_cost(a, (unsigned short)i, G, I); + } + (void)assign_cost; /* currently unused but kept for parity with original */ +} + +/* -------------------- weird_byte_order / local_assignment_insane -------------------- */ + +static float weird_byte_order(const assignment_t *a, const con_t *I) { + float c = 0.0f; + size_t vi; + for (vi = 0; vi < a->local.n; vi++) { + short v = a->local.items[vi]; + if (a->global[v] % 2 != I->node[v].byte % 2) c += 8.0f; + } + return c; +} + +static int local_assignment_insane(const assignment_t *a, const con_t *I, + short lastvar) { + size_t i; + for (i = 0; i < a->local.n;) { + short v_old = a->local.items[i]; + i++; + if (i == a->local.n) { + if (v_old != lastvar && I->node[v_old].byte != I->node[v_old].size - 1) + return 1; + break; + } + short v_cur = a->local.items[i]; + if (I->node[v_old].v == I->node[v_cur].v) { + if (I->node[v_old].byte != I->node[v_cur].byte - 1) return 1; + } else { + if ((v_old != lastvar && I->node[v_old].byte != I->node[v_old].size - 1) || + I->node[v_cur].byte) + return 1; + } + } + return 0; +} + +int ralloc_assignment_hopeless(const assignment_t *a, unsigned int i, + const cfg_ralloc_t *G, const con_t *I, + short lastvar) { + if 
(!G->node[i].ic->generated && !Ainst_ok(a, (unsigned short)i, G, I)) + return 1; + if (local_assignment_insane(a, I, lastvar)) return 1; + + const i_assignment_t *ia = &a->i_assignment; + + if (OPTRALLOC_IY && + ((ia->registers[REG_IYL][1] >= 0 && + (I->node[ia->registers[REG_IYL][1]].size != 2 || + I->node[ia->registers[REG_IYL][1]].byte != 0)) || + (ia->registers[REG_IYH][1] >= 0 && + (I->node[ia->registers[REG_IYH][1]].size != 2 || + I->node[ia->registers[REG_IYH][1]].byte != 1)) || + (ia->registers[REG_IYL][0] >= 0 && + (I->node[ia->registers[REG_IYL][0]].size != 2 || + I->node[ia->registers[REG_IYL][0]].byte != 0)) || + (ia->registers[REG_IYH][0] >= 0 && + (I->node[ia->registers[REG_IYH][0]].size != 2 || + I->node[ia->registers[REG_IYH][0]].byte != 1)))) + return 1; + + if (OPTRALLOC_HL && + (ia->registers[REG_L][1] >= 0 && ia->registers[REG_H][1] >= 0) && + (ia->registers[REG_L][0] >= 0 && ia->registers[REG_H][0] >= 0) && + !HLinst_ok(a, (unsigned short)i, G, I)) + return 1; + + if (OPTRALLOC_IY && + (ia->registers[REG_IYL][1] >= 0 && ia->registers[REG_IYH][1] >= 0) && + !((ia->registers[REG_IYL][0] >= 0) ^ (ia->registers[REG_IYH][0] >= 0)) && + !IYinst_ok(a, (unsigned short)i, G, I)) + return 1; + + return 0; +} + +/* -------------------- ralloc_get_best_local_assignment_biased -------------------- */ + +void ralloc_get_best_local_assignment_biased(assignment_t *out, unsigned int t, + const tree_dec_ralloc_t *T) { + const assignment_node_t *best = T->node[t].alist_head; + const assignment_node_t *an; + for (an = T->node[t].alist_head; an; an = an->next) { + if (an->a.s < best->a.s) { + int risky = 0; + size_t vi; + for (vi = 0; vi < an->a.local.n; vi++) { + short v = an->a.local.items[vi]; + signed char r = an->a.global[v]; + if (r == REG_A || + (OPTRALLOC_HL && (r == REG_H || r == REG_L)) || + (OPTRALLOC_IY && (r == REG_IYH || r == REG_IYL))) { + risky = 1; break; + } + } + if (!risky) best = an; + } + } + if (!best) return; + assignment_copy(out, 
&best->a); + size_t vi; + for (vi = 0; vi < T->node[t].alive.n; vi++) + sss_insert(&out->local, T->node[t].alive.items[vi]); +} + +/* -------------------- ralloc_rough_cost_estimate -------------------- */ + +float ralloc_rough_cost_estimate(const assignment_t *a, unsigned int i, + const cfg_ralloc_t *G, const con_t *I) { + const i_assignment_t *ia = &a->i_assignment; + float c = 0.0f; + + c += weird_byte_order(a, I); + + if (OPTRALLOC_HL && + (ia->registers[REG_L][1] >= 0 && ia->registers[REG_H][1] >= 0) && + !((ia->registers[REG_L][0] >= 0) ^ (ia->registers[REG_H][0] >= 0)) && + !HLinst_ok(a, (unsigned short)i, G, I)) + c += 8.0f; + + if (ia->registers[REG_A][1] < 0) c += 0.03f; + if (OPTRALLOC_HL && ia->registers[REG_L][1] < 0) c += 0.02f; + + if (OPTRALLOC_IY) { + size_t vi; + for (vi = 0; vi < a->local.n; vi++) { + short v = a->local.items[vi]; + if (a->global[v] == REG_IYL || a->global[v] == REG_IYH) c += 8.0f; + } + } + + if (ia->registers[REG_E][1] < 0) c += 0.0001f; + if (ia->registers[REG_D][1] < 0) c += 0.00001f; + + if (a->marked) c -= 0.5f; + + size_t vi; + for (vi = 0; vi < a->local.n; vi++) { + short v = a->local.items[vi]; + const symbol *sym = (symbol *)hTabItemWithKey(liveRanges, I->node[v].v); + if (a->global[v] < 0 && IS_REGISTER(sym->type)) c += 32.0f; + if ((I->node[v].byte % 2) && + (a->global[v] == REG_L || a->global[v] == REG_E || a->global[v] == REG_C)) + c += 8.0f; + if (!(I->node[v].byte % 2) && I->node[v].size > 1 && + (a->global[v] == REG_H || a->global[v] == REG_D || a->global[v] == REG_B)) + c += 8.0f; + if ((I->node[v].byte == 0 && I->node[v].size > 1) || + (I->node[v].byte == 2 && I->node[v].size > 3)) { + if (a->global[v] == REG_L && v + 1 < (short)a->global_n && + a->global[v + 1] >= 0 && a->global[v + 1] != REG_H) + c += 16.0f; + if (a->global[v] == REG_E && v + 1 < (short)a->global_n && + a->global[v + 1] >= 0 && a->global[v + 1] != REG_D) + c += 16.0f; + if (a->global[v] == REG_C && v + 1 < (short)a->global_n && + a->global[v 
+ 1] >= 0 && a->global[v + 1] != REG_B) + c += 16.0f; + } else if (I->node[v].byte == 1 || I->node[v].byte == 3) { + if (a->global[v] == REG_H && v - 1 >= 0 && a->global[v - 1] >= 0 && + a->global[v - 1] != REG_L) c += 16.0f; + if (a->global[v] == REG_D && v - 1 >= 0 && a->global[v - 1] >= 0 && + a->global[v - 1] != REG_E) c += 16.0f; + if (a->global[v] == REG_B && v - 1 >= 0 && a->global[v - 1] >= 0 && + a->global[v - 1] != REG_C) c += 16.0f; + } + } + + c -= a->local.n * 0.2f; + return c; +} + +/* -------------------- ralloc_extra_ic_generated -------------------- */ + +void ralloc_extra_ic_generated(iCode *ic) { + if (ic->op == '>' || ic->op == '<' || ic->op == LE_OP || ic->op == GE_OP || + ic->op == EQ_OP || ic->op == NE_OP || + ((ic->op == '^' || ic->op == '|' || ic->op == BITWISEAND) && + (IS_OP_LITERAL(IC_LEFT(ic)) || IS_OP_LITERAL(IC_RIGHT(ic))))) { + iCode *ifx; + if ((ifx = ifxForOp(IC_RESULT(ic), ic))) { + OP_SYMBOL(IC_RESULT(ic))->for_newralloc = false; + OP_SYMBOL(IC_RESULT(ic))->regType = REG_CND; + ifx->generated = true; + } + } + + if (ic->op == SEND && ic->builtinSEND && + (!ic->prev || ic->prev->op != SEND || !ic->prev->builtinSEND)) { + iCode *icn; + for (icn = ic->next; icn->op != CALL; icn = icn->next) + icn->generated = true; + icn->generated = true; + ic->generated = false; + } +} + +/* -------------------- omit_frame_ptr / move_parms -------------------- */ + +static bool omit_frame_ptr(const cfg_ralloc_t *G) { + if (IS_GB || IY_RESERVED || z80_opts.noOmitFramePtr) return false; + if (options.omitFramePtr) return true; + + signed char omitcost = -16; + size_t ncfg = cg_num_vertices(&G->g); + unsigned int i; + for (i = 0; i < ncfg; i++) { + if ((int)G->node[i].alive.n > port->num_regs - 4) return false; + const iCode *ic = G->node[i].ic; + const operand *o; + o = IC_RESULT(ic); + if (o && IS_SYMOP(o) && OP_SYMBOL_CONST(o)->_isparm && + !IS_REGPARM(OP_SYMBOL_CONST(o)->etype)) omitcost += 6; + o = IC_LEFT(ic); + if (o && IS_SYMOP(o) && 
OP_SYMBOL_CONST(o)->_isparm && + !IS_REGPARM(OP_SYMBOL_CONST(o)->etype)) omitcost += 6; + o = IC_RIGHT(ic); + if (o && IS_SYMOP(o) && OP_SYMBOL_CONST(o)->_isparm && + !IS_REGPARM(OP_SYMBOL_CONST(o)->etype)) omitcost += 6; + if (omitcost > 14) return false; + } + return true; +} + +static void move_parms(void) { + if (!currFunc || IS_GB || options.omitFramePtr || !should_omit_frame_ptr) return; + value *val; + for (val = FUNC_ARGS(currFunc->type); val; val = val->next) { + if (IS_REGPARM(val->sym->etype) || !val->sym->onStack) continue; + val->sym->stack -= 2; + } +} + +/* -------------------- z80_ralloc2_cc (public entry) -------------------- */ + +iCode *z80_ralloc2_cc(ebbIndex *ebbi) { + iCode *ic; + cfg_ralloc_t cfg; + con_t conflict_graph; + tree_dec_ralloc_t T; + + cfg_ralloc_init(&cfg); + con_init(&conflict_graph); + tree_dec_ralloc_init(&T); + + ic = ralloc_create_cfg(&cfg, &conflict_graph, ebbi); + + should_omit_frame_ptr = omit_frame_ptr(&cfg); + move_parms(); + + /* Build tree decomposition. We feed it the underlying cgraph. */ + tree_dec_thorup(&T.td, &cfg.g); + /* Grow the side-array to match tree_dec's vertex count. */ + { + size_t n = cg_num_vertices(&T.td.g); + if (T.cap < n) { + T.node = (tree_dec_ralloc_node_t *)realloc(T.node, n * sizeof(*T.node)); + size_t k; + for (k = T.cap; k < n; k++) { + sss_init(&T.node[k].alive); + T.node[k].alist_head = T.node[k].alist_tail = NULL; + T.node[k].alist_n = 0; + } + T.cap = n; + } + } + tree_dec_nicify(&T.td); + /* Re-grow side-array after nicify (may have added vertices). 
*/ + { + size_t n = cg_num_vertices(&T.td.g); + if (T.cap < n) { + T.node = (tree_dec_ralloc_node_t *)realloc(T.node, n * sizeof(*T.node)); + size_t k; + for (k = T.cap; k < n; k++) { + sss_init(&T.node[k].alive); + T.node[k].alist_head = T.node[k].alist_tail = NULL; + T.node[k].alist_n = 0; + } + T.cap = n; + } + } + + ralloc_alive_tree_dec(&T, &cfg); + ralloc_good_re_root(&T); + tree_dec_nicify(&T.td); + { + size_t n = cg_num_vertices(&T.td.g); + if (T.cap < n) { + T.node = (tree_dec_ralloc_node_t *)realloc(T.node, n * sizeof(*T.node)); + size_t k; + for (k = T.cap; k < n; k++) { + sss_init(&T.node[k].alive); + T.node[k].alist_head = T.node[k].alist_tail = NULL; + T.node[k].alist_n = 0; + } + T.cap = n; + } + } + ralloc_alive_tree_dec(&T, &cfg); + + assignment_t winner; + assignment_init(&winner); + int not_optimal = ralloc_tree_dec_ralloc(&T, &cfg, &conflict_graph, &winner); + z80_assignment_optimal = !not_optimal; + + /* Apply winner to symbols. */ + { + size_t ncon = cg_num_vertices(&conflict_graph.g); + unsigned int v; + for (v = 0; v < ncon; v++) { + symbol *sym = (symbol *)hTabItemWithKey(liveRanges, conflict_graph.node[v].v); + if (winner.global[v] >= 0) { + if (winner.global[v] != REG_A && + ((winner.global[v] != REG_IYL && winner.global[v] != REG_IYH) || + !OPTRALLOC_IY)) { + sym->regs[conflict_graph.node[v].byte] = regsZ80 + winner.global[v]; + sym->accuse = 0; + sym->isspilt = false; + sym->nRegs = conflict_graph.node[v].size; + } else if (winner.global[v] == REG_A) { + sym->accuse = ACCUSE_A; + sym->isspilt = false; + sym->nRegs = 0; + sym->regs[0] = 0; + } else { + sym->accuse = ACCUSE_IY; + sym->isspilt = false; + sym->nRegs = 0; + sym->regs[conflict_graph.node[v].byte] = 0; + } + } else { + int k; + for (k = 0; k < conflict_graph.node[v].size; k++) + sym->regs[k] = 0; + sym->accuse = 0; + sym->nRegs = conflict_graph.node[v].size; + sym->isspilt = false; + } + } + } + + { + size_t ncfg = cg_num_vertices(&cfg.g); + unsigned int vi; + for (vi = 0; vi 
< ncfg; vi++) + set_surviving_regs(&winner, (unsigned short)vi, &cfg, &conflict_graph); + } + + assignment_free(&winner); + tree_dec_ralloc_free(&T); + con_free(&conflict_graph); + cfg_ralloc_free(&cfg); + + (void)ra_is_inf; + return ic; +} diff --git a/src/backend/ralloc2.cc b/src/backend/ralloc2.cc deleted file mode 100644 index 1dc4670ad..000000000 --- a/src/backend/ralloc2.cc +++ /dev/null @@ -1,1787 +0,0 @@ -// Philipp Klaus Krause, philipp@informatik.uni-frankfurt.de, pkk@spth.de, 2010 -// - 2011 -// -// (c) 2010-2012 Goethe-Universität Frankfurt -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the -// Free Software Foundation; either version 2, or (at your option) any -// later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -// -// An optimal, polynomial-time register allocator. - -//#define DEBUG_RALLOC_DEC // Uncomment to get debug messages while doing -// register allocation on the tree decomposition. #define DEBUG_RALLOC_DEC_ASS -// // Uncomment to get debug messages about assignments while doing register -// allocation on the tree decomposition (much more verbose than the one above). 
- -#include "SDCCralloc.hpp" - -extern "C" { -#include "z80.h" -unsigned char dryZ80iCode(iCode *ic); -bool z80_assignment_optimal; -bool should_omit_frame_ptr; -}; - -#define REG_C 0 -#define REG_B 1 -#define REG_E 2 -#define REG_D 3 -#define REG_L 4 -#define REG_H 5 -#define REG_IYL 6 -#define REG_IYH 7 -#define REG_A (port->num_regs - 1) - -template -float default_operand_cost(const operand *o, const assignment &a, - unsigned short int i, const G_t &G, const I_t &I) { - float c = 0.0f; - - operand_map_t::const_iterator oi, oi_end; - - var_t byteregs[4]; // Todo: Change this when sdcc supports variables larger - // than 4 bytes in registers. - unsigned short int size; - - if (o && IS_SYMOP(o)) { - boost::tie(oi, oi_end) = G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); - if (oi != oi_end) { - var_t v = oi->second; - - // In registers. - if (a.local.find(v) != a.local.end()) { - c += 1.0f; - byteregs[I[v].byte] = a.global[v]; - size = 1; - - while (++oi != oi_end) { - v = oi->second; - c += (a.local.find(v) != a.local.end() - ? 1.0f - : std::numeric_limits::infinity()); - byteregs[I[v].byte] = a.global[v]; - size++; - } - - // Penalty for not placing 2- and 4-byte variables in register pairs - // Todo: Extend this once the register allcoator can use registers other - // than bc, de: - if ((size == 2 || size == 4) && - (byteregs[1] != byteregs[0] + 1 || byteregs[0] != REG_C && - byteregs[0] != REG_E && - byteregs[0] != REG_L)) - c += 2.0f; - if (size == 4 && (byteregs[3] != byteregs[2] + 1 || - byteregs[2] != REG_C && byteregs[2] != REG_E && - byteregs[0] != REG_L)) - c += 2.0f; - - // Code generator cannot handle variables only partially in A. 
- if (size > 1) - for (unsigned short int i = 0; i < size; i++) - if (byteregs[i] == REG_A) - c += std::numeric_limits::infinity(); - - if (byteregs[0] == REG_A) - c -= 0.4f; - else if (OPTRALLOC_HL && byteregs[0] == REG_L) - c -= 0.1f; - else if (OPTRALLOC_IY && byteregs[0] == REG_IYL || - byteregs[0] == REG_IYH) - c += 0.1f; - } - // Spilt. - else { - c += OP_SYMBOL_CONST(o)->remat ? 1.5f : 4.0f; - while (++oi != oi_end) { - v = oi->second; - c += (a.local.find(v) == a.local.end() - ? 4.0f - : std::numeric_limits::infinity()); - } - } - } - } - - return (c); -} - -// Check that the operand is either fully in registers or fully in memory. -template -static bool operand_sane(const operand *o, const assignment &a, - unsigned short int i, const G_t &G, const I_t &I) { - if (!o || !IS_SYMOP(o)) - return (true); - - operand_map_t::const_iterator oi, oi_end; - boost::tie(oi, oi_end) = G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); - - if (oi == oi_end) - return (true); - - // In registers. 
- if (a.local.find(oi->second) != a.local.end()) { - while (++oi != oi_end) - if (a.local.find(oi->second) == a.local.end()) - return (false); - } else { - while (++oi != oi_end) - if (a.local.find(oi->second) != a.local.end()) - return (false); - } - - return (true); -} - -template -static float default_instruction_cost(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I) { - float c = 0.0f; - - const iCode *ic = G[i].ic; - - c += default_operand_cost(IC_RESULT(ic), a, i, G, I); - c += default_operand_cost(IC_LEFT(ic), a, i, G, I); - c += default_operand_cost(IC_RIGHT(ic), a, i, G, I); - - return (c); -} - -template -static bool inst_sane(const assignment &a, unsigned short int i, const G_t &G, - const I_t &I) { - const iCode *ic = G[i].ic; - - // for a sequence of built-in SENDs, all of the SENDs must be sane - if (ic->op == SEND && ic->builtinSEND && ic->next->op == SEND && - !inst_sane(a, *(adjacent_vertices(i, G).first), G, I)) - return (false); - - return (operand_sane(IC_RESULT(ic), a, i, G, I) && - operand_sane(IC_LEFT(ic), a, i, G, I) && - operand_sane(IC_RIGHT(ic), a, i, G, I)); -} - -// Treat assignment separately to handle coalescing. -template -static float assign_cost(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I) { - float c = 0.0f; - - const iCode *ic = G[i].ic; - - const operand *right = IC_RIGHT(ic); - const operand *result = IC_RESULT(ic); - - if (!right || !IS_SYMOP(right) || !result || !IS_SYMOP(result) || - POINTER_GET(ic) || POINTER_SET(ic)) - return (default_instruction_cost(a, i, G, I)); - - reg_t byteregs[4] = {-1, -1, -1, -1}; // Todo: Change this when sdcc supports - // variables larger than 4 bytes. 
- - operand_map_t::const_iterator oi, oi_end; - - int size1 = 0, size2 = 0; - - boost::tie(oi, oi_end) = - G[i].operands.equal_range(OP_SYMBOL_CONST(right)->key); - if (oi != oi_end) { - var_t v = oi->second; - - if (a.local.find(v) == a.local.end()) - return (default_instruction_cost(a, i, G, I)); - - c += 1.0f; - byteregs[I[v].byte] = a.global[v]; - size1 = 1; - - while (++oi != oi_end) { - v = oi->second; - c += (a.local.find(v) != a.local.end() - ? 1.0f - : std::numeric_limits::infinity()); - byteregs[I[v].byte] = a.global[v]; - size1++; - } - - // Code generator cannot handle variables only partially in A. - if (size1 > 1) - for (unsigned short int i = 0; i < size1; i++) - if (byteregs[i] == REG_A) - c += std::numeric_limits::infinity(); - - if (byteregs[0] == REG_A) - c -= 0.4f; - else if (OPTRALLOC_IY && byteregs[0] == REG_IYL || byteregs[0] == REG_IYH) - c += 0.1f; - } - - if (!size1) - return (default_instruction_cost(a, i, G, I)); - - boost::tie(oi, oi_end) = - G[i].operands.equal_range(OP_SYMBOL_CONST(result)->key); - if (oi != oi_end) { - var_t v = oi->second; - - if (a.local.find(v) == a.local.end()) - return (default_instruction_cost(a, i, G, I)); - - c += 1.0f; - if (byteregs[I[v].byte] == a.global[v]) - c -= 2.0f; - size2 = 1; - - while (++oi != oi_end) { - v = oi->second; - c += (a.local.find(v) != a.local.end() - ? 
1.0f - : std::numeric_limits::infinity()); - if (byteregs[I[v].byte] == a.global[v]) - c -= 2.0f; - size2++; - } - - if (byteregs[0] == REG_A) - c -= 0.4f; - else if (OPTRALLOC_IY && byteregs[0] == REG_IYL || byteregs[0] == REG_IYH) - c += 0.1f; - } - - if (!size2) - return (default_instruction_cost(a, i, G, I)); - - return (c); -} - -template -static float return_cost(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I) { - float c = 0.0f; - - const iCode *ic = G[i].ic; - - const operand *left = IC_LEFT(ic); - - if (!left || !IS_SYMOP(left)) - return (default_instruction_cost(a, i, G, I)); - - reg_t byteregs[4] = {-1, -1, -1, -1}; // Todo: Change this when sdcc supports - // variables larger than 4 bytes. - - operand_map_t::const_iterator oi, oi_end; - - int size = 0; - - boost::tie(oi, oi_end) = - G[i].operands.equal_range(OP_SYMBOL_CONST(left)->key); - if (oi != oi_end) { - var_t v = oi->second; - - if (a.local.find(v) == a.local.end()) - return (default_instruction_cost(a, i, G, I)); - - c += 1.0f; - byteregs[I[v].byte] = a.global[v]; - size = 1; - - while (++oi != oi_end) { - v = oi->second; - c += (a.local.find(v) != a.local.end() - ? 1.0f - : std::numeric_limits::infinity()); - byteregs[I[v].byte] = a.global[v]; - size++; - } - - if (byteregs[0] == REG_A) - c -= 0.4f; - - if (byteregs[0] == REG_L) - c -= 1.0f; - if (byteregs[1] == REG_H) - c -= 1.0f; - if (byteregs[2] == REG_E) - c -= 1.0f; - if (byteregs[3] == REG_D) - c -= 1.0f; - } - - return (c); -} - -template -static float call_cost(const assignment &a, unsigned short int i, const G_t &G, - const I_t &I) { - float c = 0.0f; - - const iCode *ic = G[i].ic; - - const operand *result = IC_RESULT(ic); - - if (!result || !IS_SYMOP(result)) - return (default_instruction_cost(a, i, G, I)); - - reg_t byteregs[4] = {-1, -1, -1, -1}; // Todo: Change this when sdcc supports - // variables larger than 4 bytes. 
- - operand_map_t::const_iterator oi, oi_end; - - int size = 0; - - boost::tie(oi, oi_end) = - G[i].operands.equal_range(OP_SYMBOL_CONST(result)->key); - if (oi != oi_end) { - var_t v = oi->second; - - if (a.local.find(v) == a.local.end()) - return (default_instruction_cost(a, i, G, I)); - - c += 1.0f; - byteregs[I[v].byte] = a.global[v]; - size = 1; - - while (++oi != oi_end) { - v = oi->second; - c += (a.local.find(v) != a.local.end() - ? 1.0f - : std::numeric_limits::infinity()); - byteregs[I[v].byte] = a.global[v]; - size++; - } - - // Code generator cannot handle variables only partially in A. - if (size > 1) - for (unsigned short int i = 0; i < size; i++) - if (byteregs[i] == REG_A) - c += std::numeric_limits::infinity(); - - if (byteregs[0] == REG_A) - c -= 0.4f; - - if (byteregs[0] == REG_L) - c -= 1.0f; - if (byteregs[1] == REG_H) - c -= 1.0f; - if (byteregs[2] == REG_E) - c -= 1.0f; - if (byteregs[3] == REG_D) - c -= 1.0f; - } - - return (c); -} - -template -static float ifx_cost(const assignment &a, unsigned short int i, const G_t &G, - const I_t &I) { - const iCode *ic = G[i].ic; - - return (default_operand_cost(IC_COND(ic), a, i, G, I)); -} - -template -static float jumptab_cost(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I) { - const iCode *ic = G[i].ic; - - return (default_operand_cost(IC_JTCOND(ic), a, i, G, I)); -} - -template -static void add_operand_conflicts_in_node(const cfg_node &n, I_t &I) { - const iCode *ic = n.ic; - - const operand *result = IC_RESULT(ic); - const operand *left = IC_LEFT(ic); - const operand *right = IC_RIGHT(ic); - - if (!result || !IS_SYMOP(result)) - return; - - if (!(ic->op == '~' || ic->op == UNARYMINUS || ic->op == '+' || - ic->op == '-' || ic->op == '^' || ic->op == '|' || - ic->op == BITWISEAND)) - return; // Code generation can always handle all other operations. Todo: - // Handle ^, |, BITWISEAND and float UNARYMINUS there as well. 
- - operand_map_t::const_iterator oir, oir_end, oirs; - boost::tie(oir, oir_end) = - n.operands.equal_range(OP_SYMBOL_CONST(result)->key); - if (oir == oir_end) - return; - - operand_map_t::const_iterator oio, oio_end; - - if (left && IS_SYMOP(left)) - for (boost::tie(oio, oio_end) = - n.operands.equal_range(OP_SYMBOL_CONST(left)->key); - oio != oio_end; ++oio) - for (oirs = oir; oirs != oir_end; ++oirs) { - var_t rvar = oirs->second; - var_t ovar = oio->second; - if (I[rvar].byte < I[ovar].byte) - boost::add_edge(rvar, ovar, I); - } - - if (right && IS_SYMOP(right)) - for (boost::tie(oio, oio_end) = - n.operands.equal_range(OP_SYMBOL_CONST(right)->key); - oio != oio_end; ++oio) - for (oirs = oir; oirs != oir_end; ++oirs) { - var_t rvar = oirs->second; - var_t ovar = oio->second; - if (I[rvar].byte < I[ovar].byte) - boost::add_edge(rvar, ovar, I); - } -} - -// Return true, iff the operand is placed (partially) in r. -template -static bool operand_in_reg(const operand *o, reg_t r, const i_assignment_t &ia, - unsigned short int i, const G_t &G) { - if (!o || !IS_SYMOP(o)) - return (false); - - if (r >= port->num_regs) - return (false); - - operand_map_t::const_iterator oi, oi_end; - for (boost::tie(oi, oi_end) = - G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); - oi != oi_end; ++oi) - if (oi->second == ia.registers[r][1] || oi->second == ia.registers[r][0]) - return (true); - - return (false); -} - -// Return true, iff the operand is placed in a reg. 
-template -static bool operand_in_reg(const operand *o, const i_assignment_t &ia, - unsigned short int i, const G_t &G) { - if (!o || !IS_SYMOP(o)) - return (false); - - operand_map_t::const_iterator oi, oi_end; - for (boost::tie(oi, oi_end) = - G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); - oi != oi_end; ++oi) - for (reg_t r = 0; r < port->num_regs; r++) - if (oi->second == ia.registers[r][1] || oi->second == ia.registers[r][0]) - return (true); - - return (false); -} - -// Return true, iff the operand is placed on the stack. -template -bool operand_on_stack(const operand *o, const assignment &a, - unsigned short int i, const G_t &G) { - if (!o || !IS_SYMOP(o)) - return (false); - - if (OP_SYMBOL_CONST(o)->remat) - return (false); - - if (OP_SYMBOL_CONST(o)->_isparm && !IS_REGPARM(OP_SYMBOL_CONST(o)->etype)) - return (true); - - operand_map_t::const_iterator oi, oi_end; - for (boost::tie(oi, oi_end) = - G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); - oi != oi_end; ++oi) - if (a.global[oi->second] < 0) - return (true); - - return (false); -} - -template -static bool operand_is_pair(const operand *o, const assignment &a, - unsigned short int i, const G_t &G) { - if (!o || !IS_SYMOP(o)) - return (false); - - operand_map_t::const_iterator oi, oi2, oi3, oi_end; - boost::tie(oi, oi_end) = G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); - if (oi == oi_end) - return (false); - oi2 = oi; - ++oi2; - if (oi2 == oi_end) - return (false); - oi3 = oi2; - ++oi3; - if (oi3 != oi_end) - return (false); - - if (a.global[oi->second] % 2) - return (false); - if (a.global[oi->second] + 1 != a.global[oi2->second]) - return (false); - - return (true); -} - -template -static bool Ainst_ok(const assignment &a, unsigned short int i, const G_t &G, - const I_t &I) { - const iCode *ic = G[i].ic; - - const i_assignment_t &ia = a.i_assignment; - - const operand *const left = IC_LEFT(ic); - const operand *const right = IC_RIGHT(ic); - const operand *const result = IC_RESULT(ic); 
- - if (ia.registers[REG_A][1] < 0) - return (true); // Register A not in use. - - bool exstk = - (should_omit_frame_ptr || (currFunc && currFunc->stack > 127) || IS_GB); - - // std::cout << "Ainst_ok: A = (" << ia.registers[REG_A][0] << ", " << - // ia.registers[REG_A][1] << "), inst " << i << ", " << ic->key << "\n"; - - // Code generator cannot handle variables that are only partially in A. - if (I[ia.registers[REG_A][1]].size > 1 || - ia.registers[REG_A][0] >= 0 && I[ia.registers[REG_A][0]].size > 1) - return (false); - - // Check if the result of this instruction is placed in A. - bool result_in_A = operand_in_reg(IC_RESULT(ic), REG_A, ia, i, G); - - // Check if an input of this instruction is placed in A. - bool input_in_A; - switch (ic->op) { - case IFX: - input_in_A = operand_in_reg(IC_COND(ic), REG_A, ia, i, G); - break; - case JUMPTABLE: - input_in_A = operand_in_reg(IC_JTCOND(ic), REG_A, ia, i, G); - break; - default: - input_in_A = operand_in_reg(left, REG_A, ia, i, G) || - operand_in_reg(right, REG_A, ia, i, G); - break; - } - - // bit instructions do not disturb a. - if (ic->op == BITWISEAND && ifxForOp(IC_RESULT(ic), ic) && - (IS_OP_LITERAL(left) && (!(IS_GB && IS_TRUE_SYMOP(right) || - exstk && operand_on_stack(right, a, i, G)) || - operand_in_reg(right, ia, i, G) && - !operand_in_reg(right, REG_IYL, ia, i, G) && - !operand_in_reg(right, REG_IYH, ia, i, G)) || - IS_OP_LITERAL(right) && - (!(IS_GB && IS_TRUE_SYMOP(left) || - exstk && operand_on_stack(left, a, i, G)) || - operand_in_reg(left, ia, i, G) && - !operand_in_reg(left, REG_IYL, ia, i, G) && - !operand_in_reg(left, REG_IYH, ia, i, G)))) { - operand *const litop = IS_OP_LITERAL(left) ? 
IC_LEFT(ic) : IC_RIGHT(ic); - for (unsigned int i = 0; i < getSize(operandType(result)); i++) { - unsigned char byte = (ulFromVal(OP_VALUE(litop)) >> (i * 8) & 0xff); - if (byte != 0x00 && byte != 0x01 && byte != 0x02 && byte != 0x04 && - byte != 0x08 && byte != 0x10 && byte != 0x20 && byte != 0x40 && - byte != 0x80) - goto nobit; - } - return (true); - } -nobit: - - const std::set &dying = G[i].dying; - - if (ic->op == GET_VALUE_AT_ADDRESS) - return (result_in_A || !IS_BITVAR(getSpec(operandType(result)))); - if (ic->op == '=' && POINTER_SET(ic)) - return (dying.find(ia.registers[REG_A][1]) != dying.end() || - dying.find(ia.registers[REG_A][0]) != dying.end() || - !(IS_BITVAR(getSpec(operandType(result))) || - IS_BITVAR(getSpec(operandType(right))))); - - if (1) { - // Variable in A is not used by this instruction - if (ic->op == '+' && IS_ITEMP(IC_LEFT(ic)) && IS_ITEMP(IC_RESULT(ic)) && - IS_OP_LITERAL(right) && ulFromVal(OP_VALUE(IC_RIGHT(ic))) == 1 && - OP_KEY(IC_RESULT(ic)) == OP_KEY(IC_LEFT(ic))) - return (true); - - if ((ic->op == '=' || ic->op == CAST) && !POINTER_SET(ic) && - isOperandEqual(result, right)) - return (true); - - if ((ic->op == '=' || ic->op == CAST) && !POINTER_SET(ic) && - !(ic->op == CAST && IS_BOOL(operandType(result))) && - (operand_in_reg(right, REG_A, ia, i, G) || - operand_in_reg(right, REG_B, ia, i, G) || - operand_in_reg(right, REG_C, ia, i, G) || - operand_in_reg(right, REG_D, ia, i, G) || - operand_in_reg(right, REG_E, ia, i, G) || - operand_in_reg(right, REG_H, ia, i, G) || - operand_in_reg(right, REG_L, ia, i, G)) && - (operand_in_reg(right, REG_A, ia, i, G) || - operand_in_reg(result, REG_B, ia, i, G) || - operand_in_reg(result, REG_C, ia, i, G) || - operand_in_reg(result, REG_D, ia, i, G) || - operand_in_reg(result, REG_E, ia, i, G) || - operand_in_reg(right, REG_H, ia, i, G) || - operand_in_reg(right, REG_L, ia, i, G))) - return (true); - - if (ic->op == GOTO || ic->op == LABEL) - return (true); - - if (ic->op == IPUSH && 
getSize(operandType(IC_LEFT(ic))) <= 2 && - (operand_in_reg(left, REG_A, ia, i, G) || - operand_in_reg(left, REG_B, ia, i, G) && - (getSize(operandType(left)) < 2 || - operand_in_reg(left, REG_C, ia, i, G) && - I[ia.registers[REG_C][1]].byte == 0) || - operand_in_reg(left, REG_D, ia, i, G) && - (getSize(operandType(left)) < 2 || - operand_in_reg(left, REG_E, ia, i, G) && - I[ia.registers[REG_E][1]].byte == 0) || - operand_in_reg(left, REG_H, ia, i, G) && - (getSize(operandType(left)) < 2 || - operand_in_reg(left, REG_L, ia, i, G) && - I[ia.registers[REG_L][1]].byte == 0) || - operand_in_reg(left, REG_IYL, ia, i, G) && - I[ia.registers[REG_IYL][1]].byte == 0 && - (getSize(operandType(left)) < 2 || - operand_in_reg(left, REG_IYH, ia, i, G)))) - return (true); - if (!result_in_A && !input_in_A) - return (false); - } - - // Last use of operand in A. - if (input_in_A && - (result_in_A || dying.find(ia.registers[REG_A][1]) != dying.end() || - dying.find(ia.registers[REG_A][0]) != dying.end())) { - if (ic->op != IFX && ic->op != RETURN && - !((ic->op == RIGHT_OP || ic->op == LEFT_OP) && - (IS_OP_LITERAL(right) || operand_in_reg(right, REG_A, ia, i, G))) && - !((ic->op == '=' || ic->op == CAST) && - !(IY_RESERVED && POINTER_SET(ic))) && - !IS_BITWISE_OP(ic) && !(ic->op == '~') && - !(ic->op == '*' && - (IS_ITEMP(IC_LEFT(ic)) || IS_OP_LITERAL(IC_LEFT(ic))) && - (IS_ITEMP(IC_RIGHT(ic)) || IS_OP_LITERAL(IC_RIGHT(ic)))) && - !((ic->op == '-' || ic->op == '+' || ic->op == EQ_OP) && - IS_OP_LITERAL(IC_RIGHT(ic)))) { - // std::cout << "Last use: Dropping at " << i << ", " << ic->key << "(" << - // int(ic->op) << ")\n"; - return (false); - } - } - // A is used, and has to be preserved for later use. - else if (input_in_A && ic->op != IFX && ic->op != JUMPTABLE) { - // std::cout << "Intermediate use: Dropping at " << i << ", " << ic->key << - // "(" << int(ic->op) << "\n"; - return (false); - } - - // First use of operand in A. 
- if (result_in_A && !POINTER_GET(ic) && ic->op != '+' && ic->op != '-' && - (ic->op != '*' || !IS_OP_LITERAL(IC_LEFT(ic)) && !IS_OP_LITERAL(right)) && - !IS_BITWISE_OP(ic) && ic->op != GET_VALUE_AT_ADDRESS && ic->op != '=' && - ic->op != EQ_OP && ic->op != '<' && ic->op != '>' && ic->op != CAST && - ic->op != CALL && ic->op != PCALL && ic->op != GETHBIT && - !((ic->op == LEFT_OP || ic->op == RIGHT_OP) && IS_OP_LITERAL(right))) { - // std::cout << "First use: Dropping at " << i << ", " << ic->key << "(" << - // int(ic->op) << "\n"; - return (false); - } - - // std::cout << "Default OK\n"; - - return (true); -} - -template -static bool HLinst_ok(const assignment &a, unsigned short int i, const G_t &G, - const I_t &I) { - const iCode *ic = G[i].ic; - - bool exstk = - (should_omit_frame_ptr || (currFunc && currFunc->stack > 127) || IS_GB); - - const i_assignment_t &ia = a.i_assignment; - - bool unused_L = (ia.registers[REG_L][1] < 0); - bool unused_H = (ia.registers[REG_H][1] < 0); - - if (unused_L && unused_H) - return (true); // Register HL not in use. 
- -#if 0 - if (ic->key == 3) - std::cout << "HLinst_ok: at (" << i << ", " << ic->key << ")\nL = (" << ia.registers[REG_L][0] << ", " << ia.registers[REG_L][1] << "), H = (" << ia.registers[REG_H][0] << ", " << ia.registers[REG_H][1] << ")inst " << i << ", " << ic->key << "\n"; -#endif - - const operand *left = IC_LEFT(ic); - const operand *right = IC_RIGHT(ic); - const operand *result = IC_RESULT(ic); - - bool result_in_L = operand_in_reg(result, REG_L, ia, i, G); - bool result_in_H = operand_in_reg(result, REG_H, ia, i, G); - bool result_in_HL = result_in_L || result_in_H; - - bool input_in_L, input_in_H; - switch (ic->op) { - case IFX: - input_in_L = operand_in_reg(IC_COND(ic), REG_L, ia, i, G); - input_in_H = operand_in_reg(IC_COND(ic), REG_L, ia, i, G); - break; - case JUMPTABLE: - input_in_L = operand_in_reg(IC_JTCOND(ic), REG_L, ia, i, G); - input_in_H = operand_in_reg(IC_JTCOND(ic), REG_L, ia, i, G); - break; - default: - input_in_L = operand_in_reg(left, REG_L, ia, i, G) || - operand_in_reg(right, REG_L, ia, i, G); - input_in_H = operand_in_reg(left, REG_H, ia, i, G) || - operand_in_reg(right, REG_H, ia, i, G); - break; - } - bool input_in_HL = input_in_L || input_in_H; - - const std::set &dying = G[i].dying; - - bool dying_L = result_in_L || - dying.find(ia.registers[REG_L][1]) != dying.end() || - dying.find(ia.registers[REG_L][0]) != dying.end(); - bool dying_H = result_in_H || - dying.find(ia.registers[REG_H][1]) != dying.end() || - dying.find(ia.registers[REG_H][0]) != dying.end(); - - bool result_only_HL = (result_in_L || unused_L || dying_L) && - (result_in_H || unused_H || dying_H); - -#if 0 - if (ic->key == 4) - { - std::cout << "Result in L: " << result_in_L << ", result in H: " << result_in_H << "\n"; - std::cout << "Unsued L: " << unused_L << ", unused H: " << unused_H << "\n"; - std::cout << "Dying L: " << dying_L << ", dying H: " << dying_H << "\n"; - std::cout << "Result only HL: " << result_only_HL << "\n"; - } -#endif - - if (ic->op == 
RETURN || ic->op == SEND) - return (true); - - if ((IS_GB || IY_RESERVED) && (IS_TRUE_SYMOP(left) || IS_TRUE_SYMOP(right))) - return (false); - - if ((IS_GB || IY_RESERVED) && IS_TRUE_SYMOP(result) && - getSize(operandType(IC_RESULT(ic))) > 2) - return (false); - - if (result_only_HL && ic->op == PCALL) - return (true); - - if (exstk && - (operand_on_stack(result, a, i, G) + operand_on_stack(left, a, i, G) + - operand_on_stack(right, a, i, G) >= - 2) && - (result && IS_SYMOP(result) && getSize(operandType(result)) >= 2 || - !result_only_HL)) // Todo: Make this more accurate to get better code - // when using --fomit-frame-pointer - return (false); - if (exstk && - (operand_on_stack(left, a, i, G) || operand_on_stack(right, a, i, G)) && - (ic->op == '>' || ic->op == '<')) - return (false); - - if (ic->op == '+' && getSize(operandType(result)) == 2 && - (IS_OP_LITERAL(right) && ulFromVal(OP_VALUE(IC_RIGHT(ic))) <= 3 || - IS_OP_LITERAL(left) && ulFromVal(OP_VALUE(IC_LEFT(ic))) <= 3) && - (operand_in_reg(result, REG_L, ia, i, G) && - I[ia.registers[REG_L][1]].byte == 0 && - operand_in_reg(result, REG_H, ia, i, G))) - return (true); // Uses inc hl. - - if (ic->op == '+' && getSize(operandType(result)) == 2 && - !IS_TRUE_SYMOP(result) && - (result_only_HL || operand_in_reg(result, REG_IYL, ia, i, G) && - operand_in_reg(result, REG_IYH, ia, i, G)) && - (ia.registers[REG_C][1] < 0 && ia.registers[REG_B][1] < 0 || - ia.registers[REG_E][1] < 0 && - ia.registers[REG_D][1] < 0)) // Can use ld rr, (nn) instead of (hl). - return (true); - - if (ic->op == '+' && getSize(operandType(result)) >= 2 && - (IS_TRUE_SYMOP(result) && !operand_on_stack(result, a, i, G) || - (operand_on_stack(left, a, i, G) ? exstk : IS_TRUE_SYMOP(left)) || - (operand_on_stack(right, a, i, G) - ? exstk - : IS_TRUE_SYMOP(right)))) // Might use (hl). - return (false); - - if (ic->op == '+' && input_in_HL && - (operand_on_stack(result, a, i, G) - ? 
exstk - : IS_TRUE_SYMOP(result))) // Might use (hl) for result. - return (false); - - // HL overwritten by result. - if (result_only_HL && !POINTER_SET(ic) && - (ic->op == ADDRESS_OF || ic->op == GET_VALUE_AT_ADDRESS || - ic->op == '+' || ic->op == '*' || ic->op == '=' || ic->op == CAST)) - return (true); - - if (!exstk && !isOperandInDirSpace(IC_LEFT(ic)) && - !isOperandInDirSpace(IC_RIGHT(ic)) && - !isOperandInDirSpace(IC_RESULT(ic)) && - (ic->op == '-' || ic->op == '<' || ic->op == '>')) - return (true); - - if (ic->op == LEFT_OP && getSize(operandType(result)) <= 2 && - IS_OP_LITERAL(right) && result_only_HL) - return (true); - if ((ic->op == LEFT_OP || ic->op == RIGHT_OP) && - (!exstk || - ((!operand_on_stack(left, a, i, G) || !input_in_HL && result_only_HL) && - (!operand_on_stack(right, a, i, G) || !input_in_HL && result_only_HL) && - !operand_on_stack(result, a, i, G)))) - return (true); - - if (result && IS_SYMOP(result) && isOperandInDirSpace(IC_RESULT(ic))) - return (false); - - if ((input_in_HL || !result_only_HL) && left && IS_SYMOP(left) && - isOperandInDirSpace(IC_LEFT(ic))) - return (false); - - if ((input_in_HL || !result_only_HL) && right && IS_SYMOP(right) && - isOperandInDirSpace(IC_RIGHT(ic))) - return (false); - - // Operations that leave HL alone. - if (ic->op == IFX) - return (true); - if (SKIP_IC2(ic)) - return (true); - if (ic->op == IPUSH && input_in_H && - (getSize(operandType(IC_LEFT(ic))) <= 2 || - ia.registers[REG_L][1] > 0 && I[ia.registers[REG_L][1]].byte == 2 && - ia.registers[REG_H][1] > 0 && I[ia.registers[REG_H][1]].byte == 3)) - return (true); - if (ic->op == IPUSH && ic->next && ic->next->op == CALL) - return (true); - if (ic->op == IPUSH && getSize(operandType(left)) == 2 && - (ia.registers[REG_C][1] < 0 && ia.registers[REG_B][1] < 0 || - ia.registers[REG_E][1] < 0 && - ia.registers[REG_D][1] < 0)) // Can use pair other than HL. 
- return (true); - if (ic->op == IPUSH && getSize(operandType(left)) <= 2 && - (operand_in_reg(left, REG_C, ia, i, G) && - I[ia.registers[REG_C][1]].byte == 0 && - (getSize(operandType(left)) < 2 || - operand_in_reg(left, REG_B, ia, i, G)) || - operand_in_reg(left, REG_E, ia, i, G) && - I[ia.registers[REG_E][1]].byte == 0 && - (getSize(operandType(left)) < 2 || - operand_in_reg(left, REG_D, ia, i, G)) || - operand_in_reg(left, REG_IYL, ia, i, G) && - I[ia.registers[REG_IYL][1]].byte == 0 && - (getSize(operandType(left)) < 2 || - operand_in_reg(left, REG_IYH, ia, i, G)))) - return (true); - if (POINTER_GET(ic) && input_in_L && input_in_H && - (getSize(operandType(IC_RESULT(ic))) == 1 || !result_in_HL)) - return (true); - if (!IS_GB && ic->op == ADDRESS_OF && - (operand_in_reg(result, REG_IYL, ia, i, G) && - ia.registers[REG_IYL][1] > 0 && - I[ia.registers[REG_IYL][1]].byte == 0 && - operand_in_reg(result, REG_IYH, ia, i, G) || - !OP_SYMBOL_CONST(left)->onStack && - operand_in_reg(result, REG_C, ia, i, G) && - ia.registers[REG_C][1] > 0 && I[ia.registers[REG_C][1]].byte == 0 && - operand_in_reg(result, REG_B, ia, i, G) || - !OP_SYMBOL_CONST(left)->onStack && - operand_in_reg(result, REG_E, ia, i, G) && - ia.registers[REG_E][1] > 0 && I[ia.registers[REG_E][1]].byte == 0 && - operand_in_reg(result, REG_D, ia, i, G))) - return (true); - - if (ic->op == LEFT_OP && isOperandLiteral(IC_RIGHT(ic))) - return (true); - - if (exstk && !result_only_HL && - (operand_on_stack(left, a, i, G) || operand_on_stack(right, a, i, G) || - operand_on_stack(result, a, i, G)) && - ic->op == '+') - return (false); - - if ((!POINTER_SET(ic) && !POINTER_GET(ic) && - ((ic->op == '=' || ic->op == CAST || ic->op == UNARYMINUS || - ic->op == RIGHT_OP || - /*ic->op == '-' ||*/ - IS_BITWISE_OP(ic) || - /*ic->op == '>' || - ic->op == '<' || - ic->op == EQ_OP ||*/ - (ic->op == '+' && getSize(operandType(IC_RESULT(ic))) == 1) || - (ic->op == '+' && getSize(operandType(IC_RESULT(ic))) <= 2 && - 
(result_only_HL || !IS_GB)))))) // 16 bit addition on gbz80 might need - // to use add hl, rr. - return (true); - - if ((ic->op == '<' || ic->op == '>') && - (IS_ITEMP(left) || IS_OP_LITERAL(left) || IS_ITEMP(right) || - IS_OP_LITERAL(right))) // Todo: Fix for large stack. - return (true); - - if (ic->op == EQ_OP && IS_VALOP(right)) - return (true); - - if (ic->op == CALL) - return (true); - - // HL overwritten by result. - if (result_only_HL && ic->op == PCALL) - return (true); - - if (POINTER_GET(ic) && getSize(operandType(IC_RESULT(ic))) == 1 && - !IS_BITVAR(getSpec(operandType(result))) && - (operand_in_reg(right, REG_C, ia, i, G) && - I[ia.registers[REG_C][1]].byte == 0 && - operand_in_reg(right, REG_B, ia, i, G) || // Uses ld a, (bc) - operand_in_reg(right, REG_E, ia, i, G) && - I[ia.registers[REG_E][1]].byte == 0 && - operand_in_reg(right, REG_D, ia, i, G) || // Uses ld a, (de) - operand_in_reg(right, REG_IYL, ia, i, G) && - I[ia.registers[REG_IYL][1]].byte == 0 && - operand_in_reg(right, REG_IYH, ia, i, G))) // Uses ld r, 0 (iy) - return (true); - - if ((ic->op == '=') && POINTER_SET(ic) && - operand_in_reg(result, REG_IYL, ia, i, G) && - I[ia.registers[REG_IYL][1]].byte == 0 && - operand_in_reg(result, REG_IYH, ia, i, G)) // Uses ld 0 (iy), l etc - return (true); - - if ((ic->op == '=' || ic->op == CAST) && POINTER_SET(ic) && - !result_only_HL) // loads result pointer into (hl) first. 
- return (false); - - if ((ic->op == '=' || ic->op == CAST) && !POINTER_GET(ic) && !input_in_HL) - return (true); - -#if 0 - if(ic->key == 4) - { - std::cout << "HLinst_ok: L = (" << ia.registers[REG_L][0] << ", " << ia.registers[REG_L][1] << "), H = (" << ia.registers[REG_H][0] << ", " << ia.registers[REG_H][1] << ")inst " << i << ", " << ic->key << "\n"; - std::cout << "Result in L: " << result_in_L << ", result in H: " << result_in_H << "\n"; - std::cout << "HL default drop at " << ic->key << ", operation: " << ic->op << "\n"; - } -#endif - - return (false); -} - -template -static bool IYinst_ok(const assignment &a, unsigned short int i, const G_t &G, - const I_t &I) { - const iCode *ic = G[i].ic; - - const i_assignment_t &ia = a.i_assignment; - - /*if(ic->key == 40) - std::cout << "1IYinst_ok: at (" << i << ", " << ic->key << - ")\nIYL = (" << ia.registers[REG_IYL][0] << ", " << - ia.registers[REG_IYL][1] << "), IYH = (" << ia.registers[REG_IYH][0] << ", - " << ia.registers[REG_IYH][1] << ")inst " << i << ", " << ic->key << - "\n";*/ - - bool exstk = (should_omit_frame_ptr || (currFunc && currFunc->stack > 127)); - - bool unused_IYL = (ia.registers[REG_IYL][1] < 0); - bool unused_IYH = (ia.registers[REG_IYH][1] < 0); - - const operand *left = IC_LEFT(ic); - const operand *right = IC_RIGHT(ic); - const operand *result = IC_RESULT(ic); - - bool result_in_IYL = operand_in_reg(result, REG_IYL, ia, i, G); - bool result_in_IYH = operand_in_reg(result, REG_IYH, ia, i, G); - bool result_in_IY = result_in_IYL || result_in_IYH; - - bool input_in_IYL, input_in_IYH; - switch (ic->op) { - case IFX: - input_in_IYL = operand_in_reg(IC_COND(ic), REG_IYL, ia, i, G); - input_in_IYH = operand_in_reg(IC_COND(ic), REG_IYL, ia, i, G); - break; - case JUMPTABLE: - input_in_IYL = operand_in_reg(IC_JTCOND(ic), REG_IYL, ia, i, G); - input_in_IYH = operand_in_reg(IC_JTCOND(ic), REG_IYL, ia, i, G); - break; - default: - input_in_IYL = operand_in_reg(left, REG_IYL, ia, i, G) || - 
operand_in_reg(right, REG_IYL, ia, i, G); - input_in_IYH = operand_in_reg(left, REG_IYH, ia, i, G) || - operand_in_reg(right, REG_IYH, ia, i, G); - break; - } - bool input_in_IY = input_in_IYL || input_in_IYH; - - // const std::set &dying = G[i].dying; - - // bool dying_IYL = result_in_IYL || dying.find(ia.registers[REG_IYL][1]) != - // dying.end() || dying.find(ia.registers[REG_IYL][0]) != dying.end(); bool - // dying_IYH = result_in_IYH || dying.find(ia.registers[REG_IYH][1]) != - // dying.end() || dying.find(ia.registers[REG_IYH][0]) != dying.end(); - - // bool result_only_IY = (result_in_IYL || unused_IYL || dying_IYL) && - // (result_in_IYH || unused_IYH || dying_IYH); - - if (unused_IYL && unused_IYH) - return (true); // Register IY not in use. - - if (exstk && - (operand_on_stack(result, a, i, G) || operand_on_stack(left, a, i, G) || - operand_on_stack(right, a, i, - G))) // Todo: Make this more accurate to get better code - // when using --fomit-frame-pointer - return (false); - - // Code generator cannot handle variables that are only partially in IY. 
- if (unused_IYL ^ unused_IYH) - return (false); - if (!unused_IYL && I[ia.registers[REG_IYL][1]].size != 2 || - !unused_IYH && I[ia.registers[REG_IYH][1]].size != 2 || - ia.registers[REG_IYL][0] >= 0 && I[ia.registers[REG_IYL][0]].size != 2 || - ia.registers[REG_IYH][0] >= 0 && I[ia.registers[REG_IYH][0]].size != 2) - return (false); - if (ia.registers[REG_IYL][1] >= 0 && - (ia.registers[REG_IYH][1] <= 0 || - I[ia.registers[REG_IYL][1]].v != I[ia.registers[REG_IYH][1]].v)) - return (false); - if (ia.registers[REG_IYH][1] >= 0 && - (ia.registers[REG_IYL][1] <= 0 || - I[ia.registers[REG_IYH][1]].v != I[ia.registers[REG_IYL][1]].v)) - return (false); - if (ia.registers[REG_IYL][0] >= 0 && - (ia.registers[REG_IYH][0] <= 0 || - I[ia.registers[REG_IYL][0]].v != I[ia.registers[REG_IYH][0]].v)) - return (false); - if (ia.registers[REG_IYH][0] >= 0 && - (ia.registers[REG_IYL][0] <= 0 || - I[ia.registers[REG_IYH][0]].v != I[ia.registers[REG_IYL][0]].v)) - return (false); - if (I[ia.registers[REG_IYL][1]].byte != 0 || - I[ia.registers[REG_IYH][1]].byte != 1) - return (false); - if (ia.registers[REG_IYL][0] >= 0 && I[ia.registers[REG_IYL][0]].byte != 0 || - ia.registers[REG_IYH][0] >= 0 && I[ia.registers[REG_IYH][0]].byte != 1) - return (false); - -#if 0 - if(ic->key == 99) - { - std::cout << "IYinst_ok: Assignment: "; - //print_assignment(a); - std::cout << "\n"; - std::cout << "2IYinst_ok: at (" << i << ", " << ic->key << ")\nIYL = (" << ia.registers[REG_IYL][0] << ", " << ia.registers[REG_IYL][1] << "), IYH = (" << ia.registers[REG_IYH][0] << ", " << ia.registers[REG_IYH][1] << ")inst " << i << ", " << ic->key << "\n"; - } -#endif - - if (result_in_IY && - (ic->op == '=' || - ic->op == CAST && getSize(operandType(IC_RESULT(ic))) <= - getSize(operandType(IC_RIGHT(ic))) || - ic->op == '+')) // todo: More instructions that can write iy. - return (true); - - // Todo: Multiplication. 
- - if (ic->op == LEFT_OP && result_in_IY && input_in_IY && - IS_VALOP(IC_RIGHT(ic)) && operandLitValue(IC_RIGHT(ic)) < 8) - return (true); - - if (ic->op == '-' && result_in_IY && input_in_IY && IS_VALOP(IC_RIGHT(ic)) && - operandLitValue(IC_RIGHT(ic)) < 4) - return (true); - -#if 0 - if(ic->key == 99) - { - std::cout << "IYinst_ok: Assignment: "; - //print_assignment(a); - std::cout << "\n"; - std::cout << "2IYinst_ok: at (" << i << ", " << ic->key << ")\nIYL = (" << ia.registers[REG_IYL][0] << ", " << ia.registers[REG_IYL][1] << "), IYH = (" << ia.registers[REG_IYH][0] << ", " << ia.registers[REG_IYH][1] << ")inst " << i << ", " << ic->key << "\n"; - } -#endif - - if (SKIP_IC2(ic)) - return (true); - - if (!result_in_IY && !input_in_IY && - !(IC_RESULT(ic) && isOperandInDirSpace(IC_RESULT(ic))) && - !(IC_RIGHT(ic) && IS_TRUE_SYMOP(IC_RIGHT(ic))) && - !(IC_LEFT(ic) && IS_TRUE_SYMOP(IC_LEFT(ic)))) - return (true); - - if (!result_in_IY && !input_in_IY && - (ic->op == '=' || ic->op == CAST && - getSize(operandType(IC_RIGHT(ic))) >= 2 && - (getSize(operandType(IC_RESULT(ic))) <= - getSize(operandType(IC_RIGHT(ic))) || - !IS_SPEC(operandType(IC_RIGHT(ic))) || - SPEC_USIGN(operandType(IC_RIGHT(ic))))) && - operand_is_pair(IC_RESULT(ic), a, i, - G)) // DirSpace access won't use iy here. - return (true); - - if (ic->op == IPUSH) // todo: More instructions that can use IY. 
- return (true); - - if (ic->op == GET_VALUE_AT_ADDRESS && isOperandInDirSpace(IC_RESULT(ic))) - return (false); - - if (input_in_IY && !result_in_IY && - (ic->op == '=' && !POINTER_SET(ic) || - ic->op == CAST && getSize(operandType(IC_RESULT(ic))) <= - getSize(operandType(IC_RIGHT(ic))) || - ic->op == GET_VALUE_AT_ADDRESS)) - return (true); - -#if 0 - if(ic->key == 99) - { - std::cout << "Default drop.\n"; - std::cout << "result is pair: " << operand_is_pair(IC_RESULT(ic), a, i, G) << "\n"; - } -#endif - - return (false); -} - -template -bool DEinst_ok(const assignment &a, unsigned short int i, const G_t &G, - const I_t &I) { - if (!IS_GB) // Only gbz80 might need de for code generation. - return (true); - - const i_assignment_t &ia = a.i_assignment; - - bool unused_E = (ia.registers[REG_E][1] < 0); - bool unused_D = (ia.registers[REG_D][1] < 0); - - if (unused_E && unused_D) - return (true); // Register DE not in use. - - const iCode *ic = G[i].ic; - const operand *left = IC_LEFT(ic); - const operand *right = IC_RIGHT(ic); - const operand *result = IC_RESULT(ic); - - // const std::set &dying = G[i].dying; - - if (ic->op == PCALL) - return (false); - - if (ic->op == GET_VALUE_AT_ADDRESS && - (getSize(operandType(result)) >= 2 || !operand_is_pair(left, a, i, G))) - return (false); - - if (ic->op == '=' && POINTER_SET(ic) && !operand_is_pair(result, a, i, G)) - return (false); - - if ((ic->op == '=' || ic->op == CAST) && getSize(operandType(result)) >= 2 && - (operand_on_stack(right, a, i, G) || - operand_in_reg(right, REG_L, ia, i, G) || - operand_in_reg(right, REG_H, ia, i, G)) && - (operand_on_stack(result, a, i, G) || - operand_in_reg(result, REG_L, ia, i, G) || - operand_in_reg(result, REG_H, ia, i, G))) - return (false); - - if (ic->op == '+' && getSize(operandType(result)) >= 2) - return (false); - - if (ic->op == UNARYMINUS || ic->op == '-' || ic->op == '*') - return (false); - - if (ic->op == '>' || ic->op == '<') - return (false); - - return (true); -} - 
-template -static void set_surviving_regs(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I) { - iCode *ic = G[i].ic; - - ic->rMask = newBitVect(port->num_regs); - ic->rSurv = newBitVect(port->num_regs); - - std::set::const_iterator v, v_end; - for (v = G[i].alive.begin(), v_end = G[i].alive.end(); v != v_end; ++v) { - if (a.global[*v] < 0) - continue; - ic->rMask = bitVectSetBit(ic->rMask, a.global[*v]); - if (G[i].dying.find(*v) == G[i].dying.end()) - if (!((IC_RESULT(ic) && !POINTER_SET(ic)) && IS_SYMOP(IC_RESULT(ic)) && - OP_SYMBOL_CONST(IC_RESULT(ic))->key == I[*v].v)) - ic->rSurv = bitVectSetBit(ic->rSurv, a.global[*v]); - } -} - -template -static void unset_surviving_regs(unsigned short int i, const G_t &G) { - iCode *ic = G[i].ic; - - freeBitVect(ic->rSurv); - freeBitVect(ic->rMask); -} - -template -static void assign_operand_for_cost(operand *o, const assignment &a, - unsigned short int i, const G_t &G, - const I_t &I) { - if (!o || !IS_SYMOP(o)) - return; - symbol *sym = OP_SYMBOL(o); - operand_map_t::const_iterator oi, oi_end; - for (boost::tie(oi, oi_end) = - G[i].operands.equal_range(OP_SYMBOL_CONST(o)->key); - oi != oi_end; ++oi) { - var_t v = oi->second; - if (a.global[v] >= 0) { - if (a.global[v] != REG_A && - (a.global[v] != REG_IYL && a.global[v] != REG_IYH || !OPTRALLOC_IY)) { - sym->regs[I[v].byte] = regsZ80 + a.global[v]; - sym->accuse = 0; - sym->isspilt = false; - sym->nRegs = I[v].size; - } else if (a.global[v] == REG_A) { - sym->accuse = ACCUSE_A; - sym->isspilt = false; - sym->nRegs = 0; - sym->regs[I[v].byte] = 0; - } else { - sym->accuse = ACCUSE_IY; - sym->isspilt = false; - sym->nRegs = 0; - sym->regs[I[v].byte] = 0; - } - } else { - sym->isspilt = true; - sym->accuse = 0; - sym->nRegs = I[v].size; - sym->regs[I[v].byte] = 0; - } - } -} - -template -static void assign_operands_for_cost(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I) { - const iCode *ic = G[i].ic; - - if (ic->op == IFX) - 
assign_operand_for_cost(IC_COND(ic), a, i, G, I); - else if (ic->op == JUMPTABLE) - assign_operand_for_cost(IC_JTCOND(ic), a, i, G, I); - else { - assign_operand_for_cost(IC_LEFT(ic), a, i, G, I); - assign_operand_for_cost(IC_RIGHT(ic), a, i, G, I); - assign_operand_for_cost(IC_RESULT(ic), a, i, G, I); - } - - if (ic->op == SEND && ic->builtinSEND) - assign_operands_for_cost(a, *(adjacent_vertices(i, G).first), G, I); -} - -// Cost function. -template -static float instruction_cost(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I) { - iCode *ic = G[i].ic; - float c; - - if (!inst_sane(a, i, G, I)) - return (std::numeric_limits::infinity()); - - if (ic->generated) - return (0.0f); - - if (!Ainst_ok(a, i, G, I)) - return (std::numeric_limits::infinity()); - - if (OPTRALLOC_HL && !HLinst_ok(a, i, G, I)) - return (std::numeric_limits::infinity()); - - if (!DEinst_ok(a, i, G, I)) - return (std::numeric_limits::infinity()); - - if (OPTRALLOC_IY && !IYinst_ok(a, i, G, I)) - return (std::numeric_limits::infinity()); - - switch (ic->op) { - // Register assignment doesn't matter for these: - case FUNCTION: - case ENDFUNCTION: - case LABEL: - case GOTO: - case INLINEASM: - return (0.0f); - // Exact cost: - case '!': - case '~': - case UNARYMINUS: - case '+': - case '-': - case '^': - case '|': - case BITWISEAND: - case IPUSH: - // case IPOP: - case CALL: - case PCALL: - case RETURN: - case '*': - case '>': - case '<': - case EQ_OP: - case AND_OP: - case OR_OP: - case GETHBIT: - case LEFT_OP: - case RIGHT_OP: - case GET_VALUE_AT_ADDRESS: - case '=': - case IFX: - case ADDRESS_OF: - case JUMPTABLE: - case CAST: - // case RECEIVE: - case SEND: - case DUMMY_READ_VOLATILE: - case CRITICAL: - case ENDCRITICAL: - assign_operands_for_cost(a, i, G, I); - set_surviving_regs(a, i, G, I); - c = dryZ80iCode(ic); - unset_surviving_regs(i, G); - ic->generated = false; - return (c); - // Inexact cost: - default: - return (default_instruction_cost(a, i, G, I)); - } -} - 
-template float weird_byte_order(const assignment &a, const I_t &I) { - float c = 0.0f; - - varset_t::const_iterator vi, vi_end; - for (vi = a.local.begin(), vi_end = a.local.end(); vi != vi_end; ++vi) - if (a.global[*vi] % 2 != I[*vi].byte % 2) - c += 8.0f; - - return (c); -} - -// Check for gaps, i.e. higher bytes of a variable being assigned to regs, while -// lower byte are not. -template -bool local_assignment_insane(const assignment &a, const I_t &I, var_t lastvar) { - varset_t::const_iterator v, v_end, v_old; - - for (v = a.local.begin(), v_end = a.local.end(); v != v_end;) { - v_old = v; - ++v; - if (v == v_end) { - if (*v_old != lastvar && I[*v_old].byte != I[*v_old].size - 1) - return (true); - break; - } - if (I[*v_old].v == I[*v].v) { - if (I[*v_old].byte != I[*v].byte - 1) - return (true); - } else { - if (*v_old != lastvar && I[*v_old].byte != I[*v_old].size - 1 || - I[*v].byte) - return (true); - } - } - - return (false); -} - -// For early removal of assignments that cannot be extended to valid -// assignments. -template -static bool assignment_hopeless(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I, - const var_t lastvar) { - // Can check for Ainst_ok() since A only contains 1-byte variables. - if (!G[i].ic->generated && !Ainst_ok(a, i, G, I)) - return (true); - - if (local_assignment_insane(a, I, lastvar)) - return (true); - - const i_assignment_t &ia = a.i_assignment; - - // Code generator cannot handle variables that are only partially in IY. 
- if (OPTRALLOC_IY && (ia.registers[REG_IYL][1] >= 0 && - (I[ia.registers[REG_IYL][1]].size != 2 || - I[ia.registers[REG_IYL][1]].byte != 0) || - ia.registers[REG_IYH][1] >= 0 && - (I[ia.registers[REG_IYH][1]].size != 2 || - I[ia.registers[REG_IYH][1]].byte != 1) || - ia.registers[REG_IYL][0] >= 0 && - (I[ia.registers[REG_IYL][0]].size != 2 || - I[ia.registers[REG_IYL][0]].byte != 0) || - ia.registers[REG_IYH][0] >= 0 && - (I[ia.registers[REG_IYH][0]].size != 2 || - I[ia.registers[REG_IYH][0]].byte != 1))) - return (true); - - // Can only check for HLinst_ok() in some cases. - if (OPTRALLOC_HL && - (ia.registers[REG_L][1] >= 0 && ia.registers[REG_H][1] >= 0) && - (ia.registers[REG_L][0] >= 0 && ia.registers[REG_H][0] >= 0) && - !HLinst_ok(a, i, G, I)) - return (true); - - // Can only check for IYinst_ok() in some cases. - if (OPTRALLOC_IY && - (ia.registers[REG_IYL][1] >= 0 && ia.registers[REG_IYH][1] >= 0) && - !((ia.registers[REG_IYL][0] >= 0) ^ (ia.registers[REG_IYH][0] >= 0)) && - !IYinst_ok(a, i, G, I)) - return (true); - - return (false); -} - -// Increase chance of finding good compatible assignments at join nodes. 
-template -static void get_best_local_assignment_biased( - assignment &a, typename boost::graph_traits::vertex_descriptor t, - const T_t &T) { - const assignment_list_t &alist = T[t].assignments; - - assignment_list_t::const_iterator ai, ai_end, ai_best; - for (ai = ai_best = alist.begin(), ai_end = alist.end(); ai != ai_end; ++ai) { - if (ai->s < ai_best->s) { - varset_t::const_iterator vi, vi_end; - for (vi = ai->local.begin(), vi_end = ai->local.end(); vi != vi_end; ++vi) - if (ai->global[*vi] == REG_A || - OPTRALLOC_HL && - (ai->global[*vi] == REG_H || ai->global[*vi] == REG_L) || - OPTRALLOC_IY && - (ai->global[*vi] == REG_IYH || ai->global[*vi] == REG_IYL)) - goto too_risky; - ai_best = ai; - } - too_risky:; - } - - a = *ai_best; - - std::set::const_iterator vi, vi_end; - for (vi = T[t].alive.begin(), vi_end = T[t].alive.end(); vi != vi_end; ++vi) - a.local.insert(*vi); -} - -template -static float rough_cost_estimate(const assignment &a, unsigned short int i, - const G_t &G, const I_t &I) { - const i_assignment_t &ia = a.i_assignment; - float c = 0.0f; - - c += weird_byte_order(a, I); - - if (OPTRALLOC_HL && - (ia.registers[REG_L][1] >= 0 && ia.registers[REG_H][1] >= 0) && - !((ia.registers[REG_L][0] >= 0) ^ (ia.registers[REG_H][0] >= 0)) && - !HLinst_ok(a, i, G, I)) - c += 8.0f; - - if (ia.registers[REG_A][1] < 0) - c += 0.03f; - - if (OPTRALLOC_HL && ia.registers[REG_L][1] < 0) - c += 0.02f; - - // Using IY is rarely a good choice, so discard the IY-users first when in - // doubt. - if (OPTRALLOC_IY) { - varset_t::const_iterator vi, vi_end; - for (vi = a.local.begin(), vi_end = a.local.end(); vi != vi_end; ++vi) - if (a.global[*vi] == REG_IYL || a.global[*vi] == REG_IYH) - c += 8.0f; - } - - // An artifical ordering of assignments. 
- if (ia.registers[REG_E][1] < 0) - c += 0.0001f; - if (ia.registers[REG_D][1] < 0) - c += 0.00001f; - - if (a.marked) - c -= 0.5f; - - varset_t::const_iterator v, v_end; - for (v = a.local.begin(), v_end = a.local.end(); v != v_end; ++v) { - const symbol *const sym = (symbol *)(hTabItemWithKey(liveRanges, I[*v].v)); - if (a.global[*v] < 0 && - IS_REGISTER( - sym->type)) // When in doubt, try to honour register keyword. - c += 32.0f; - if ((I[*v].byte % 2) && - (a.global[*v] == REG_L || a.global[*v] == REG_E || - a.global[*v] == REG_C)) // Try not to reverse bytes. - c += 8.0f; - if (!(I[*v].byte % 2) && I[*v].size > 1 && - (a.global[*v] == REG_H || a.global[*v] == REG_D || - a.global[*v] == REG_B)) // Try not to reverse bytes. - c += 8.0f; - if (I[*v].byte == 0 && I[*v].size > 1 || - I[*v].byte == 2 && I[*v].size > 3) { - if (a.global[*v] == REG_L && a.global[*v + 1] >= 0 && - a.global[*v + 1] != REG_H) - c += 16.0f; - if (a.global[*v] == REG_E && a.global[*v + 1] >= 0 && - a.global[*v + 1] != REG_D) - c += 16.0f; - if (a.global[*v] == REG_C && a.global[*v + 1] >= 0 && - a.global[*v + 1] != REG_B) - c += 16.0f; - } else if (I[*v].byte == 1 || I[*v].byte == 3) { - if (a.global[*v] == REG_H && a.global[*v - 1] >= 0 && - a.global[*v - 1] != REG_L) - c += 16.0f; - if (a.global[*v] == REG_D && a.global[*v - 1] >= 0 && - a.global[*v - 1] != REG_E) - c += 16.0f; - if (a.global[*v] == REG_B && a.global[*v - 1] >= 0 && - a.global[*v - 1] != REG_C) - c += 16.0f; - } - } - - c -= a.local.size() * 0.2f; - - return (c); -} - -// Code for another ic is generated when generating this one. Mark the other as -// generated. 
-static void extra_ic_generated(iCode *ic) { - if (ic->op == '>' || ic->op == '<' || ic->op == LE_OP || ic->op == GE_OP || - ic->op == EQ_OP || ic->op == NE_OP || - (ic->op == '^' || ic->op == '|' || ic->op == BITWISEAND) && - (IS_OP_LITERAL(IC_LEFT(ic)) || IS_OP_LITERAL(IC_RIGHT(ic)))) { - iCode *ifx; - if (ifx = ifxForOp(IC_RESULT(ic), ic)) { - OP_SYMBOL(IC_RESULT(ic))->for_newralloc = false; - OP_SYMBOL(IC_RESULT(ic))->regType = REG_CND; - ifx->generated = true; - } - } - - if (ic->op == SEND && ic->builtinSEND && - (!ic->prev || ic->prev->op != SEND || !ic->prev->builtinSEND)) { - iCode *icn; - for (icn = ic->next; icn->op != CALL; icn = icn->next) - icn->generated = true; - icn->generated = true; - ic->generated = false; - } -} - -template -static bool tree_dec_ralloc(T_t &T, const G_t &G, const I_t &I) { - bool assignment_optimal; - - con2_t I2(boost::num_vertices(I)); - for (unsigned int i = 0; i < boost::num_vertices(I); i++) { - I2[i].v = I[i].v; - I2[i].byte = I[i].byte; - I2[i].size = I[i].size; - I2[i].name = I[i].name; - } - typename boost::graph_traits::edge_iterator e, e_end; - for (boost::tie(e, e_end) = boost::edges(I); e != e_end; ++e) - add_edge(boost::source(*e, I), boost::target(*e, I), I2); - - assignment ac; - assignment_optimal = true; - tree_dec_ralloc_nodes(T, find_root(T), G, I2, ac, &assignment_optimal); - - const assignment &winner = *(T[find_root(T)].assignments.begin()); - -#ifdef DEBUG_RALLOC_DEC - std::cout << "Winner: "; - for (unsigned int i = 0; i < boost::num_vertices(I); i++) { - std::cout << "(" << i << ", " << int(winner.global[i]) << ") "; - } - std::cout << "\n"; - std::cout << "Cost: " << winner.s << "\n"; - std::cout.flush(); -#endif - - // Todo: Make this an assertion - if (winner.global.size() != boost::num_vertices(I)) { - std::cerr << "ERROR: No Assignments at root\n"; - exit(-1); - } - - for (unsigned int v = 0; v < boost::num_vertices(I); v++) { - symbol *sym = (symbol *)(hTabItemWithKey(liveRanges, I[v].v)); - if 
(winner.global[v] >= 0) { - if (winner.global[v] != REG_A && - (winner.global[v] != REG_IYL && winner.global[v] != REG_IYH || - !OPTRALLOC_IY)) { - sym->regs[I[v].byte] = regsZ80 + winner.global[v]; - sym->accuse = 0; - sym->isspilt = false; - sym->nRegs = I[v].size; - } else if (winner.global[v] == REG_A) { - sym->accuse = ACCUSE_A; - sym->isspilt = false; - sym->nRegs = 0; - sym->regs[0] = 0; - } else { - sym->accuse = ACCUSE_IY; - sym->isspilt = false; - sym->nRegs = 0; - sym->regs[I[v].byte] = 0; - } - } else { - for (int i = 0; i < I[v].size; i++) - sym->regs[i] = 0; - sym->accuse = 0; - sym->nRegs = I[v].size; - // spillThis(sym); Leave it to regFix, which can do some spillocation - // compaction. Todo: Use Thorup instead. - sym->isspilt = false; - } - } - - for (unsigned int i = 0; i < boost::num_vertices(G); i++) - set_surviving_regs(winner, i, G, I); // Never freed. Memory leak? - - return (!assignment_optimal); -} - -// Omit the frame pointer for functions with low register pressure and few -// parameter accesses. -template static bool omit_frame_ptr(const G_t &G) { - if (IS_GB || IY_RESERVED || z80_opts.noOmitFramePtr) - return (false); - - if (options.omitFramePtr) - return (true); - - signed char omitcost = -16; - for (unsigned int i = 0; i < boost::num_vertices(G); i++) { - if ((int)G[i].alive.size() > port->num_regs - 4) - return (false); - - const iCode *const ic = G[i].ic; - const operand *o; - o = IC_RESULT(ic); - if (o && IS_SYMOP(o) && OP_SYMBOL_CONST(o)->_isparm && - !IS_REGPARM(OP_SYMBOL_CONST(o)->etype)) - omitcost += 6; - o = IC_LEFT(ic); - if (o && IS_SYMOP(o) && OP_SYMBOL_CONST(o)->_isparm && - !IS_REGPARM(OP_SYMBOL_CONST(o)->etype)) - omitcost += 6; - o = IC_RIGHT(ic); - if (o && IS_SYMOP(o) && OP_SYMBOL_CONST(o)->_isparm && - !IS_REGPARM(OP_SYMBOL_CONST(o)->etype)) - omitcost += 6; - - if (omitcost > - 14) // Chosen greater than zero, since the peephole optimizer often can - // optimize the use of iy into use of hl, reducing the cost. 
- return (false); - } - - return (true); -} - -// Adjust stack location when deciding to omit frame pointer. -void move_parms(void) { - if (!currFunc || IS_GB || options.omitFramePtr || !should_omit_frame_ptr) - return; - - for (value *val = FUNC_ARGS(currFunc->type); val; val = val->next) { - if (IS_REGPARM(val->sym->etype) || !val->sym->onStack) - continue; - - val->sym->stack -= 2; - } -} - -iCode *z80_ralloc2_cc(ebbIndex *ebbi) { - iCode *ic; - -#ifdef DEBUG_RALLOC_DEC - std::cout << "Processing " << currFunc->name << " from " << dstFileName - << "\n"; - std::cout.flush(); -#endif - - cfg_t control_flow_graph; - - con_t conflict_graph; - - ic = create_cfg(control_flow_graph, conflict_graph, ebbi); - - should_omit_frame_ptr = omit_frame_ptr(control_flow_graph); - move_parms(); - - if (options.dump_graphs) - dump_cfg(control_flow_graph); - - if (options.dump_graphs) - dump_con(conflict_graph); - - tree_dec_t tree_decomposition; - - thorup_tree_decomposition(tree_decomposition, control_flow_graph); - - nicify(tree_decomposition); - - alive_tree_dec(tree_decomposition, control_flow_graph); - - good_re_root(tree_decomposition); - nicify(tree_decomposition); - alive_tree_dec(tree_decomposition, control_flow_graph); - - if (options.dump_graphs) - dump_tree_decomposition(tree_decomposition); - - z80_assignment_optimal = - !tree_dec_ralloc(tree_decomposition, control_flow_graph, conflict_graph); - - return (ic); -} diff --git a/src/util/cgraph.c b/src/util/cgraph.c new file mode 100644 index 000000000..76edcd259 --- /dev/null +++ b/src/util/cgraph.c @@ -0,0 +1,216 @@ +#include "cgraph.h" + +#include +#include + +static void adj_init(cg_adj_t *a, int has_weights) { + a->dst = NULL; a->w = has_weights ? (float *)1 : NULL; a->n = 0; a->cap = 0; + /* w starts as sentinel (1) to indicate "has weights but not allocated"; + * we swap it to NULL below and (re)allocate on first reserve. 
*/ + a->w = NULL; + (void)has_weights; +} + +static void adj_free(cg_adj_t *a) { + free(a->dst); a->dst = NULL; + free(a->w); a->w = NULL; + a->n = 0; a->cap = 0; +} + +static void adj_reserve(cg_adj_t *a, size_t need, int has_weights) { + if (need <= a->cap) return; + size_t nc = a->cap ? a->cap * 2 : 4; + while (nc < need) nc *= 2; + a->dst = (unsigned int *)realloc(a->dst, nc * sizeof(unsigned int)); + if (has_weights) + a->w = (float *)realloc(a->w, nc * sizeof(float)); + a->cap = nc; +} + +static void adj_push(cg_adj_t *a, unsigned int dst, float w, int has_weights) { + adj_reserve(a, a->n + 1, has_weights); + a->dst[a->n] = dst; + if (has_weights) a->w[a->n] = w; + a->n++; +} + +/* Remove the first edge whose dst == target. Returns 1 if removed. */ +static int adj_remove(cg_adj_t *a, unsigned int target, int has_weights) { + for (size_t i = 0; i < a->n; i++) { + if (a->dst[i] == target) { + memmove(&a->dst[i], &a->dst[i + 1], (a->n - i - 1) * sizeof(unsigned int)); + if (has_weights) + memmove(&a->w[i], &a->w[i + 1], (a->n - i - 1) * sizeof(float)); + a->n--; + return 1; + } + } + return 0; +} + +static int adj_find(const cg_adj_t *a, unsigned int target) { + for (size_t i = 0; i < a->n; i++) + if (a->dst[i] == target) return (int)i; + return -1; +} + +void cg_init(cgraph_t *g, cg_mode_t mode, int has_weights) { + g->mode = mode; + g->has_weights = has_weights ? 1 : 0; + g->out = NULL; + g->in = NULL; + g->nvertices = 0; + g->vcap = 0; + g->nedges = 0; +} + +void cg_free(cgraph_t *g) { + for (size_t v = 0; v < g->nvertices; v++) { + adj_free(&g->out[v]); + if (g->mode == CG_BIDIRECTIONAL) adj_free(&g->in[v]); + } + free(g->out); g->out = NULL; + free(g->in); g->in = NULL; + g->nvertices = 0; + g->vcap = 0; + g->nedges = 0; +} + +static void cg_grow(cgraph_t *g) { + if (g->nvertices < g->vcap) return; + size_t nc = g->vcap ? 
g->vcap * 2 : 8; + g->out = (cg_adj_t *)realloc(g->out, nc * sizeof(cg_adj_t)); + if (g->mode == CG_BIDIRECTIONAL) + g->in = (cg_adj_t *)realloc(g->in, nc * sizeof(cg_adj_t)); + g->vcap = nc; +} + +unsigned int cg_add_vertex(cgraph_t *g) { + cg_grow(g); + unsigned int v = (unsigned int)g->nvertices++; + adj_init(&g->out[v], g->has_weights); + if (g->mode == CG_BIDIRECTIONAL) adj_init(&g->in[v], g->has_weights); + return v; +} + +void cg_add_edge(cgraph_t *g, unsigned int u, unsigned int v, float w) { + adj_push(&g->out[u], v, w, g->has_weights); + if (g->mode == CG_BIDIRECTIONAL) { + adj_push(&g->in[v], u, w, g->has_weights); + } else if (g->mode == CG_UNDIRECTED && u != v) { + adj_push(&g->out[v], u, w, g->has_weights); + } + g->nedges++; +} + +void cg_remove_edge(cgraph_t *g, unsigned int u, unsigned int v) { + int r = adj_remove(&g->out[u], v, g->has_weights); + if (g->mode == CG_BIDIRECTIONAL) { + adj_remove(&g->in[v], u, g->has_weights); + } else if (g->mode == CG_UNDIRECTED && u != v) { + adj_remove(&g->out[v], u, g->has_weights); + } + if (r) g->nedges--; +} + +int cg_has_edge(const cgraph_t *g, unsigned int u, unsigned int v) { + return adj_find(&g->out[u], v) >= 0; +} + +size_t cg_num_vertices(const cgraph_t *g) { return g->nvertices; } +unsigned long cg_num_edges(const cgraph_t *g) { return g->nedges; } + +size_t cg_out_degree(const cgraph_t *g, unsigned int v) { + return g->out[v].n; +} + +size_t cg_in_degree(const cgraph_t *g, unsigned int v) { + if (g->mode == CG_BIDIRECTIONAL) return g->in[v].n; + if (g->mode == CG_UNDIRECTED) return g->out[v].n; + /* CG_DIRECTED: not tracked, scan. 
*/ + size_t c = 0; + for (size_t u = 0; u < g->nvertices; u++) + if (adj_find(&g->out[u], v) >= 0) c++; + return c; +} + +float cg_edge_weight(const cgraph_t *g, unsigned int u, unsigned int v) { + int i = adj_find(&g->out[u], v); + if (i < 0 || !g->has_weights) return 0.0f; + return g->out[u].w[i]; +} + +void cg_set_edge_weight(cgraph_t *g, unsigned int u, unsigned int v, float w) { + if (!g->has_weights) return; + int i = adj_find(&g->out[u], v); + if (i >= 0) g->out[u].w[i] = w; + if (g->mode == CG_BIDIRECTIONAL) { + i = adj_find(&g->in[v], u); + if (i >= 0) g->in[v].w[i] = w; + } else if (g->mode == CG_UNDIRECTED && u != v) { + i = adj_find(&g->out[v], u); + if (i >= 0) g->out[v].w[i] = w; + } +} + +void cg_copy_full(cgraph_t *dst, const cgraph_t *src, cg_mode_t dst_mode, int dst_has_weights) { + cg_free(dst); + cg_init(dst, dst_mode, dst_has_weights); + for (size_t v = 0; v < src->nvertices; v++) cg_add_vertex(dst); + for (unsigned int u = 0; u < src->nvertices; u++) { + for (size_t i = 0; i < src->out[u].n; i++) { + unsigned int v = src->out[u].dst[i]; + float w = src->has_weights ? src->out[u].w[i] : 0.0f; + if (src->mode == CG_UNDIRECTED && v < u) continue; + cg_add_edge(dst, u, v, w); + } + } +} + +void cg_copy_topology(cgraph_t *dst, const cgraph_t *src, cg_mode_t dst_mode) { + cg_copy_full(dst, src, dst_mode, 0); +} + +/* Union-Find based undirected connected components over out[] (for undirected + * the adjacency lists are already symmetric; for directed-bidirectional we + * explicitly union both directions — this matches boost's default which treats + * the graph as undirected for this algorithm). 
*/ +size_t cg_connected_components(const cgraph_t *g, unsigned int *comp) { + if (g->nvertices == 0) return 0; + unsigned int *p = (unsigned int *)malloc(g->nvertices * sizeof(unsigned int)); + for (size_t i = 0; i < g->nvertices; i++) p[i] = (unsigned int)i; + /* find with path compression */ + /* Using explicit while loops to avoid recursion issues on large graphs. */ + #define FIND(r, x) do { \ + (r) = (x); \ + while (p[r] != (r)) (r) = p[r]; \ + unsigned int _cur = (x); \ + while (p[_cur] != (r)) { \ + unsigned int _n = p[_cur]; p[_cur] = (r); _cur = _n; \ + } \ + } while (0) + + for (unsigned int u = 0; u < g->nvertices; u++) { + for (size_t i = 0; i < g->out[u].n; i++) { + unsigned int v = g->out[u].dst[i]; + unsigned int ru, rv; + FIND(ru, u); + FIND(rv, v); + if (ru != rv) p[ru] = rv; + } + } + #undef FIND + /* renumber */ + size_t *map = (size_t *)malloc(g->nvertices * sizeof(size_t)); + for (size_t i = 0; i < g->nvertices; i++) map[i] = (size_t)-1; + size_t next = 0; + for (unsigned int v = 0; v < g->nvertices; v++) { + unsigned int r = v; + while (p[r] != r) r = p[r]; + if (map[r] == (size_t)-1) map[r] = next++; + comp[v] = (unsigned int)map[r]; + } + free(p); + free(map); + return next; +} diff --git a/src/util/cgraph.h b/src/util/cgraph.h new file mode 100644 index 000000000..0bb53ebe0 --- /dev/null +++ b/src/util/cgraph.h @@ -0,0 +1,96 @@ +/* Minimal C graph library to replace the boost::adjacency_list / + * boost::adjacency_matrix usage in SDCCtree_dec, SDCCnaddr, SDCClospre, + * and SDCCralloc. + * + * Design: + * - Vertex descriptors are sequential unsigned ints (0..nvertices-1), matching + * boost::vecS semantics. + * - Topology only. Per-vertex bundled properties are kept in caller-owned + * arrays indexed by vertex id. + * - Three modes: + * CG_BIDIRECTIONAL — directed graph with incoming-edge tracking. + * CG_DIRECTED — directed graph without incoming-edge tracking. + * CG_UNDIRECTED — undirected graph (add_edge mirrors to both sides). 
+ * - Optional per-edge float weight. + * - No edge descriptors. Callers walk adjacency lists directly via CG_FOREACH + * macros or returned (dst, weight) pairs. + */ +#ifndef KCC_CGRAPH_H +#define KCC_CGRAPH_H + +#include + +typedef enum { + CG_DIRECTED = 0, + CG_BIDIRECTIONAL = 1, + CG_UNDIRECTED = 2 +} cg_mode_t; + +typedef struct { + unsigned int *dst; + float *w; /* NULL if graph has no weights */ + size_t n; + size_t cap; +} cg_adj_t; + +typedef struct cgraph { + cg_mode_t mode; + int has_weights; + cg_adj_t *out; /* out[v] */ + cg_adj_t *in; /* in[v], only used when mode == CG_BIDIRECTIONAL */ + size_t nvertices; + size_t vcap; + unsigned long nedges; /* counts directed edges; undirected each counted once */ +} cgraph_t; + +void cg_init(cgraph_t *g, cg_mode_t mode, int has_weights); +void cg_free(cgraph_t *g); +unsigned int cg_add_vertex(cgraph_t *g); +/* add_edge: weight ignored if !has_weights. Always succeeds. */ +void cg_add_edge(cgraph_t *g, unsigned int u, unsigned int v, float w); +/* remove_edge: no-op if not present. For undirected, removes from both sides. */ +void cg_remove_edge(cgraph_t *g, unsigned int u, unsigned int v); +int cg_has_edge(const cgraph_t *g, unsigned int u, unsigned int v); +size_t cg_num_vertices(const cgraph_t *g); +unsigned long cg_num_edges(const cgraph_t *g); +size_t cg_out_degree(const cgraph_t *g, unsigned int v); +size_t cg_in_degree(const cgraph_t *g, unsigned int v); +/* Returns weight of edge (u,v). Caller is responsible that the edge exists + * and has_weights is set. */ +float cg_edge_weight(const cgraph_t *g, unsigned int u, unsigned int v); +/* Find the edge and update its weight. No-op if not present. */ +void cg_set_edge_weight(cgraph_t *g, unsigned int u, unsigned int v, float w); + +/* Copy topology (no weights carried). dst is cleared first. */ +void cg_copy_topology(cgraph_t *dst, const cgraph_t *src, cg_mode_t dst_mode); +/* Copy topology preserving weights (if src has them and dst_has_weights). 
*/ +void cg_copy_full(cgraph_t *dst, const cgraph_t *src, cg_mode_t dst_mode, int dst_has_weights); + +/* connected_components: fills comp[v] with 0-based component id. Returns + * number of components. Treats graph as undirected. comp must be size nvertices. */ +size_t cg_connected_components(const cgraph_t *g, unsigned int *comp); + +/* ---- iteration helpers ---- + * + * Usage: + * CG_FOREACH_OUT(g, v, i, nbr, wt) { + * // use nbr (unsigned int) and wt (float, 0 if no weights) + * } + * i is a user-provided size_t variable; serves as loop index. + */ + +#define CG_FOREACH_OUT(g, v, i, nbr, wt) \ + for ((i) = 0; \ + (i) < (g)->out[(v)].n && \ + ((nbr) = (g)->out[(v)].dst[(i)], \ + (wt) = (g)->out[(v)].w ? (g)->out[(v)].w[(i)] : 0.0f, 1); \ + (i)++) + +#define CG_FOREACH_IN(g, v, i, src, wt) \ + for ((i) = 0; \ + (i) < (g)->in[(v)].n && \ + ((src) = (g)->in[(v)].dst[(i)], \ + (wt) = (g)->in[(v)].w ? (g)->in[(v)].w[(i)] : 0.0f, 1); \ + (i)++) + +#endif diff --git a/src/util/uiset.c b/src/util/uiset.c new file mode 100644 index 000000000..8c8682d0f --- /dev/null +++ b/src/util/uiset.c @@ -0,0 +1,172 @@ +#include "uiset.h" + +#include +#include + +/* The body is repeated for each element type via a macro. Binary search + + * insertion preserving ascending order. 
*/ + +#define DEFINE_SET_OPS(PFX, STRUCT, T) \ + void PFX##_init(STRUCT *s) { \ + s->items = NULL; s->n = 0; s->cap = 0; \ + } \ + void PFX##_free(STRUCT *s) { \ + free(s->items); s->items = NULL; s->n = 0; s->cap = 0; \ + } \ + void PFX##_clear(STRUCT *s) { s->n = 0; } \ + static size_t PFX##_lb(const STRUCT *s, T v) { \ + size_t lo = 0, hi = s->n; \ + while (lo < hi) { \ + size_t mid = lo + ((hi - lo) >> 1); \ + if (s->items[mid] < v) lo = mid + 1; else hi = mid; \ + } \ + return lo; \ + } \ + int PFX##_contains(const STRUCT *s, T v) { \ + size_t i = PFX##_lb(s, v); \ + return i < s->n && s->items[i] == v; \ + } \ + static void PFX##_reserve(STRUCT *s, size_t need) { \ + if (need <= s->cap) return; \ + size_t nc = s->cap ? s->cap * 2 : 4; \ + while (nc < need) nc *= 2; \ + s->items = (T *)realloc(s->items, nc * sizeof(T)); \ + s->cap = nc; \ + } \ + int PFX##_insert(STRUCT *s, T v) { \ + size_t i = PFX##_lb(s, v); \ + if (i < s->n && s->items[i] == v) return 0; \ + PFX##_reserve(s, s->n + 1); \ + memmove(&s->items[i + 1], &s->items[i], (s->n - i) * sizeof(T)); \ + s->items[i] = v; s->n++; return 1; \ + } \ + int PFX##_erase(STRUCT *s, T v) { \ + size_t i = PFX##_lb(s, v); \ + if (i >= s->n || s->items[i] != v) return 0; \ + memmove(&s->items[i], &s->items[i + 1], (s->n - i - 1) * sizeof(T)); \ + s->n--; return 1; \ + } + +DEFINE_SET_OPS(uiset, uiset_t, unsigned int) +DEFINE_SET_OPS(usset, usset_t, unsigned short) +DEFINE_SET_OPS(sss, sssset_t, short) +DEFINE_SET_OPS(iset, iset_t, int) + +/* ---- extras for uiset ---- */ + +void uiset_copy(uiset_t *dst, const uiset_t *src) { + if (dst == src) return; + if (dst->cap < src->n) { + free(dst->items); + dst->cap = src->n ? 
src->n : 4; + dst->items = (unsigned int *)malloc(dst->cap * sizeof(unsigned int)); + } + memcpy(dst->items, src->items, src->n * sizeof(unsigned int)); + dst->n = src->n; +} + +void uiset_move(uiset_t *dst, uiset_t *src) { + if (dst == src) return; + free(dst->items); + *dst = *src; + src->items = NULL; src->n = 0; src->cap = 0; +} + +int uiset_equal(const uiset_t *a, const uiset_t *b) { + if (a->n != b->n) return 0; + return memcmp(a->items, b->items, a->n * sizeof(unsigned int)) == 0; +} + +int uiset_includes(const uiset_t *a, const uiset_t *b) { + size_t i = 0, j = 0; + while (j < b->n) { + while (i < a->n && a->items[i] < b->items[j]) i++; + if (i == a->n || a->items[i] != b->items[j]) return 0; + i++; j++; + } + return 1; +} + +void uiset_intersection(const uiset_t *a, const uiset_t *b, uiset_t *out) { + uiset_clear(out); + size_t i = 0, j = 0; + while (i < a->n && j < b->n) { + if (a->items[i] < b->items[j]) i++; + else if (a->items[i] > b->items[j]) j++; + else { uiset_insert(out, a->items[i]); i++; j++; } + } +} + +void uiset_difference(const uiset_t *a, const uiset_t *b, uiset_t *out) { + uiset_clear(out); + size_t i = 0, j = 0; + while (i < a->n) { + if (j >= b->n || a->items[i] < b->items[j]) { + uiset_insert(out, a->items[i]); i++; + } else if (a->items[i] > b->items[j]) { + j++; + } else { i++; j++; } + } +} + +void uiset_union_into(uiset_t *a, const uiset_t *b) { + for (size_t i = 0; i < b->n; i++) uiset_insert(a, b->items[i]); +} + +/* ---- extras for usset ---- */ + +void usset_copy(usset_t *dst, const usset_t *src) { + if (dst == src) return; + if (dst->cap < src->n) { + free(dst->items); + dst->cap = src->n ? 
src->n : 4; + dst->items = (unsigned short *)malloc(dst->cap * sizeof(unsigned short)); + } + memcpy(dst->items, src->items, src->n * sizeof(unsigned short)); + dst->n = src->n; +} + +void usset_move(usset_t *dst, usset_t *src) { + if (dst == src) return; + free(dst->items); + *dst = *src; + src->items = NULL; src->n = 0; src->cap = 0; +} + +int usset_equal(const usset_t *a, const usset_t *b) { + if (a->n != b->n) return 0; + return memcmp(a->items, b->items, a->n * sizeof(unsigned short)) == 0; +} + +int usset_less(const usset_t *a, const usset_t *b) { + size_t n = a->n < b->n ? a->n : b->n; + for (size_t i = 0; i < n; i++) { + if (a->items[i] < b->items[i]) return -1; + if (a->items[i] > b->items[i]) return 1; + } + if (a->n < b->n) return -1; + if (a->n > b->n) return 1; + return 0; +} + +void usset_union_into(usset_t *a, const usset_t *b) { + for (size_t i = 0; i < b->n; i++) usset_insert(a, b->items[i]); +} + +/* ---- extras for sssset ---- */ + +void sss_copy(sssset_t *dst, const sssset_t *src) { + if (dst == src) return; + if (dst->cap < src->n) { + free(dst->items); + dst->cap = src->n ? src->n : 4; + dst->items = (short *)malloc(dst->cap * sizeof(short)); + } + memcpy(dst->items, src->items, src->n * sizeof(short)); + dst->n = src->n; +} + +int sss_equal(const sssset_t *a, const sssset_t *b) { + if (a->n != b->n) return 0; + return memcmp(a->items, b->items, a->n * sizeof(short)) == 0; +} diff --git a/src/util/uiset.h b/src/util/uiset.h new file mode 100644 index 000000000..8f39f4728 --- /dev/null +++ b/src/util/uiset.h @@ -0,0 +1,87 @@ +/* Sorted dynamic-array set of unsigned int and unsigned short. + * + * Replacement for std::set / std::set used + * pervasively in SDCCtree_dec, SDCCnaddr, SDCClospre, and SDCCralloc. + * + * Operations preserve ascending order; binary search for membership/insert. 
+ */ +#ifndef KCC_UISET_H +#define KCC_UISET_H + +#include + +/* --- unsigned int set --- */ + +typedef struct { + unsigned int *items; + size_t n; + size_t cap; +} uiset_t; + +void uiset_init(uiset_t *s); +void uiset_free(uiset_t *s); +void uiset_clear(uiset_t *s); +int uiset_contains(const uiset_t *s, unsigned int v); +int uiset_insert(uiset_t *s, unsigned int v); /* 1 if new, 0 if dup */ +int uiset_erase(uiset_t *s, unsigned int v); /* 1 if removed */ +void uiset_copy(uiset_t *dst, const uiset_t *src); +void uiset_move(uiset_t *dst, uiset_t *src); /* swap-style, frees dst */ +int uiset_equal(const uiset_t *a, const uiset_t *b); +int uiset_includes(const uiset_t *a, const uiset_t *b); /* a ⊇ b */ +void uiset_intersection(const uiset_t *a, const uiset_t *b, uiset_t *out); +void uiset_difference(const uiset_t *a, const uiset_t *b, uiset_t *out); +void uiset_union_into(uiset_t *a, const uiset_t *b); + +/* --- unsigned short set --- */ + +typedef struct { + unsigned short *items; + size_t n; + size_t cap; +} usset_t; + +void usset_init(usset_t *s); +void usset_free(usset_t *s); +void usset_clear(usset_t *s); +int usset_contains(const usset_t *s, unsigned short v); +int usset_insert(usset_t *s, unsigned short v); +int usset_erase(usset_t *s, unsigned short v); +void usset_copy(usset_t *dst, const usset_t *src); +void usset_move(usset_t *dst, usset_t *src); +int usset_equal(const usset_t *a, const usset_t *b); +int usset_less(const usset_t *a, const usset_t *b); /* lex compare, <0, 0, >0 */ +void usset_union_into(usset_t *a, const usset_t *b); + +/* --- signed short set (var_t from ralloc) --- */ + +typedef struct { + short *items; + size_t n; + size_t cap; +} sssset_t; /* name avoids clashing with anything */ + +void sss_init(sssset_t *s); +void sss_free(sssset_t *s); +void sss_clear(sssset_t *s); +int sss_contains(const sssset_t *s, short v); +int sss_insert(sssset_t *s, short v); +int sss_erase(sssset_t *s, short v); +void sss_copy(sssset_t *dst, const sssset_t 
*src); +int sss_equal(const sssset_t *a, const sssset_t *b); + +/* --- int set --- */ + +typedef struct { + int *items; + size_t n; + size_t cap; +} iset_t; + +void iset_init(iset_t *s); +void iset_free(iset_t *s); +void iset_clear(iset_t *s); +int iset_contains(const iset_t *s, int v); +int iset_insert(iset_t *s, int v); +int iset_erase(iset_t *s, int v); + +#endif