diff --git a/.clang-format b/.clang-format
index 7548f76b9b80cc6c1725505ab0492be1d7e3317a..22e04bab0e95d05981218e51cd6affb85f82a45f 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,6 +1,6 @@
 AccessModifierOffset: -4
 
-AlignAfterOpenBracket: DontAlign
+AlignAfterOpenBracket: Align
 AlignConsecutiveAssignments: false
 AlignConsecutiveDeclarations: false
 AlignConsecutiveMacros: true
@@ -9,10 +9,10 @@ AlignOperands: false
 AlignTrailingComments: true
 
 AllowAllArgumentsOnNextLine: true
-AllowAllConstructorInitializersOnNextLine: true
+AllowAllConstructorInitializersOnNextLine: false
 AllowAllParametersOfDeclarationOnNextLine: false
 AllowShortBlocksOnASingleLine: Empty
-AllowShortCaseLabelsOnASingleLine: true
+AllowShortCaseLabelsOnASingleLine: false
 AllowShortFunctionsOnASingleLine: Empty
 AllowShortIfStatementsOnASingleLine: Never
 AllowShortLambdasOnASingleLine: Inline
@@ -56,7 +56,7 @@ CommentPragmas: ''
 
 CompactNamespaces: false
 
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
 ConstructorInitializerIndentWidth: 4
 
 ContinuationIndentWidth: 4
diff --git a/.gitignore b/.gitignore
index 3395e18ae0404b87b2abc5759426b6ef38a49d09..6ebc1f5d11e2158e895383297863436b651309f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,4 +30,7 @@ $RECYCLE.BIN/
 *.egg-info
 __pycache__/
 env/
-venv/
\ No newline at end of file
+venv/
+*.pyd
+*.so
+_b_asic_debug_log.txt
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3471fb4f473b069f9a4f77efec5d7aeb0fc84a79..85ecb3db700070ccf262b61002b8d8bf927e856a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,16 +3,18 @@ cmake_minimum_required(VERSION 3.8)
 project(
 	"B-ASIC"
 	VERSION 0.0.1
-	DESCRIPTION "Better ASIC Toolbox for python3"
+	DESCRIPTION "Better ASIC Toolbox for Python 3"
 	LANGUAGES C CXX
 )
 
-find_package(fmt 5.2.1 REQUIRED)
+# Find dependencies.
+find_package(fmt REQUIRED)
 find_package(pybind11 CONFIG REQUIRED)
 
-set(LIBRARY_NAME "b_asic")
-set(TARGET_NAME "_${LIBRARY_NAME}")
+set(LIBRARY_NAME "b_asic") # Name of the python library directory.
+set(TARGET_NAME "_${LIBRARY_NAME}") # Name of this extension module.
 
+# Set output directory for compiled binaries.
 if(NOT CMAKE_LIBRARY_OUTPUT_DIRECTORY)
 	include(GNUInstallDirs)
 	set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_INSTALL_LIBDIR}")
@@ -29,22 +31,42 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}")
 
+# Add files to be compiled into Python module.
 pybind11_add_module(
 	"${TARGET_NAME}"
+
+	# Main files.
 	"${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp"
+	"${CMAKE_CURRENT_SOURCE_DIR}/src/simulation.cpp"
+	
+	# For DOD simulation.
+	"${CMAKE_CURRENT_SOURCE_DIR}/src/simulation/compile.cpp"
+	"${CMAKE_CURRENT_SOURCE_DIR}/src/simulation/run.cpp"
+	"${CMAKE_CURRENT_SOURCE_DIR}/src/simulation/simulation.cpp"
+
+	# For OOP simulation (see legacy folder).
+	#"${CMAKE_CURRENT_SOURCE_DIR}/src/simulation/custom_operation.cpp"
+	#"${CMAKE_CURRENT_SOURCE_DIR}/src/simulation/operation.cpp"
+	#"${CMAKE_CURRENT_SOURCE_DIR}/src/simulation/signal_flow_graph.cpp"
+	#"${CMAKE_CURRENT_SOURCE_DIR}/src/simulation/simulation.cpp"
+	#"${CMAKE_CURRENT_SOURCE_DIR}/src/simulation/special_operations.cpp"
 )
 
+# Include headers.
 target_include_directories(
 	"${TARGET_NAME}"
     PRIVATE
         "${CMAKE_CURRENT_SOURCE_DIR}/src"
 )
 
+# Use C++17.
 target_compile_features(
 	"${TARGET_NAME}"
 	PRIVATE
 		cxx_std_17
 )
+
+# Set compiler-specific options using generator expressions.
 target_compile_options(
 	"${TARGET_NAME}"
 	PRIVATE
@@ -60,20 +82,20 @@ target_compile_options(
 		>
 )
 
+# Add libraries. Note: pybind11 is already added in pybind11_add_module.
 target_link_libraries(
 	"${TARGET_NAME}"
 	PRIVATE
-		fmt::fmt
+		$<TARGET_NAME_IF_EXISTS:fmt::fmt-header-only>
+		$<$<NOT:$<TARGET_EXISTS:fmt::fmt-header-only>>:fmt::fmt>
 )
 
-add_custom_target(
-	remove_old_python_dir ALL
-	COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${LIBRARY_NAME}"
-	COMMENT "Removing old python directory ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${LIBRARY_NAME}"
-)
-add_custom_target(
-	copy_python_dir ALL
-	COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_LIST_DIR}/${LIBRARY_NAME}" "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${LIBRARY_NAME}"
-	COMMENT "Copying python files to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${LIBRARY_NAME}"
-	DEPENDS "${TARGET_NAME}" remove_old_python_dir
-)
\ No newline at end of file
+# Copy binaries to project folder for debugging during development.
+if(NOT ASIC_BUILDING_PYTHON_DISTRIBUTION)
+	add_custom_target(
+		copy_binaries ALL
+		COMMAND ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:${TARGET_NAME}>" "${CMAKE_CURRENT_LIST_DIR}"
+		COMMENT "Copying binaries to ${CMAKE_CURRENT_LIST_DIR}"
+		DEPENDS "${TARGET_NAME}"
+	)
+endif()
\ No newline at end of file
diff --git a/README.md b/README.md
index d28399ce800d0240881c14405c892eb65d23b072..b2972f30828f19038e5efe612831f989b8f5ffd5 100644
--- a/README.md
+++ b/README.md
@@ -29,15 +29,6 @@ To run the test suite, the following additional packages are required:
   * pytest
   * pytest-cov (for testing with coverage)
 
-To build a binary distribution, the following additional packages are required:
-* Python:
-  * wheel
-
-To run the test suite, the following additional packages are required:
-* Python:
-  * pytest
-  * pytest-cov (for testing with coverage)
-
 ### Using CMake directly
 How to build using CMake.
 
diff --git a/b_asic/GUI/gui_interface.ui b/b_asic/GUI/gui_interface.ui
index c441048a8b6ceff93cf926060696a107bfdda9fb..382747818a3286373b825e4115c9b283799156bc 100644
--- a/b_asic/GUI/gui_interface.ui
+++ b/b_asic/GUI/gui_interface.ui
@@ -236,7 +236,7 @@ QGroupBox::title {
        </item>
        <item>
         <property name="text">
-         <string>Register</string>
+         <string>Delay</string>
         </property>
        </item>
        <item>
diff --git a/b_asic/GUI/main_window.py b/b_asic/GUI/main_window.py
index 178eeefaf7053ebfffc83f18ac7acda509977788..a353ca3cb2446f143131edb7ac6f439c66ad684d 100644
--- a/b_asic/GUI/main_window.py
+++ b/b_asic/GUI/main_window.py
@@ -16,8 +16,7 @@ from arrow import Arrow
 from port_button import PortButton
 from show_pc_window import ShowPCWindow
 
-from b_asic import Operation, SFG, InputPort, OutputPort
-from b_asic.simulation import Simulation
+from b_asic import Operation, SFG, InputPort, OutputPort, FastSimulation
 import b_asic.core_operations as c_oper
 import b_asic.special_operations as s_oper
 from utils import decorate_class, handle_error
@@ -296,8 +295,8 @@ class MainWindow(QMainWindow):
     def _simulate_sfg(self):
         for sfg, properties in self.dialog.properties.items():
             self.logger.info(f"Simulating sfg with name: {sfg.name}.")
-            simulation = Simulation(sfg, input_providers=properties["input_values"], save_results=properties["all_results"])
-            l_result = simulation.run_for(properties["iteration_count"])
+            simulation = FastSimulation(sfg, input_providers=properties["input_values"])
+            simulation.run_for(properties["iteration_count"], save_results=properties["all_results"])
 
             if properties["all_results"]:
                 print(f"{'=' * 10} {sfg.name} {'=' * 10}")
diff --git a/b_asic/__init__.py b/b_asic/__init__.py
index 8dc848b23ed11edf63d8ff88c3aa2e553a32e095..f0293649d362f419818723cc751823594d655982 100644
--- a/b_asic/__init__.py
+++ b/b_asic/__init__.py
@@ -2,6 +2,7 @@
 Better ASIC Toolbox.
 TODO: More info.
 """
+from _b_asic import *
 from b_asic.core_operations import *
 from b_asic.graph_component import *
 from b_asic.operation import *
diff --git a/b_asic/operation.py b/b_asic/operation.py
index d1a820fbe49592e5545ff5624dc5fc121c77b71f..08d6bf52c9b85b9d233e4f8f766b99105be94ac1 100644
--- a/b_asic/operation.py
+++ b/b_asic/operation.py
@@ -15,12 +15,11 @@ from numbers import Number
 from typing import NewType, List, Dict, Sequence, Iterable, Mapping, MutableMapping, Optional, Any, Set, Union
 
 
-OutputKey = NewType("OutputKey", str)
-OutputMap = Mapping[OutputKey, Optional[Number]]
-MutableOutputMap = MutableMapping[OutputKey, Optional[Number]]
-RegisterMap = Mapping[OutputKey, Number]
-MutableRegisterMap = MutableMapping[OutputKey, Number]
-
+ResultKey = NewType("ResultKey", str)
+ResultMap = Mapping[ResultKey, Optional[Number]]
+MutableResultMap = MutableMapping[ResultKey, Optional[Number]]
+DelayMap = Mapping[ResultKey, Number]
+MutableDelayMap = MutableMapping[ResultKey, Number]
 
 class Operation(GraphComponent, SignalSourceProvider):
     """Operation interface.
@@ -84,6 +83,14 @@ class Operation(GraphComponent, SignalSourceProvider):
         object that is connected to the self and other objects.
         """
         raise NotImplementedError
+    
+    @abstractmethod
+    def __lshift__(self, src: SignalSourceProvider) -> Signal:
+        """Overloads the left shift operator to make it connect the provided signal source
+        to this operation's input, assuming it has exactly 1 input port.
+        Returns the new signal.
+        """
+        raise NotImplementedError
 
     @property
     @abstractmethod
@@ -136,40 +143,42 @@ class Operation(GraphComponent, SignalSourceProvider):
         raise NotImplementedError
 
     @abstractmethod
-    def key(self, index: int, prefix: str = "") -> OutputKey:
+    def key(self, index: int, prefix: str = "") -> ResultKey:
         """Get the key used to access the output of a certain output of this operation
         from the output parameter passed to current_output(s) or evaluate_output(s).
         """
         raise NotImplementedError
 
     @abstractmethod
-    def current_output(self, index: int, registers: Optional[RegisterMap] = None, prefix: str = "") -> Optional[Number]:
+    def current_output(self, index: int, delays: Optional[DelayMap] = None, prefix: str = "") -> Optional[Number]:
         """Get the current output at the given index of this operation, if available.
-        The registers parameter will be used for lookup.
-        The prefix parameter will be used as a prefix for the key string when looking for registers.
+        The delays parameter will be used for lookup.
+        The prefix parameter will be used as a prefix for the key string when looking for delays.
         See also: current_outputs, evaluate_output, evaluate_outputs.
         """
         raise NotImplementedError
 
     @abstractmethod
-    def evaluate_output(self, index: int, input_values: Sequence[Number], results: Optional[MutableOutputMap] = None, registers: Optional[MutableRegisterMap] = None, prefix: str = "") -> Number:
+    def evaluate_output(self, index: int, input_values: Sequence[Number], results: Optional[MutableResultMap] = None, delays: Optional[MutableDelayMap] = None, prefix: str = "", bits_override: Optional[int] = None, truncate: bool = True) -> Number:
         """Evaluate the output at the given index of this operation with the given input values.
         The results parameter will be used to store any results (including intermediate results) for caching.
-        The registers parameter will be used to get the current value of any intermediate registers that are encountered, and be updated with their new values.
-        The prefix parameter will be used as a prefix for the key string when storing results/registers.
+        The delays parameter will be used to get the current value of any intermediate delays that are encountered, and be updated with their new values.
+        The prefix parameter will be used as a prefix for the key string when storing results/delays.
+        The bits_override parameter specifies a word length override when truncating inputs which ignores the word length specified by the input signal.
+        The truncate parameter specifies whether input truncation should be enabled in the first place. If set to False, input values will be used driectly without any bit truncation.
         See also: evaluate_outputs, current_output, current_outputs.
         """
         raise NotImplementedError
 
     @abstractmethod
-    def current_outputs(self, registers: Optional[RegisterMap] = None, prefix: str = "") -> Sequence[Optional[Number]]:
+    def current_outputs(self, delays: Optional[DelayMap] = None, prefix: str = "") -> Sequence[Optional[Number]]:
         """Get all current outputs of this operation, if available.
         See current_output for more information.
         """
         raise NotImplementedError
 
     @abstractmethod
-    def evaluate_outputs(self, input_values: Sequence[Number], results: Optional[MutableOutputMap] = None, registers: Optional[MutableRegisterMap] = None, prefix: str = "") -> Sequence[Number]:
+    def evaluate_outputs(self, input_values: Sequence[Number], results: Optional[MutableResultMap] = None, delays: Optional[MutableDelayMap] = None, prefix: str = "", bits_override: Optional[int] = None, truncate: bool = True) -> Sequence[Number]:
         """Evaluate all outputs of this operation given the input values.
         See evaluate_output for more information.
         """
@@ -184,7 +193,12 @@ class Operation(GraphComponent, SignalSourceProvider):
 
     @abstractmethod
     def inputs_required_for_output(self, output_index: int) -> Iterable[int]:
-        """Get the input indices of all inputs in this operation whose values are required in order to evalueate the output at the given output index."""
+        """Get the input indices of all inputs in this operation whose values are required in order to evaluate the output at the given output index."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def truncate_input(self, index: int, value: Number, bits: int) -> Number:
+        """Truncate the value to be used as input at the given index to a certain bit length."""
         raise NotImplementedError
 
     @abstractmethod
@@ -313,6 +327,13 @@ class AbstractOperation(Operation, AbstractGraphComponent):
         from b_asic.core_operations import Constant, Division
         return Division(Constant(src) if isinstance(src, Number) else src, self)
 
+    def __lshift__(self, src: SignalSourceProvider) -> Signal:
+        if self.input_count != 1:
+            diff = "more" if self.input_count > 1 else "less"
+            raise TypeError(
+                f"{self.__class__.__name__} cannot be used as a destination because it has {diff} than 1 input")
+        return self.input(0).connect(src)
+    
     def __str__(self):
         inputs_dict = dict()
         for i, port in enumerate(self.inputs):
@@ -392,7 +413,7 @@ class AbstractOperation(Operation, AbstractGraphComponent):
                 result.append(s)
         return result
 
-    def key(self, index: int, prefix: str = "") -> OutputKey:
+    def key(self, index: int, prefix: str = "") -> ResultKey:
         key = prefix
         if self.output_count != 1:
             if key:
@@ -401,23 +422,18 @@ class AbstractOperation(Operation, AbstractGraphComponent):
         elif not key:
             key = str(index)
         return key
-
-    def current_output(self, index: int, registers: Optional[RegisterMap] = None, prefix: str = "") -> Optional[Number]:
+    
+    def current_output(self, index: int, delays: Optional[DelayMap] = None, prefix: str = "") -> Optional[Number]:
         return None
 
-    def evaluate_output(self, index: int, input_values: Sequence[Number], results: Optional[MutableOutputMap] = None, registers: Optional[MutableRegisterMap] = None, prefix: str = "") -> Number:
+    def evaluate_output(self, index: int, input_values: Sequence[Number], results: Optional[MutableResultMap] = None, delays: Optional[MutableDelayMap] = None, prefix: str = "", bits_override: Optional[int] = None, truncate: bool = True) -> Number:
         if index < 0 or index >= self.output_count:
             raise IndexError(
                 f"Output index out of range (expected 0-{self.output_count - 1}, got {index})")
         if len(input_values) != self.input_count:
-            raise ValueError(
-                f"Wrong number of input values supplied to operation (expected {self.input_count}, got {len(input_values)})")
-        if results is None:
-            results = {}
-        if registers is None:
-            registers = {}
-
-        values = self.evaluate(*self.truncate_inputs(input_values))
+            raise ValueError(f"Wrong number of input values supplied to operation (expected {self.input_count}, got {len(input_values)})")
+
+        values = self.evaluate(*(self.truncate_inputs(input_values, bits_override) if truncate else input_values))
         if isinstance(values, collections.abc.Sequence):
             if len(values) != self.output_count:
                 raise RuntimeError(
@@ -431,18 +447,16 @@ class AbstractOperation(Operation, AbstractGraphComponent):
             raise RuntimeError(
                 f"Operation evaluated to invalid type (expected Sequence/Number, got {values.__class__.__name__})")
 
-        if self.output_count == 1:
-            results[self.key(index, prefix)] = values[index]
-        else:
+        if results is not None:
             for i in range(self.output_count):
                 results[self.key(i, prefix)] = values[i]
         return values[index]
 
-    def current_outputs(self, registers: Optional[RegisterMap] = None, prefix: str = "") -> Sequence[Optional[Number]]:
-        return [self.current_output(i, registers, prefix) for i in range(self.output_count)]
+    def current_outputs(self, delays: Optional[DelayMap] = None, prefix: str = "") -> Sequence[Optional[Number]]:
+        return [self.current_output(i, delays, prefix) for i in range(self.output_count)]
 
-    def evaluate_outputs(self, input_values: Sequence[Number], results: Optional[MutableOutputMap] = None, registers: Optional[MutableRegisterMap] = None, prefix: str = "") -> Sequence[Number]:
-        return [self.evaluate_output(i, input_values, results, registers, prefix) for i in range(self.output_count)]
+    def evaluate_outputs(self, input_values: Sequence[Number], results: Optional[MutableResultMap] = None, delays: Optional[MutableDelayMap] = None, prefix: str = "", bits_override: Optional[int] = None, truncate: bool = True) -> Sequence[Number]:
+        return [self.evaluate_output(i, input_values, results, delays, prefix, bits_override, truncate) for i in range(self.output_count)]
 
     def split(self) -> Iterable[Operation]:
         # Import here to avoid circular imports.
@@ -519,27 +533,21 @@ class AbstractOperation(Operation, AbstractGraphComponent):
         return self.output(0)
 
     def truncate_input(self, index: int, value: Number, bits: int) -> Number:
-        """Truncate the value to be used as input at the given index to a certain bit length."""
-        n = value
-        if not isinstance(n, int):
-            n = trunc(value)
-        return n & ((2 ** bits) - 1)
+        return int(value) & ((2 ** bits) - 1)
 
-    def truncate_inputs(self, input_values: Sequence[Number]) -> Sequence[Number]:
+    def truncate_inputs(self, input_values: Sequence[Number], bits_override: Optional[int] = None) -> Sequence[Number]:
         """Truncate the values to be used as inputs to the bit lengths specified by the respective signals connected to each input."""
         args = []
         for i, input_port in enumerate(self.inputs):
-            if input_port.signal_count >= 1:
+            value = input_values[i]
+            bits = bits_override
+            if bits_override is None and input_port.signal_count >= 1:
                 bits = input_port.signals[0].bits
-                if bits is None:
-                    args.append(input_values[i])
-                else:
-                    if isinstance(input_values[i], complex):
-                        raise TypeError(
-                            "Complex value cannot be truncated to {bits} bits as requested by the signal connected to input #{i}")
-                    args.append(self.truncate_input(i, input_values[i], bits))
-            else:
-                args.append(input_values[i])
+            if bits_override is not None:
+                if isinstance(value, complex):
+                    raise TypeError("Complex value cannot be truncated to {bits} bits as requested by the signal connected to input #{i}")
+                value = self.truncate_input(i, value, bits)
+            args.append(value)
         return args
 
     @property
diff --git a/b_asic/port.py b/b_asic/port.py
index fb3f64177e74f2623d02284243a03529cf05b6e4..2d6093886992922f22af1ac0c3f09c25696301b8 100644
--- a/b_asic/port.py
+++ b/b_asic/port.py
@@ -172,6 +172,12 @@ class InputPort(AbstractPort):
         assert self._source_signal is None, "Attempted to connect already connected input port."
         # self._source_signal is set by the signal constructor.
         return Signal(source=src.source, destination=self, name=name)
+    
+    def __lshift__(self, src: SignalSourceProvider) -> Signal:
+        """Overloads the left shift operator to make it connect the provided signal source to this input port.
+        Returns the new signal.
+        """
+        return self.connect(src)
 
 
 class OutputPort(AbstractPort, SignalSourceProvider):
diff --git a/b_asic/signal_flow_graph.py b/b_asic/signal_flow_graph.py
index 1a0cd8e7cee0f1ae8798233348a7b45e12f7e74f..e85a5209c70572bdf6bc570ac782fdde43ca4e6d 100644
--- a/b_asic/signal_flow_graph.py
+++ b/b_asic/signal_flow_graph.py
@@ -3,7 +3,7 @@ B-ASIC Signal Flow Graph Module.
 TODO: More info.
 """
 
-from typing import List, Iterable, Sequence, Dict, Optional, DefaultDict, MutableSet
+from typing import List, Iterable, Sequence, Dict, Optional, DefaultDict, MutableSet, Tuple
 from numbers import Number
 from collections import defaultdict, deque
 from io import StringIO
@@ -12,10 +12,13 @@ import itertools as it
 from graphviz import Digraph
 
 from b_asic.port import SignalSourceProvider, OutputPort
-from b_asic.operation import Operation, AbstractOperation, MutableOutputMap, MutableRegisterMap
+from b_asic.operation import Operation, AbstractOperation, ResultKey, DelayMap, MutableResultMap, MutableDelayMap
 from b_asic.signal import Signal
 from b_asic.graph_component import GraphID, GraphIDNumber, GraphComponent, Name, TypeName
-from b_asic.special_operations import Input, Output, Register
+from b_asic.special_operations import Input, Output, Delay
+
+
+DelayQueue = List[Tuple[str, ResultKey, OutputPort]]
 
 
 class GraphIDGenerator:
@@ -198,11 +201,11 @@ class SFG(AbstractOperation):
         return "sfg"
 
     def evaluate(self, *args):
-        result = self.evaluate_outputs(args, {}, {}, "")
+        result = self.evaluate_outputs(args)
         n = len(result)
         return None if n == 0 else result[0] if n == 1 else result
 
-    def evaluate_output(self, index: int, input_values: Sequence[Number], results: Optional[MutableOutputMap] = None, registers: Optional[MutableRegisterMap] = None, prefix: str = "") -> Number:
+    def evaluate_output(self, index: int, input_values: Sequence[Number], results: Optional[MutableResultMap] = None, delays: Optional[MutableDelayMap] = None, prefix: str = "", bits_override: Optional[int] = None, truncate: bool = True) -> Number:
         if index < 0 or index >= self.output_count:
             raise IndexError(
                 f"Output index out of range (expected 0-{self.output_count - 1}, got {index})")
@@ -211,15 +214,20 @@ class SFG(AbstractOperation):
                 f"Wrong number of inputs supplied to SFG for evaluation (expected {self.input_count}, got {len(input_values)})")
         if results is None:
             results = {}
-        if registers is None:
-            registers = {}
-
+        if delays is None:
+            delays = {}
+        
         # Set the values of our input operations to the given input values.
-        for op, arg in zip(self._input_operations, self.truncate_inputs(input_values)):
+        for op, arg in zip(self._input_operations, self.truncate_inputs(input_values, bits_override) if truncate else input_values):
             op.value = arg
-
-        value = self._evaluate_source(self._output_operations[index].input(
-            0).signals[0].source, results, registers, prefix)
+        
+        deferred_delays = []
+        value = self._evaluate_source(self._output_operations[index].input(0).signals[0].source, results, delays, prefix, bits_override, truncate, deferred_delays)
+        while deferred_delays:
+            new_deferred_delays = []
+            for key_base, key, src in deferred_delays:
+                self._do_evaluate_source(key_base, key, src, results, delays, prefix, bits_override, truncate, new_deferred_delays)
+            deferred_delays = new_deferred_delays
         results[self.key(index, prefix)] = value
         return value
 
@@ -345,110 +353,6 @@ class SFG(AbstractOperation):
         """
         return self._components_by_name.get(name, [])
 
-    def _add_component_unconnected_copy(self, original_component: GraphComponent) -> GraphComponent:
-        assert original_component not in self._original_components_to_new, "Tried to add duplicate SFG component"
-        new_component = original_component.copy_component()
-        self._original_components_to_new[original_component] = new_component
-        new_id = self._graph_id_generator.next_id(new_component.type_name())
-        new_component.graph_id = new_id
-        self._components_by_id[new_id] = new_component
-        self._components_by_name[new_component.name].append(new_component)
-        return new_component
-
-    def _add_operation_connected_tree_copy(self, start_op: Operation) -> None:
-        op_stack = deque([start_op])
-        while op_stack:
-            original_op = op_stack.pop()
-            # Add or get the new copy of the operation.
-            new_op = None
-            if original_op not in self._original_components_to_new:
-                new_op = self._add_component_unconnected_copy(original_op)
-                self._components_dfs_order.append(new_op)
-                self._operations_dfs_order.append(new_op)
-            else:
-                new_op = self._original_components_to_new[original_op]
-
-            # Connect input ports to new signals.
-            for original_input_port in original_op.inputs:
-                if original_input_port.signal_count < 1:
-                    raise ValueError("Unconnected input port in SFG")
-
-                for original_signal in original_input_port.signals:
-                    # Check if the signal is one of the SFG's input signals.
-                    if original_signal in self._original_input_signals_to_indices:
-                        # New signal already created during first step of constructor.
-                        new_signal = self._original_components_to_new[original_signal]
-                        new_signal.set_destination(new_op.input(original_input_port.index))
-
-                        self._components_dfs_order.extend([new_signal, new_signal.source.operation])
-                        self._operations_dfs_order.append(new_signal.source.operation)
-
-                    # Check if the signal has not been added before.
-                    elif original_signal not in self._original_components_to_new:
-                        if original_signal.source is None:
-                            raise ValueError("Dangling signal without source in SFG")
-
-                        new_signal = self._add_component_unconnected_copy(original_signal)
-                        new_signal.set_destination(new_op.input(original_input_port.index))
-
-                        self._components_dfs_order.append(new_signal)
-
-                        original_connected_op = original_signal.source.operation
-                        # Check if connected Operation has been added before.
-                        if original_connected_op in self._original_components_to_new:
-                            # Set source to the already added operations port.
-                            new_signal.set_source(self._original_components_to_new[original_connected_op].output(
-                                original_signal.source.index))
-                        else:
-                            # Create new operation, set signal source to it.
-                            new_connected_op = self._add_component_unconnected_copy(original_connected_op)
-                            new_signal.set_source(new_connected_op.output(original_signal.source.index))
-
-                            self._components_dfs_order.append(new_connected_op)
-                            self._operations_dfs_order.append(new_connected_op)
-
-                            # Add connected operation to queue of operations to visit.
-                            op_stack.append(original_connected_op)
-
-            # Connect output ports.
-            for original_output_port in original_op.outputs:
-                for original_signal in original_output_port.signals:
-                    # Check if the signal is one of the SFG's output signals.
-                    if original_signal in self._original_output_signals_to_indices:
-                        # New signal already created during first step of constructor.
-                        new_signal = self._original_components_to_new[original_signal]
-                        new_signal.set_source(new_op.output(original_output_port.index))
-
-                        self._components_dfs_order.extend([new_signal, new_signal.destination.operation])
-                        self._operations_dfs_order.append(new_signal.destination.operation)
-
-                    # Check if signal has not been added before.
-                    elif original_signal not in self._original_components_to_new:
-                        if original_signal.source is None:
-                            raise ValueError("Dangling signal without source in SFG")
-
-                        new_signal = self._add_component_unconnected_copy(original_signal)
-                        new_signal.set_source(new_op.output(original_output_port.index))
-
-                        self._components_dfs_order.append(new_signal)
-
-                        original_connected_op = original_signal.destination.operation
-                        # Check if connected operation has been added.
-                        if original_connected_op in self._original_components_to_new:
-                            # Set destination to the already connected operations port.
-                            new_signal.set_destination(self._original_components_to_new[original_connected_op].input(
-                                original_signal.destination.index))
-                        else:
-                            # Create new operation, set destination to it.
-                            new_connected_op = self._add_component_unconnected_copy(original_connected_op)
-                            new_signal.set_destination(new_connected_op.input(original_signal.destination.index))
-
-                            self._components_dfs_order.append(new_connected_op)
-                            self._operations_dfs_order.append(new_connected_op)
-
-                            # Add connected operation to the queue of operations to visit.
-                            op_stack.append(original_connected_op)
-
     def replace_component(self, component: Operation, _id: GraphID):
         """Find and replace all components matching either on GraphID, Type or both.
         Then return a new deepcopy of the sfg with the replaced component.
@@ -543,26 +447,6 @@ class SFG(AbstractOperation):
 
         return sfg_copy()
 
-    def _evaluate_source(self, src: OutputPort, results: MutableOutputMap, registers: MutableRegisterMap, prefix: str) -> Number:
-        src_prefix = prefix
-        if src_prefix:
-            src_prefix += "."
-        src_prefix += src.operation.graph_id
-
-        key = src.operation.key(src.index, src_prefix)
-        if key in results:
-            value = results[key]
-            if value is None:
-                raise RuntimeError(f"Direct feedback loop detected when evaluating operation.")
-            return value
-
-        results[key] = src.operation.current_output(src.index, registers, src_prefix)
-        input_values = [self._evaluate_source(
-            input_port.signals[0].source, results, registers, prefix) for input_port in src.operation.inputs]
-        value = src.operation.evaluate_output(src.index, input_values, results, registers, src_prefix)
-        results[key] = value
-        return value
-
     def get_precedence_list(self) -> List[List[OutputPort]]:
         """Returns a Precedence list of the SFG where each element in n:th the list consists
         of elements that are executed in the n:th step. If the precedence list already has been
@@ -572,10 +456,10 @@ class SFG(AbstractOperation):
 
         # Find all operations with only outputs and no inputs.
         no_input_ops = list(filter(lambda op: op.input_count == 0, self.operations))
-        reg_ops = self.get_components_with_type_name(Register.type_name())
+        delay_ops = self.get_components_with_type_name(Delay.type_name())
 
         # Find all first iter output ports for precedence
-        first_iter_ports = [op.output(i) for op in (no_input_ops + reg_ops) for i in range(op.output_count)]
+        first_iter_ports = [op.output(i) for op in (no_input_ops + delay_ops) for i in range(op.output_count)]
 
         self._precedence_list = self._traverse_for_precedence_list(first_iter_ports)
 
@@ -598,8 +482,8 @@ class SFG(AbstractOperation):
             for outport in curr_iter_ports:
                 for signal in outport.signals:
                     new_inport = signal.destination
-                    # Don't traverse over Registers
-                    if new_inport is not None and not isinstance(new_inport.operation, Register):
+                    # Don't traverse over delays.
+                    if new_inport is not None and not isinstance(new_inport.operation, Delay):
                         new_op = new_inport.operation
                         remaining_inports_per_operation[new_op] -= 1
                         if remaining_inports_per_operation[new_op] == 0:
@@ -736,13 +620,156 @@ class SFG(AbstractOperation):
                     raise RuntimeError("Unallowed structure in SFG detected")
 
         self._operations_topological_order = top_order
-
         return self._operations_topological_order
-
-    def set_latency_of_type(self, type_name: TypeName, latency: int):
+    
+    def set_latency_of_type(self, type_name: TypeName, latency: int) -> None:
+        """TODO: docstring"""
         for op in self.get_components_with_type_name(type_name):
             op.set_latency(latency)
 
-    def set_latency_offsets_of_type(self, type_name: TypeName, latency_offsets: Dict[str, int]):
+    def set_latency_offsets_of_type(self, type_name: TypeName, latency_offsets: Dict[str, int]) -> None:
+        """TODO: docstring"""
         for op in self.get_components_with_type_name(type_name):
             op.set_latency_offsets(latency_offsets)
+
+    def _add_component_unconnected_copy(self, original_component: GraphComponent) -> GraphComponent:
+        assert original_component not in self._original_components_to_new, "Tried to add duplicate SFG component"
+        new_component = original_component.copy_component()
+        self._original_components_to_new[original_component] = new_component
+        new_id = self._graph_id_generator.next_id(new_component.type_name())
+        new_component.graph_id = new_id
+        self._components_by_id[new_id] = new_component
+        self._components_by_name[new_component.name].append(new_component)
+        return new_component
+
+    def _add_operation_connected_tree_copy(self, start_op: Operation) -> None:
+        op_stack = deque([start_op])
+        while op_stack:
+            original_op = op_stack.pop()
+            # Add or get the new copy of the operation.
+            new_op = None
+            if original_op not in self._original_components_to_new:
+                new_op = self._add_component_unconnected_copy(original_op)
+                self._components_dfs_order.append(new_op)
+                self._operations_dfs_order.append(new_op)
+            else:
+                new_op = self._original_components_to_new[original_op]
+
+            # Connect input ports to new signals.
+            for original_input_port in original_op.inputs:
+                if original_input_port.signal_count < 1:
+                    raise ValueError("Unconnected input port in SFG")
+
+                for original_signal in original_input_port.signals:
+                    # Check if the signal is one of the SFG's input signals.
+                    if original_signal in self._original_input_signals_to_indices:
+                        # New signal already created during first step of constructor.
+                        new_signal = self._original_components_to_new[original_signal]
+                        new_signal.set_destination(new_op.input(original_input_port.index))
+
+                        self._components_dfs_order.extend([new_signal, new_signal.source.operation])
+                        self._operations_dfs_order.append(new_signal.source.operation)
+
+                    # Check if the signal has not been added before.
+                    elif original_signal not in self._original_components_to_new:
+                        if original_signal.source is None:
+                            raise ValueError("Dangling signal without source in SFG")
+
+                        new_signal = self._add_component_unconnected_copy(original_signal)
+                        new_signal.set_destination(new_op.input(original_input_port.index))
+
+                        self._components_dfs_order.append(new_signal)
+
+                        original_connected_op = original_signal.source.operation
+                        # Check if connected Operation has been added before.
+                        if original_connected_op in self._original_components_to_new:
+                            # Set source to the already added operations port.
+                            new_signal.set_source(self._original_components_to_new[original_connected_op].output(
+                                original_signal.source.index))
+                        else:
+                            # Create new operation, set signal source to it.
+                            new_connected_op = self._add_component_unconnected_copy(original_connected_op)
+                            new_signal.set_source(new_connected_op.output(original_signal.source.index))
+
+                            self._components_dfs_order.append(new_connected_op)
+                            self._operations_dfs_order.append(new_connected_op)
+
+                            # Add connected operation to queue of operations to visit.
+                            op_stack.append(original_connected_op)
+
+            # Connect output ports.
+            for original_output_port in original_op.outputs:
+                for original_signal in original_output_port.signals:
+                    # Check if the signal is one of the SFG's output signals.
+                    if original_signal in self._original_output_signals_to_indices:
+                        # New signal already created during first step of constructor.
+                        new_signal = self._original_components_to_new[original_signal]
+                        new_signal.set_source(new_op.output(original_output_port.index))
+
+                        self._components_dfs_order.extend([new_signal, new_signal.destination.operation])
+                        self._operations_dfs_order.append(new_signal.destination.operation)
+
+                    # Check if signal has not been added before.
+                    elif original_signal not in self._original_components_to_new:
+                        if original_signal.source is None:
+                            raise ValueError("Dangling signal without source in SFG")
+
+                        new_signal = self._add_component_unconnected_copy(original_signal)
+                        new_signal.set_source(new_op.output(original_output_port.index))
+
+                        self._components_dfs_order.append(new_signal)
+
+                        original_connected_op = original_signal.destination.operation
+                        # Check if connected operation has been added.
+                        if original_connected_op in self._original_components_to_new:
+                            # Set destination to the already connected operations port.
+                            new_signal.set_destination(self._original_components_to_new[original_connected_op].input(
+                                original_signal.destination.index))
+                        else:
+                            # Create new operation, set destination to it.
+                            new_connected_op = self._add_component_unconnected_copy(original_connected_op)
+                            new_signal.set_destination(new_connected_op.input(original_signal.destination.index))
+
+                            self._components_dfs_order.append(new_connected_op)
+                            self._operations_dfs_order.append(new_connected_op)
+
+                            # Add connected operation to the queue of operations to visit.
+                            op_stack.append(original_connected_op)
+
+    def _evaluate_source(self, src: OutputPort, results: MutableResultMap, delays: MutableDelayMap, prefix: str, bits_override: Optional[int], truncate: bool, deferred_delays: DelayQueue) -> Number:
+        key_base = (prefix + "." + src.operation.graph_id) if prefix else src.operation.graph_id
+        key = src.operation.key(src.index, key_base)
+        if key in results:
+            value = results[key]
+            if value is None:
+                raise RuntimeError("Direct feedback loop detected when evaluating operation.")
+            return value
+        
+        value = src.operation.current_output(src.index, delays, key_base)
+        results[key] = value
+        if value is None:
+            value = self._do_evaluate_source(key_base, key, src, results, delays, prefix, bits_override, truncate, deferred_delays)
+        else:
+            deferred_delays.append((key_base, key, src)) # Evaluate later. Use current value for now.
+        return value
+
+    def _do_evaluate_source(self, key_base: str, key: ResultKey, src: OutputPort, results: MutableResultMap, delays: MutableDelayMap, prefix: str, bits_override: Optional[int], truncate: bool, deferred_delays: DelayQueue) -> Number:
+        input_values = [self._evaluate_source(input_port.signals[0].source, results, delays, prefix, bits_override, truncate, deferred_delays) for input_port in src.operation.inputs]
+        value = src.operation.evaluate_output(src.index, input_values, results, delays, key_base, bits_override, truncate)
+        results[key] = value
+        return value
+
+    def _inputs_required_for_source(self, src: OutputPort, visited: MutableSet[Operation]) -> Sequence[bool]:
+        if src.operation in visited:
+            return []
+        visited.add(src.operation)
+        
+        if isinstance(src.operation, Input):
+            for i, input_operation in enumerate(self._input_operations):
+                if input_operation is src.operation:
+                    return [i]
+
+        input_indices = []
+        for i in src.operation.inputs_required_for_output(src.index):
+            input_indices.extend(self._inputs_required_for_source(src.operation.input(i).signals[0].source, visited))
+        return input_indices
diff --git a/b_asic/simulation.py b/b_asic/simulation.py
index 81be8de4ae59486a21e73f03078490c61f89d6d5..7829d171a38ff74c9a90b513e5a21034727e7492 100644
--- a/b_asic/simulation.py
+++ b/b_asic/simulation.py
@@ -3,15 +3,20 @@ B-ASIC Simulation Module.
 TODO: More info.
 """
 
+import numpy as np
+
 from collections import defaultdict
 from numbers import Number
-from typing import List, Dict, DefaultDict, Callable, Sequence, Mapping, Union, Optional
+from typing import List, Dict, DefaultDict, Callable, Sequence, Mapping, Union, Optional, MutableSequence, MutableMapping
 
-from b_asic.operation import OutputKey, OutputMap
+from b_asic.operation import ResultKey, ResultMap, MutableResultMap, MutableDelayMap
 from b_asic.signal_flow_graph import SFG
 
 
-InputProvider = Union[Number, Sequence[Number], Callable[[int], Number]]
+ResultArrayMap = Mapping[ResultKey, Sequence[Number]]
+MutableResultArrayMap = MutableMapping[ResultKey, MutableSequence[Number]]
+InputFunction = Callable[[int], Number]
+InputProvider = Union[Number, Sequence[Number], InputFunction]
 
 
 class Simulation:
@@ -20,24 +25,19 @@ class Simulation:
     """
 
     _sfg: SFG
-    _results: DefaultDict[int, Dict[str, Number]]
-    _registers: Dict[str, Number]
+    _results: MutableResultArrayMap
+    _delays: MutableDelayMap
     _iteration: int
-    _input_functions: Sequence[Callable[[int], Number]]
-    _current_input_values: Sequence[Number]
-    _latest_output_values: Sequence[Number]
-    _save_results: bool
+    _input_functions: Sequence[InputFunction]
+    _input_length: Optional[int]
 
-    def __init__(self, sfg: SFG, input_providers: Optional[Sequence[Optional[InputProvider]]] = None, save_results: bool = False):
+    def __init__(self, sfg: SFG, input_providers: Optional[Sequence[Optional[InputProvider]]] = None):
         self._sfg = sfg
-        self._results = defaultdict(dict)
-        self._registers = {}
+        self._results = defaultdict(list)
+        self._delays = {}
         self._iteration = 0
-        self._input_functions = [
-            lambda _: 0 for _ in range(self._sfg.input_count)]
-        self._current_input_values = [0 for _ in range(self._sfg.input_count)]
-        self._latest_output_values = [0 for _ in range(self._sfg.output_count)]
-        self._save_results = save_results
+        self._input_functions = [lambda _: 0 for _ in range(self._sfg.input_count)]
+        self._input_length = None
         if input_providers is not None:
             self.set_inputs(input_providers)
 
@@ -51,48 +51,48 @@ class Simulation:
         elif isinstance(input_provider, Number):
             self._input_functions[index] = lambda _: input_provider
         else:
+            if self._input_length is None:
+                self._input_length = len(input_provider)
+            elif self._input_length != len(input_provider):
+                raise ValueError(f"Inconsistent input length for simulation (was {self._input_length}, got {len(input_provider)})")
             self._input_functions[index] = lambda n: input_provider[n]
 
     def set_inputs(self, input_providers: Sequence[Optional[InputProvider]]) -> None:
         """Set the input functions used to get values for the inputs to the internal SFG."""
         if len(input_providers) != self._sfg.input_count:
-            raise ValueError(
-                f"Wrong number of inputs supplied to simulation (expected {self._sfg.input_count}, got {len(input_providers)})")
-        self._input_functions = [None for _ in range(self._sfg.input_count)]
+            raise ValueError(f"Wrong number of inputs supplied to simulation (expected {self._sfg.input_count}, got {len(input_providers)})")
         for index, input_provider in enumerate(input_providers):
             if input_provider is not None:
                 self.set_input(index, input_provider)
 
-    @property
-    def save_results(self) -> bool:
-        """Get the flag that determines if the results of ."""
-        return self._save_results
-
-    @save_results.setter
-    def save_results(self, save_results) -> None:
-        self._save_results = save_results
-
-    def run(self) -> Sequence[Number]:
+    def step(self, save_results: bool = True, bits_override: Optional[int] = None, truncate: bool = True) -> Sequence[Number]:
         """Run one iteration of the simulation and return the resulting output values."""
-        return self.run_for(1)
+        return self.run_for(1, save_results, bits_override, truncate)
 
-    def run_until(self, iteration: int) -> Sequence[Number]:
+    def run_until(self, iteration: int, save_results: bool = True, bits_override: Optional[int] = None, truncate: bool = True) -> Sequence[Number]:
         """Run the simulation until its iteration is greater than or equal to the given iteration
-        and return the resulting output values.
+        and return the output values of the last iteration.
         """
+        result = []
         while self._iteration < iteration:
-            self._current_input_values = [self._input_functions[i](
-                self._iteration) for i in range(self._sfg.input_count)]
-            self._latest_output_values = self._sfg.evaluate_outputs(
-                self._current_input_values, self._results[self._iteration], self._registers)
-            if not self._save_results:
-                del self._results[self.iteration]
+            input_values = [self._input_functions[i](self._iteration) for i in range(self._sfg.input_count)]
+            results = {}
+            result = self._sfg.evaluate_outputs(input_values, results, self._delays, "", bits_override, truncate)
+            if save_results:
+                for key, value in results.items():
+                    self._results[key].append(value)
             self._iteration += 1
-        return self._latest_output_values
+        return result
+
+    def run_for(self, iterations: int, save_results: bool = True, bits_override: Optional[int] = None, truncate: bool = True) -> Sequence[Number]:
+        """Run a given number of iterations of the simulation and return the output values of the last iteration."""
+        return self.run_until(self._iteration + iterations, save_results, bits_override, truncate)
 
-    def run_for(self, iterations: int) -> Sequence[Number]:
-        """Run a given number of iterations of the simulation and return the resulting output values."""
-        return self.run_until(self._iteration + iterations)
+    def run(self, save_results: bool = True, bits_override: Optional[int] = None, truncate: bool = True) -> Sequence[Number]:
+        """Run the simulation until the end of its input arrays and return the output values of the last iteration."""
+        if self._input_length is None:
+            raise IndexError("Tried to run unlimited simulation")
+        return self.run_until(self._input_length, save_results, bits_override, truncate)
 
     @property
     def iteration(self) -> int:
@@ -100,12 +100,13 @@ class Simulation:
         return self._iteration
 
     @property
-    def results(self) -> Mapping[int, OutputMap]:
-        """Get a mapping of all results, including intermediate values, calculated for each iteration up until now.
-        The outer mapping maps from iteration number to value mapping. The value mapping maps output port identifiers to values.
-        Example: {0: {"c1": 3, "c2": 4, "bfly1.0": 7, "bfly1.1": -1, "0": 7}}
+    def results(self) -> ResultArrayMap:
+        """Get a mapping from result keys to numpy arrays containing all results, including intermediate values,
+        calculated for each iteration up until now that was run with save_results enabled.
+        The mapping is indexed using the key() method of Operation with the appropriate output index.
+        Example result after 3 iterations: {"c1": [3, 6, 7], "c2": [4, 5, 5], "bfly1.0": [7, 0, 0], "bfly1.1": [-1, 0, 2], "0": [7, -2, -1]}
         """
-        return self._results
+        return {key: np.array(value) for key, value in self._results.items()}
 
     def clear_results(self) -> None:
         """Clear all results that were saved until now."""
@@ -113,6 +114,4 @@ class Simulation:
 
     def clear_state(self) -> None:
         """Clear all current state of the simulation, except for the results and iteration."""
-        self._registers.clear()
-        self._current_input_values = [0 for _ in range(self._sfg.input_count)]
-        self._latest_output_values = [0 for _ in range(self._sfg.output_count)]
+        self._delays.clear()
diff --git a/b_asic/special_operations.py b/b_asic/special_operations.py
index 14440eb098e7100e5399f3ec023b17bcab4ed458..8c15c55aed2ab7821467b51e84750a86743bde7a 100644
--- a/b_asic/special_operations.py
+++ b/b_asic/special_operations.py
@@ -6,7 +6,7 @@ TODO: More info.
 from numbers import Number
 from typing import Optional, Sequence
 
-from b_asic.operation import AbstractOperation, RegisterMap, MutableOutputMap, MutableRegisterMap
+from b_asic.operation import AbstractOperation, ResultKey, DelayMap, MutableResultMap, MutableDelayMap
 from b_asic.graph_component import Name, TypeName
 from b_asic.port import SignalSourceProvider
 
@@ -55,7 +55,7 @@ class Output(AbstractOperation):
         return None
 
 
-class Register(AbstractOperation):
+class Delay(AbstractOperation):
     """Unit delay operation.
     TODO: More info.
     """
@@ -67,17 +67,17 @@ class Register(AbstractOperation):
 
     @classmethod
     def type_name(cls) -> TypeName:
-        return "reg"
+        return "t"
 
     def evaluate(self, a):
         return self.param("initial_value")
 
-    def current_output(self, index: int, registers: Optional[RegisterMap] = None, prefix: str = "") -> Optional[Number]:
-        if registers is not None:
-            return registers.get(self.key(index, prefix), self.param("initial_value"))
+    def current_output(self, index: int, delays: Optional[DelayMap] = None, prefix: str = "") -> Optional[Number]:
+        if delays is not None:
+            return delays.get(self.key(index, prefix), self.param("initial_value"))
         return self.param("initial_value")
-
-    def evaluate_output(self, index: int, input_values: Sequence[Number], results: Optional[MutableOutputMap] = None, registers: Optional[MutableRegisterMap] = None, prefix: str = "") -> Number:
+    
+    def evaluate_output(self, index: int, input_values: Sequence[Number], results: Optional[MutableResultMap] = None, delays: Optional[MutableDelayMap] = None, prefix: str = "", bits_override: Optional[int] = None, truncate: bool = True) -> Number:
         if index != 0:
             raise IndexError(
                 f"Output index out of range (expected 0-0, got {index})")
@@ -87,9 +87,19 @@ class Register(AbstractOperation):
 
         key = self.key(index, prefix)
         value = self.param("initial_value")
-        if registers is not None:
-            value = registers.get(key, value)
-            registers[key] = self.truncate_inputs(input_values)[0]
+        if delays is not None:
+            value = delays.get(key, value)
+            delays[key] = self.truncate_inputs(input_values, bits_override)[0] if truncate else input_values[0]
         if results is not None:
             results[key] = value
         return value
+
+    @property
+    def initial_value(self) -> Number:
+        """Get the initial value of this delay."""
+        return self.param("initial_value")
+
+    @initial_value.setter
+    def initial_value(self, value: Number) -> None:
+        """Set the initial value of this delay."""
+        self.set_param("initial_value", value)
diff --git a/legacy/README.md b/legacy/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..746d1efdb57c04f4e52bb2ce7f0a7def99c744ed
--- /dev/null
+++ b/legacy/README.md
@@ -0,0 +1,11 @@
+# Legacy files
+
+This folder contains currently unused code that is kept for acedemic purposes,
+or to be used as a refererence for future development.
+
+## simulation_oop
+
+This folder contains a C++ implementation of the Simulation class designed
+using Object-Oriented Programming, as opposed to the current version that uses
+Data-Oriented Design. They are functionally identical, but use different
+styles of programming and have different performance characteristics.
\ No newline at end of file
diff --git a/legacy/simulation_oop/core_operations.h b/legacy/simulation_oop/core_operations.h
new file mode 100644
index 0000000000000000000000000000000000000000..0926572905053cdf5c762f73545642f1ffe3d4f8
--- /dev/null
+++ b/legacy/simulation_oop/core_operations.h
@@ -0,0 +1,236 @@
+#ifndef ASIC_SIMULATION_CORE_OPERATIONS_H
+#define ASIC_SIMULATION_CORE_OPERATIONS_H
+
+#define NOMINMAX
+#include "../debug.h"
+#include "../number.h"
+#include "operation.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <stdexcept>
+#include <utility>
+
+namespace asic {
+
+class constant_operation final : public abstract_operation {
+public:
+	constant_operation(result_key key, number value)
+		: abstract_operation(std::move(key))
+		, m_value(value) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const&) const final {
+		ASIC_DEBUG_MSG("Evaluating constant.");
+		return m_value;
+	}
+
+	number m_value;
+};
+
+class addition_operation final : public binary_operation {
+public:
+	explicit addition_operation(result_key key)
+		: binary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating addition.");
+		return this->evaluate_lhs(context) + this->evaluate_rhs(context);
+	}
+};
+
+class subtraction_operation final : public binary_operation {
+public:
+	explicit subtraction_operation(result_key key)
+		: binary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating subtraction.");
+		return this->evaluate_lhs(context) - this->evaluate_rhs(context);
+	}
+};
+
+class multiplication_operation final : public binary_operation {
+public:
+	explicit multiplication_operation(result_key key)
+		: binary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating multiplication.");
+		return this->evaluate_lhs(context) * this->evaluate_rhs(context);
+	}
+};
+
+class division_operation final : public binary_operation {
+public:
+	explicit division_operation(result_key key)
+		: binary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating division.");
+		return this->evaluate_lhs(context) / this->evaluate_rhs(context);
+	}
+};
+
+class min_operation final : public binary_operation {
+public:
+	explicit min_operation(result_key key)
+		: binary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating min.");
+		auto const lhs = this->evaluate_lhs(context);
+		if (lhs.imag() != 0) {
+			throw std::runtime_error{"Min does not support complex numbers."};
+		}
+		auto const rhs = this->evaluate_rhs(context);
+		if (rhs.imag() != 0) {
+			throw std::runtime_error{"Min does not support complex numbers."};
+		}
+		return std::min(lhs.real(), rhs.real());
+	}
+};
+
+class max_operation final : public binary_operation {
+public:
+	explicit max_operation(result_key key)
+		: binary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating max.");
+		auto const lhs = this->evaluate_lhs(context);
+		if (lhs.imag() != 0) {
+			throw std::runtime_error{"Max does not support complex numbers."};
+		}
+		auto const rhs = this->evaluate_rhs(context);
+		if (rhs.imag() != 0) {
+			throw std::runtime_error{"Max does not support complex numbers."};
+		}
+		return std::max(lhs.real(), rhs.real());
+	}
+};
+
+class square_root_operation final : public unary_operation {
+public:
+	explicit square_root_operation(result_key key)
+		: unary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating sqrt.");
+		return std::sqrt(this->evaluate_input(context));
+	}
+};
+
+class complex_conjugate_operation final : public unary_operation {
+public:
+	explicit complex_conjugate_operation(result_key key)
+		: unary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating conj.");
+		return std::conj(this->evaluate_input(context));
+	}
+};
+
+class absolute_operation final : public unary_operation {
+public:
+	explicit absolute_operation(result_key key)
+		: unary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating abs.");
+		return std::abs(this->evaluate_input(context));
+	}
+};
+
+class constant_multiplication_operation final : public unary_operation {
+public:
+	constant_multiplication_operation(result_key key, number value)
+		: unary_operation(std::move(key))
+		, m_value(value) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 1;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating cmul.");
+		return this->evaluate_input(context) * m_value;
+	}
+
+	number m_value;
+};
+
+class butterfly_operation final : public binary_operation {
+public:
+	explicit butterfly_operation(result_key key)
+		: binary_operation(std::move(key)) {}
+
+	[[nodiscard]] std::size_t output_count() const noexcept final {
+		return 2;
+	}
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t index, evaluation_context const& context) const final {
+		ASIC_DEBUG_MSG("Evaluating bfly.");
+		if (index == 0) {
+			return this->evaluate_lhs(context) + this->evaluate_rhs(context);
+		}
+		return this->evaluate_lhs(context) - this->evaluate_rhs(context);
+	}
+};
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_CORE_OPERATIONS_H
\ No newline at end of file
diff --git a/legacy/simulation_oop/custom_operation.cpp b/legacy/simulation_oop/custom_operation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9153eb5b651f3a481747f3b652f790ce581e70dc
--- /dev/null
+++ b/legacy/simulation_oop/custom_operation.cpp
@@ -0,0 +1,30 @@
+#include "custom_operation.h"
+
+#include <pybind11/stl.h>
+
+namespace py = pybind11;
+
+namespace asic {
+
+custom_operation::custom_operation(result_key key, pybind11::object evaluate_output, pybind11::object truncate_input,
+								   std::size_t output_count)
+	: nary_operation(std::move(key))
+	, m_evaluate_output(std::move(evaluate_output))
+	, m_truncate_input(std::move(truncate_input))
+	, m_output_count(output_count) {}
+
+std::size_t custom_operation::output_count() const noexcept {
+	return m_output_count;
+}
+
+number custom_operation::evaluate_output_impl(std::size_t index, evaluation_context const& context) const {
+	using namespace pybind11::literals;
+	auto input_values = this->evaluate_inputs(context);
+	return m_evaluate_output(index, std::move(input_values), "truncate"_a = false).cast<number>();
+}
+
+number custom_operation::truncate_input(std::size_t index, number value, std::size_t bits) const {
+	return m_truncate_input(index, value, bits).cast<number>();
+}
+
+} // namespace asic
\ No newline at end of file
diff --git a/legacy/simulation_oop/custom_operation.h b/legacy/simulation_oop/custom_operation.h
new file mode 100644
index 0000000000000000000000000000000000000000..8a11aaacbc8c17500069522d9d8f56d9c416d804
--- /dev/null
+++ b/legacy/simulation_oop/custom_operation.h
@@ -0,0 +1,35 @@
+#ifndef ASIC_SIMULATION_CUSTOM_OPERATION_H
+#define ASIC_SIMULATION_CUSTOM_OPERATION_H
+
+#include "../algorithm.h"
+#include "../debug.h"
+#include "../number.h"
+#include "operation.h"
+
+#include <cstddef>
+#include <fmt/format.h>
+#include <functional>
+#include <pybind11/pybind11.h>
+#include <stdexcept>
+#include <utility>
+
+namespace asic {
+
+class custom_operation final : public nary_operation {
+public:
+	custom_operation(result_key key, pybind11::object evaluate_output, pybind11::object truncate_input, std::size_t output_count);
+
+	[[nodiscard]] std::size_t output_count() const noexcept final;
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t index, evaluation_context const& context) const final;
+	[[nodiscard]] number truncate_input(std::size_t index, number value, std::size_t bits) const final;
+
+	pybind11::object m_evaluate_output;
+	pybind11::object m_truncate_input;
+	std::size_t m_output_count;
+};
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_CUSTOM_OPERATION_H
\ No newline at end of file
diff --git a/legacy/simulation_oop/operation.cpp b/legacy/simulation_oop/operation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a9738a0a05287f6ab2d430d4c73560a4c6bd57c5
--- /dev/null
+++ b/legacy/simulation_oop/operation.cpp
@@ -0,0 +1,156 @@
+#include "operation.h"
+
+#include "../debug.h"
+
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace asic {
+
+signal_source::signal_source(std::shared_ptr<const operation> op, std::size_t index, std::optional<std::size_t> bits)
+	: m_operation(std::move(op))
+	, m_index(index)
+	, m_bits(std::move(bits)) {}
+
+signal_source::operator bool() const noexcept {
+	return static_cast<bool>(m_operation);
+}
+
+std::optional<number> signal_source::current_output(delay_map const& delays) const {
+	ASIC_ASSERT(m_operation);
+	return m_operation->current_output(m_index, delays);
+}
+
+number signal_source::evaluate_output(evaluation_context const& context) const {
+	ASIC_ASSERT(m_operation);
+	return m_operation->evaluate_output(m_index, context);
+}
+
+std::optional<std::size_t> signal_source::bits() const noexcept {
+	return m_bits;
+}
+
+abstract_operation::abstract_operation(result_key key)
+	: m_key(std::move(key)) {}
+
+std::optional<number> abstract_operation::current_output(std::size_t, delay_map const&) const {
+	return std::nullopt;
+}
+
+number abstract_operation::evaluate_output(std::size_t index, evaluation_context const& context) const {
+	ASIC_ASSERT(index < this->output_count());
+	ASIC_ASSERT(context.results);
+	auto const key = this->key_of_output(index);
+	if (auto const it = context.results->find(key); it != context.results->end()) {
+		if (it->second) {
+			return *it->second;
+		}
+		throw std::runtime_error{"Direct feedback loop detected when evaluating simulation operation."};
+	}
+	auto& result = context.results->try_emplace(key, this->current_output(index, *context.delays))
+					   .first->second; // Use a reference to avoid potential iterator invalidation caused by evaluate_output_impl.
+	auto const value = this->evaluate_output_impl(index, context);
+	ASIC_ASSERT(&context.results->at(key) == &result);
+	result = value;
+	return value;
+}
+
+number abstract_operation::truncate_input(std::size_t index, number value, std::size_t bits) const {
+	if (value.imag() != 0) {
+		throw py::type_error{
+			fmt::format("Complex value cannot be truncated to {} bits as requested by the signal connected to input #{}", bits, index)};
+	}
+	if (bits > 64) {
+		throw py::value_error{
+			fmt::format("Cannot truncate to {} (more than 64) bits as requested by the singal connected to input #{}", bits, index)};
+	}
+	return number{static_cast<number::value_type>(static_cast<std::int64_t>(value.real()) & ((std::int64_t{1} << bits) - 1))};
+}
+
+result_key const& abstract_operation::key_base() const {
+	return m_key;
+}
+
+result_key abstract_operation::key_of_output(std::size_t index) const {
+	if (m_key.empty()) {
+		return fmt::to_string(index);
+	}
+	if (this->output_count() == 1) {
+		return m_key;
+	}
+	return fmt::format("{}.{}", m_key, index);
+}
+
+unary_operation::unary_operation(result_key key)
+	: abstract_operation(std::move(key)) {}
+
+void unary_operation::connect(signal_source in) {
+	m_in = std::move(in);
+}
+
+bool unary_operation::connected() const noexcept {
+	return static_cast<bool>(m_in);
+}
+
+signal_source const& unary_operation::input() const noexcept {
+	return m_in;
+}
+
+number unary_operation::evaluate_input(evaluation_context const& context) const {
+	auto const value = m_in.evaluate_output(context);
+	auto const bits = context.bits_override.value_or(m_in.bits().value_or(0));
+	return (context.truncate && bits) ? this->truncate_input(0, value, bits) : value;
+}
+
+binary_operation::binary_operation(result_key key)
+	: abstract_operation(std::move(key)) {}
+
+void binary_operation::connect(signal_source lhs, signal_source rhs) {
+	m_lhs = std::move(lhs);
+	m_rhs = std::move(rhs);
+}
+
+signal_source const& binary_operation::lhs() const noexcept {
+	return m_lhs;
+}
+
+signal_source const& binary_operation::rhs() const noexcept {
+	return m_rhs;
+}
+
+number binary_operation::evaluate_lhs(evaluation_context const& context) const {
+	auto const value = m_lhs.evaluate_output(context);
+	auto const bits = context.bits_override.value_or(m_lhs.bits().value_or(0));
+	return (context.truncate && bits) ? this->truncate_input(0, value, bits) : value;
+}
+
+number binary_operation::evaluate_rhs(evaluation_context const& context) const {
+	auto const value = m_rhs.evaluate_output(context);
+	auto const bits = context.bits_override.value_or(m_rhs.bits().value_or(0));
+	return (context.truncate && bits) ? this->truncate_input(0, value, bits) : value;
+}
+
+nary_operation::nary_operation(result_key key)
+	: abstract_operation(std::move(key)) {}
+
+void nary_operation::connect(std::vector<signal_source> inputs) {
+	m_inputs = std::move(inputs);
+}
+
+span<signal_source const> nary_operation::inputs() const noexcept {
+	return m_inputs;
+}
+
+std::vector<number> nary_operation::evaluate_inputs(evaluation_context const& context) const {
+	auto values = std::vector<number>{};
+	values.reserve(m_inputs.size());
+	for (auto const& input : m_inputs) {
+		auto const value = input.evaluate_output(context);
+		auto const bits = context.bits_override.value_or(input.bits().value_or(0));
+		values.push_back((context.truncate && bits) ? this->truncate_input(0, value, bits) : value);
+	}
+	return values;
+}
+
+} // namespace asic
\ No newline at end of file
diff --git a/legacy/simulation_oop/operation.h b/legacy/simulation_oop/operation.h
new file mode 100644
index 0000000000000000000000000000000000000000..344eacc1482c40021b3d0ff686cbef5c71085f58
--- /dev/null
+++ b/legacy/simulation_oop/operation.h
@@ -0,0 +1,132 @@
+#ifndef ASIC_SIMULATION_OPERATION_H
+#define ASIC_SIMULATION_OPERATION_H
+
+#include "../number.h"
+#include "../span.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <fmt/format.h>
+#include <memory>
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace asic {
+
+class operation;
+class signal_source;
+
+using result_key = std::string;
+using result_map = std::unordered_map<result_key, std::optional<number>>;
+using delay_map = std::unordered_map<result_key, number>;
+using delay_queue = std::vector<std::pair<result_key, signal_source const*>>;
+
+struct evaluation_context final {
+	result_map* results;
+	delay_map* delays;
+	delay_queue* deferred_delays;
+	std::optional<std::size_t> bits_override;
+	bool truncate;
+};
+
+class signal_source final {
+public:
+	signal_source() noexcept = default;
+	signal_source(std::shared_ptr<const operation> op, std::size_t index, std::optional<std::size_t> bits);
+
+	[[nodiscard]] explicit operator bool() const noexcept;
+
+	[[nodiscard]] std::optional<number> current_output(delay_map const& delays) const;
+	[[nodiscard]] number evaluate_output(evaluation_context const& context) const;
+
+	[[nodiscard]] std::optional<std::size_t> bits() const noexcept;
+
+private:
+	std::shared_ptr<const operation> m_operation;
+	std::size_t m_index = 0;
+	std::optional<std::size_t> m_bits;
+};
+
+class operation {
+public:
+	virtual ~operation() = default;
+	[[nodiscard]] virtual std::size_t output_count() const noexcept = 0;
+	[[nodiscard]] virtual std::optional<number> current_output(std::size_t index, delay_map const& delays) const = 0;
+	[[nodiscard]] virtual number evaluate_output(std::size_t index, evaluation_context const& context) const = 0;
+};
+
+class abstract_operation : public operation {
+public:
+	explicit abstract_operation(result_key key);
+	virtual ~abstract_operation() = default;
+
+	[[nodiscard]] std::optional<number> current_output(std::size_t, delay_map const&) const override;
+	[[nodiscard]] number evaluate_output(std::size_t index, evaluation_context const& context) const override;
+
+protected:
+	[[nodiscard]] virtual number evaluate_output_impl(std::size_t index, evaluation_context const& context) const = 0;
+	[[nodiscard]] virtual number truncate_input(std::size_t index, number value, std::size_t bits) const;
+
+	[[nodiscard]] result_key const& key_base() const;
+	[[nodiscard]] result_key key_of_output(std::size_t index) const;
+
+private:
+	result_key m_key;
+};
+
+class unary_operation : public abstract_operation {
+public:
+	explicit unary_operation(result_key key);
+	virtual ~unary_operation() = default;
+
+	void connect(signal_source in);
+
+protected:
+	[[nodiscard]] bool connected() const noexcept;
+	[[nodiscard]] signal_source const& input() const noexcept;
+	[[nodiscard]] number evaluate_input(evaluation_context const& context) const;
+
+private:
+	signal_source m_in;
+};
+
+class binary_operation : public abstract_operation {
+public:
+	explicit binary_operation(result_key key);
+	virtual ~binary_operation() = default;
+
+	void connect(signal_source lhs, signal_source rhs);
+
+protected:
+	[[nodiscard]] signal_source const& lhs() const noexcept;
+	[[nodiscard]] signal_source const& rhs() const noexcept;
+	[[nodiscard]] number evaluate_lhs(evaluation_context const& context) const;
+	[[nodiscard]] number evaluate_rhs(evaluation_context const& context) const;
+
+private:
+	signal_source m_lhs;
+	signal_source m_rhs;
+};
+
+class nary_operation : public abstract_operation {
+public:
+	explicit nary_operation(result_key key);
+	virtual ~nary_operation() = default;
+
+	void connect(std::vector<signal_source> inputs);
+
+protected:
+	[[nodiscard]] span<signal_source const> inputs() const noexcept;
+	[[nodiscard]] std::vector<number> evaluate_inputs(evaluation_context const& context) const;
+
+private:
+	std::vector<signal_source> m_inputs;
+};
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_OPERATION_H
\ No newline at end of file
diff --git a/legacy/simulation_oop/signal_flow_graph.cpp b/legacy/simulation_oop/signal_flow_graph.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c3763c81e8f97f22caf42a47d88c1186b9a874d
--- /dev/null
+++ b/legacy/simulation_oop/signal_flow_graph.cpp
@@ -0,0 +1,144 @@
+#include "signal_flow_graph.h"
+
+#include "../debug.h"
+
+namespace py = pybind11;
+
+namespace asic {
+
+signal_flow_graph_operation::signal_flow_graph_operation(result_key key)
+	: abstract_operation(std::move(key)) {}
+
+void signal_flow_graph_operation::create(pybind11::handle sfg, added_operation_cache& added) {
+	ASIC_DEBUG_MSG("Creating SFG.");
+	for (auto const& [i, op] : enumerate(sfg.attr("output_operations"))) {
+		ASIC_DEBUG_MSG("Adding output op.");
+		m_output_operations.emplace_back(this->key_of_output(i)).connect(make_source(op, 0, added, this->key_base()));
+	}
+	for (auto const& op : sfg.attr("input_operations")) {
+		ASIC_DEBUG_MSG("Adding input op.");
+		if (!m_input_operations.emplace_back(std::dynamic_pointer_cast<input_operation>(make_operation(op, added, this->key_base())))) {
+			throw py::value_error{"Invalid input operation in SFG."};
+		}
+	}
+}
+
+std::vector<std::shared_ptr<input_operation>> const& signal_flow_graph_operation::inputs() noexcept {
+	return m_input_operations;
+}
+
+std::size_t signal_flow_graph_operation::output_count() const noexcept {
+	return m_output_operations.size();
+}
+
+number signal_flow_graph_operation::evaluate_output(std::size_t index, evaluation_context const& context) const {
+	ASIC_DEBUG_MSG("Evaluating SFG.");
+	return m_output_operations.at(index).evaluate_output(0, context);
+}
+
+number signal_flow_graph_operation::evaluate_output_impl(std::size_t, evaluation_context const&) const {
+	return number{};
+}
+
+signal_source signal_flow_graph_operation::make_source(pybind11::handle op, std::size_t input_index, added_operation_cache& added,
+													   std::string_view prefix) {
+	auto const signal = py::object{op.attr("inputs")[py::int_{input_index}].attr("signals")[py::int_{0}]};
+	auto const src = py::handle{signal.attr("source")};
+	auto const operation = py::handle{src.attr("operation")};
+	auto const index = src.attr("index").cast<std::size_t>();
+	auto bits = std::optional<std::size_t>{};
+	if (!signal.attr("bits").is_none()) {
+		bits = signal.attr("bits").cast<std::size_t>();
+	}
+	return signal_source{make_operation(operation, added, prefix), index, bits};
+}
+
+std::shared_ptr<operation> signal_flow_graph_operation::add_signal_flow_graph_operation(pybind11::handle sfg, added_operation_cache& added,
+																						std::string_view prefix, result_key key) {
+	auto const new_op = add_operation<signal_flow_graph_operation>(sfg, added, std::move(key));
+	new_op->create(sfg, added);
+	for (auto&& [i, input] : enumerate(new_op->inputs())) {
+		input->connect(make_source(sfg, i, added, prefix));
+	}
+	return new_op;
+}
+
+std::shared_ptr<custom_operation> signal_flow_graph_operation::add_custom_operation(pybind11::handle op, added_operation_cache& added,
+																					std::string_view prefix, result_key key) {
+	auto const input_count = op.attr("input_count").cast<std::size_t>();
+	auto const output_count = op.attr("output_count").cast<std::size_t>();
+	auto const new_op = add_operation<custom_operation>(
+		op, added, key, op.attr("evaluate_output"), op.attr("truncate_input"), output_count);
+	auto inputs = std::vector<signal_source>{};
+	inputs.reserve(input_count);
+	for (auto const i : range(input_count)) {
+		inputs.push_back(make_source(op, i, added, prefix));
+	}
+	new_op->connect(std::move(inputs));
+	return new_op;
+}
+
+std::shared_ptr<operation> signal_flow_graph_operation::make_operation(pybind11::handle op, added_operation_cache& added,
+																	   std::string_view prefix) {
+	if (auto const it = added.find(op.ptr()); it != added.end()) {
+		ASIC_ASSERT(it->second);
+		return it->second;
+	}
+	auto const graph_id = op.attr("graph_id").cast<std::string_view>();
+	auto const type_name = op.attr("type_name")().cast<std::string_view>();
+	auto key = (prefix.empty()) ? result_key{graph_id} : fmt::format("{}.{}", prefix, graph_id);
+	if (type_name == "c") {
+		auto const value = op.attr("value").cast<number>();
+		return add_operation<constant_operation>(op, added, std::move(key), value);
+	}
+	if (type_name == "add") {
+		return add_binary_operation<addition_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "sub") {
+		return add_binary_operation<subtraction_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "mul") {
+		return add_binary_operation<multiplication_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "div") {
+		return add_binary_operation<division_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "min") {
+		return add_binary_operation<min_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "max") {
+		return add_binary_operation<max_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "sqrt") {
+		return add_unary_operation<square_root_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "conj") {
+		return add_unary_operation<complex_conjugate_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "abs") {
+		return add_unary_operation<absolute_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "cmul") {
+		auto const value = op.attr("value").cast<number>();
+		return add_unary_operation<constant_multiplication_operation>(op, added, prefix, std::move(key), value);
+	}
+	if (type_name == "bfly") {
+		return add_binary_operation<butterfly_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "in") {
+		return add_operation<input_operation>(op, added, std::move(key));
+	}
+	if (type_name == "out") {
+		return add_unary_operation<output_operation>(op, added, prefix, std::move(key));
+	}
+	if (type_name == "t") {
+		auto const initial_value = op.attr("initial_value").cast<number>();
+		return add_unary_operation<delay_operation>(op, added, prefix, std::move(key), initial_value);
+	}
+	if (type_name == "sfg") {
+		return add_signal_flow_graph_operation(op, added, prefix, std::move(key));
+	}
+	return add_custom_operation(op, added, prefix, std::move(key));
+}
+
+} // namespace asic
\ No newline at end of file
diff --git a/legacy/simulation_oop/signal_flow_graph.h b/legacy/simulation_oop/signal_flow_graph.h
new file mode 100644
index 0000000000000000000000000000000000000000..f06788249e367d3e8e0602f04c6dcf91c71b7a96
--- /dev/null
+++ b/legacy/simulation_oop/signal_flow_graph.h
@@ -0,0 +1,82 @@
+#ifndef ASIC_SIMULATION_SIGNAL_FLOW_GRAPH_H
+#define ASIC_SIMULATION_SIGNAL_FLOW_GRAPH_H
+
+#include "../algorithm.h"
+#include "../debug.h"
+#include "../number.h"
+#include "core_operations.h"
+#include "custom_operation.h"
+#include "operation.h"
+#include "special_operations.h"
+
+#include <Python.h>
+#include <cstddef>
+#include <fmt/format.h>
+#include <functional>
+#include <memory>
+#include <pybind11/pybind11.h>
+#include <stdexcept>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace asic {
+
+class signal_flow_graph_operation final : public abstract_operation {
+public:
+	using added_operation_cache = std::unordered_map<PyObject const*, std::shared_ptr<operation>>;
+
+	signal_flow_graph_operation(result_key key);
+
+	void create(pybind11::handle sfg, added_operation_cache& added);
+
+	[[nodiscard]] std::vector<std::shared_ptr<input_operation>> const& inputs() noexcept;
+	[[nodiscard]] std::size_t output_count() const noexcept final;
+
+	[[nodiscard]] number evaluate_output(std::size_t index, evaluation_context const& context) const final;
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t index, evaluation_context const& context) const final;
+
+	[[nodiscard]] static signal_source make_source(pybind11::handle op, std::size_t input_index, added_operation_cache& added,
+												   std::string_view prefix);
+
+	template <typename Operation, typename... Args>
+	[[nodiscard]] static std::shared_ptr<Operation> add_operation(pybind11::handle op, added_operation_cache& added, Args&&... args) {
+		return std::static_pointer_cast<Operation>(
+			added.try_emplace(op.ptr(), std::make_shared<Operation>(std::forward<Args>(args)...)).first->second);
+	}
+
+	template <typename Operation, typename... Args>
+	[[nodiscard]] static std::shared_ptr<Operation> add_unary_operation(pybind11::handle op, added_operation_cache& added,
+																		std::string_view prefix, Args&&... args) {
+		auto const new_op = add_operation<Operation>(op, added, std::forward<Args>(args)...);
+		new_op->connect(make_source(op, 0, added, prefix));
+		return new_op;
+	}
+
+	template <typename Operation, typename... Args>
+	[[nodiscard]] static std::shared_ptr<Operation> add_binary_operation(pybind11::handle op, added_operation_cache& added,
+																		 std::string_view prefix, Args&&... args) {
+		auto const new_op = add_operation<Operation>(op, added, std::forward<Args>(args)...);
+		new_op->connect(make_source(op, 0, added, prefix), make_source(op, 1, added, prefix));
+		return new_op;
+	}
+
+	[[nodiscard]] static std::shared_ptr<operation> add_signal_flow_graph_operation(pybind11::handle sfg, added_operation_cache& added,
+																					std::string_view prefix, result_key key);
+
+	[[nodiscard]] static std::shared_ptr<custom_operation> add_custom_operation(pybind11::handle op, added_operation_cache& added,
+																				std::string_view prefix, result_key key);
+
+	[[nodiscard]] static std::shared_ptr<operation> make_operation(pybind11::handle op, added_operation_cache& added,
+																   std::string_view prefix);
+
+	std::vector<output_operation> m_output_operations;
+	std::vector<std::shared_ptr<input_operation>> m_input_operations;
+};
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_SIGNAL_FLOW_GRAPH_H
\ No newline at end of file
diff --git a/legacy/simulation_oop/simulation.cpp b/legacy/simulation_oop/simulation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4d6ff83337a6dade0a0d476e5942ee3b14178195
--- /dev/null
+++ b/legacy/simulation_oop/simulation.cpp
@@ -0,0 +1,138 @@
+#define NOMINMAX
+#include "simulation.h"
+
+#include "../debug.h"
+
+namespace py = pybind11;
+
+namespace asic {
+
+simulation::simulation(pybind11::handle sfg, std::optional<std::vector<std::optional<input_provider_t>>> input_providers)
+	: m_sfg("")
+	, m_input_functions(sfg.attr("input_count").cast<std::size_t>(), [](iteration_t) -> number { return number{}; }) {
+	if (input_providers) {
+		this->set_inputs(std::move(*input_providers));
+	}
+	auto added = signal_flow_graph_operation::added_operation_cache{};
+	m_sfg.create(sfg, added);
+}
+
+void simulation::set_input(std::size_t index, input_provider_t input_provider) {
+	if (index >= m_input_functions.size()) {
+		throw py::index_error{fmt::format("Input index out of range (expected 0-{}, got {})", m_input_functions.size() - 1, index)};
+	}
+	if (auto* const callable = std::get_if<input_function_t>(&input_provider)) {
+		m_input_functions[index] = std::move(*callable);
+	} else if (auto* const numeric = std::get_if<number>(&input_provider)) {
+		m_input_functions[index] = [value = *numeric](iteration_t) -> number {
+			return value;
+		};
+	} else if (auto* const list = std::get_if<std::vector<number>>(&input_provider)) {
+		if (!m_input_length) {
+			m_input_length = static_cast<iteration_t>(list->size());
+		} else if (*m_input_length != static_cast<iteration_t>(list->size())) {
+			throw py::value_error{fmt::format("Inconsistent input length for simulation (was {}, got {})", *m_input_length, list->size())};
+		}
+		m_input_functions[index] = [values = std::move(*list)](iteration_t n) -> number {
+			return values.at(n);
+		};
+	}
+}
+
+void simulation::set_inputs(std::vector<std::optional<input_provider_t>> input_providers) {
+	if (input_providers.size() != m_input_functions.size()) {
+		throw py::value_error{fmt::format(
+			"Wrong number of inputs supplied to simulation (expected {}, got {})", m_input_functions.size(), input_providers.size())};
+	}
+	for (auto&& [i, input_provider] : enumerate(input_providers)) {
+		if (input_provider) {
+			this->set_input(i, std::move(*input_provider));
+		}
+	}
+}
+
+std::vector<number> simulation::step(bool save_results, std::optional<std::size_t> bits_override, bool truncate) {
+	return this->run_for(1, save_results, bits_override, truncate);
+}
+
+std::vector<number> simulation::run_until(iteration_t iteration, bool save_results, std::optional<std::size_t> bits_override,
+										  bool truncate) {
+	auto result = std::vector<number>{};
+	while (m_iteration < iteration) {
+		ASIC_DEBUG_MSG("Running simulation iteration.");
+		for (auto&& [input, function] : zip(m_sfg.inputs(), m_input_functions)) {
+			input->value(function(m_iteration));
+		}
+
+		result.clear();
+		result.reserve(m_sfg.output_count());
+
+		auto results = result_map{};
+		auto deferred_delays = delay_queue{};
+		auto context = evaluation_context{};
+		context.results = &results;
+		context.delays = &m_delays;
+		context.deferred_delays = &deferred_delays;
+		context.bits_override = bits_override;
+		context.truncate = truncate;
+
+		for (auto const i : range(m_sfg.output_count())) {
+			result.push_back(m_sfg.evaluate_output(i, context));
+		}
+
+		while (!deferred_delays.empty()) {
+			auto new_deferred_delays = delay_queue{};
+			context.deferred_delays = &new_deferred_delays;
+			for (auto const& [key, src] : deferred_delays) {
+				ASIC_ASSERT(src);
+				m_delays[key] = src->evaluate_output(context);
+			}
+			deferred_delays = std::move(new_deferred_delays);
+		}
+
+		if (save_results) {
+			for (auto const& [key, value] : results) {
+				m_results[key].push_back(value.value());
+			}
+		}
+		++m_iteration;
+	}
+	return result;
+}
+
+std::vector<number> simulation::run_for(iteration_t iterations, bool save_results, std::optional<std::size_t> bits_override,
+										bool truncate) {
+	if (iterations > std::numeric_limits<iteration_t>::max() - m_iteration) {
+		throw py::value_error("Simulation iteration type overflow!");
+	}
+	return this->run_until(m_iteration + iterations, save_results, bits_override, truncate);
+}
+
+std::vector<number> simulation::run(bool save_results, std::optional<std::size_t> bits_override, bool truncate) {
+	if (m_input_length) {
+		return this->run_until(*m_input_length, save_results, bits_override, truncate);
+	}
+	throw py::index_error{"Tried to run unlimited simulation"};
+}
+
+iteration_t simulation::iteration() const noexcept {
+	return m_iteration;
+}
+
+pybind11::dict simulation::results() const noexcept {
+	auto results = py::dict{};
+	for (auto const& [key, values] : m_results) {
+		results[py::str{key}] = py::array{static_cast<py::ssize_t>(values.size()), values.data()};
+	}
+	return results;
+}
+
+void simulation::clear_results() noexcept {
+	m_results.clear();
+}
+
+void simulation::clear_state() noexcept {
+	m_delays.clear();
+}
+
+} // namespace asic
diff --git a/legacy/simulation_oop/simulation.h b/legacy/simulation_oop/simulation.h
new file mode 100644
index 0000000000000000000000000000000000000000..38e2771b877772bd28048cae16d791bb4e0b45e3
--- /dev/null
+++ b/legacy/simulation_oop/simulation.h
@@ -0,0 +1,66 @@
+#ifndef ASIC_SIMULATION_OOP_H
+#define ASIC_SIMULATION_OOP_H
+
+#include "../number.h"
+#include "core_operations.h"
+#include "custom_operation.h"
+#include "operation.h"
+#include "signal_flow_graph.h"
+#include "special_operations.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <fmt/format.h>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <optional>
+#include <pybind11/functional.h>
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace asic {
+
+using iteration_t = std::uint32_t;
+using result_array_map = std::unordered_map<std::string, std::vector<number>>;
+using input_function_t = std::function<number(iteration_t)>;
+using input_provider_t = std::variant<number, std::vector<number>, input_function_t>;
+
+class simulation final {
+public:
+	simulation(pybind11::handle sfg, std::optional<std::vector<std::optional<input_provider_t>>> input_providers = std::nullopt);
+
+	void set_input(std::size_t index, input_provider_t input_provider);
+	void set_inputs(std::vector<std::optional<input_provider_t>> input_providers);
+
+	[[nodiscard]] std::vector<number> step(bool save_results, std::optional<std::size_t> bits_override, bool truncate);
+	[[nodiscard]] std::vector<number> run_until(iteration_t iteration, bool save_results, std::optional<std::size_t> bits_override,
+												bool truncate);
+	[[nodiscard]] std::vector<number> run_for(iteration_t iterations, bool save_results, std::optional<std::size_t> bits_override,
+											  bool truncate);
+	[[nodiscard]] std::vector<number> run(bool save_results, std::optional<std::size_t> bits_override, bool truncate);
+
+	[[nodiscard]] iteration_t iteration() const noexcept;
+	[[nodiscard]] pybind11::dict results() const noexcept;
+
+	void clear_results() noexcept;
+	void clear_state() noexcept;
+
+private:
+	signal_flow_graph_operation m_sfg;
+	result_array_map m_results;
+	delay_map m_delays;
+	iteration_t m_iteration = 0;
+	std::vector<input_function_t> m_input_functions;
+	std::optional<iteration_t> m_input_length;
+};
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_OOP_H
\ No newline at end of file
diff --git a/legacy/simulation_oop/special_operations.cpp b/legacy/simulation_oop/special_operations.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1f7a6519a90ba08224585e36093694becf495c4d
--- /dev/null
+++ b/legacy/simulation_oop/special_operations.cpp
@@ -0,0 +1,78 @@
+#include "special_operations.h"
+
+#include "../debug.h"
+
+namespace asic {
+
+input_operation::input_operation(result_key key)
+	: unary_operation(std::move(key)) {}
+
+std::size_t input_operation::output_count() const noexcept {
+	return 1;
+}
+
+number input_operation::value() const noexcept {
+	return m_value;
+}
+
+void input_operation::value(number value) noexcept {
+	m_value = value;
+}
+
+number input_operation::evaluate_output_impl(std::size_t, evaluation_context const& context) const {
+	ASIC_DEBUG_MSG("Evaluating input.");
+	if (this->connected()) {
+		return this->evaluate_input(context);
+	}
+	return m_value;
+}
+
+output_operation::output_operation(result_key key)
+	: unary_operation(std::move(key)) {}
+
+std::size_t output_operation::output_count() const noexcept {
+	return 1;
+}
+
+number output_operation::evaluate_output_impl(std::size_t, evaluation_context const& context) const {
+	ASIC_DEBUG_MSG("Evaluating output.");
+	return this->evaluate_input(context);
+}
+
+delay_operation::delay_operation(result_key key, number initial_value)
+	: unary_operation(std::move(key))
+	, m_initial_value(initial_value) {}
+
+std::size_t delay_operation::output_count() const noexcept {
+	return 1;
+}
+
+std::optional<number> delay_operation::current_output(std::size_t index, delay_map const& delays) const {
+	auto const key = this->key_of_output(index);
+	if (auto const it = delays.find(key); it != delays.end()) {
+		return it->second;
+	}
+	return m_initial_value;
+}
+
+number delay_operation::evaluate_output(std::size_t index, evaluation_context const& context) const {
+	ASIC_DEBUG_MSG("Evaluating delay.");
+	ASIC_ASSERT(index == 0);
+	ASIC_ASSERT(context.results);
+	ASIC_ASSERT(context.delays);
+	ASIC_ASSERT(context.deferred_delays);
+	auto key = this->key_of_output(index);
+	auto const value = context.delays->try_emplace(key, m_initial_value).first->second;
+	auto const& [it, inserted] = context.results->try_emplace(key, value);
+	if (inserted) {
+		context.deferred_delays->emplace_back(std::move(key), &this->input());
+		return value;
+	}
+	return it->second.value();
+}
+
+[[nodiscard]] number delay_operation::evaluate_output_impl(std::size_t, evaluation_context const&) const {
+	return number{};
+}
+
+} // namespace asic
diff --git a/legacy/simulation_oop/special_operations.h b/legacy/simulation_oop/special_operations.h
new file mode 100644
index 0000000000000000000000000000000000000000..88fb087e84378e36f423364d2c7d83a083828784
--- /dev/null
+++ b/legacy/simulation_oop/special_operations.h
@@ -0,0 +1,55 @@
+#ifndef ASIC_SIMULATION_SPECIAL_OPERATIONS_H
+#define ASIC_SIMULATION_SPECIAL_OPERATIONS_H
+
+#include "../debug.h"
+#include "../number.h"
+#include "operation.h"
+
+#include <cassert>
+#include <cstddef>
+#include <utility>
+
+namespace asic {
+
+class input_operation final : public unary_operation {
+public:
+	explicit input_operation(result_key key);
+
+	[[nodiscard]] std::size_t output_count() const noexcept final;
+	[[nodiscard]] number value() const noexcept;
+	void value(number value) noexcept;
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t index, evaluation_context const& context) const final;
+
+	number m_value{};
+};
+
+class output_operation final : public unary_operation {
+public:
+	explicit output_operation(result_key key);
+
+	[[nodiscard]] std::size_t output_count() const noexcept final;
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t index, evaluation_context const& context) const final;
+};
+
+class delay_operation final : public unary_operation {
+public:
+	delay_operation(result_key key, number initial_value);
+
+	[[nodiscard]] std::size_t output_count() const noexcept final;
+
+	[[nodiscard]] std::optional<number> current_output(std::size_t index, delay_map const& delays) const final;
+	[[nodiscard]] number evaluate_output(std::size_t index, evaluation_context const& context) const final;
+
+private:
+	[[nodiscard]] number evaluate_output_impl(std::size_t index, evaluation_context const& context) const final;
+
+	number m_initial_value;
+};
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_SPECIAL_OPERATIONS_H
\ No newline at end of file
diff --git a/setup.py b/setup.py
index f91d34d54336708275a28b1f9f02ea1b2d80141f..0568514756180c02b81a7b323789fb72a6e5bf26 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ import setuptools
 from setuptools import Extension
 from setuptools.command.build_ext import build_ext
 
-CMAKE_EXE = os.environ.get('CMAKE_EXE', shutil.which('cmake'))
+CMAKE_EXE = os.environ.get("CMAKE_EXE", shutil.which("cmake"))
 
 class CMakeExtension(Extension):
     def __init__(self, name, sourcedir = ""):
@@ -25,6 +25,7 @@ class CMakeBuild(build_ext):
         cmake_output_dir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))
         cmake_configure_argv = [
             CMAKE_EXE, ext.sourcedir,
+            "-DASIC_BUILDING_PYTHON_DISTRIBUTION=true",
             "-DCMAKE_BUILD_TYPE=" + cmake_build_type,
             "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + cmake_output_dir,
             "-DPYTHON_EXECUTABLE=" + sys.executable,
diff --git a/src/algorithm.h b/src/algorithm.h
new file mode 100644
index 0000000000000000000000000000000000000000..c86275d1c4ef09a525372d38a4c07a0beb11e8c4
--- /dev/null
+++ b/src/algorithm.h
@@ -0,0 +1,325 @@
+#ifndef ASIC_ALGORITHM_H
+#define ASIC_ALGORITHM_H
+
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+namespace asic {
+namespace detail {
+
+template <typename Reference>
+class arrow_proxy final {
+public:
+	template <typename Ref>
+	constexpr explicit arrow_proxy(Ref&& r) : m_r(std::forward<Ref>(r)) {}
+
+	Reference* operator->() {
+		return std::addressof(m_r);
+	}
+
+private:
+	Reference m_r;
+};
+
+template <typename T>
+struct range_view final {
+	class iterator final {
+	public:
+		using difference_type = std::ptrdiff_t;
+		using value_type = T const;
+		using reference = value_type&;
+		using pointer = value_type*;
+		using iterator_category = std::random_access_iterator_tag;
+
+		constexpr iterator() noexcept = default;
+		constexpr explicit iterator(T value) noexcept : m_value(value) {}
+
+		[[nodiscard]] constexpr bool operator==(iterator const& other) const noexcept {
+			return m_value == other.m_value;
+		}
+
+		[[nodiscard]] constexpr bool operator!=(iterator const& other) const noexcept {
+			return m_value != other.m_value;
+		}
+
+		[[nodiscard]] constexpr bool operator<(iterator const& other) const noexcept {
+			return m_value < other.m_value;
+		}
+
+		[[nodiscard]] constexpr bool operator>(iterator const& other) const noexcept {
+			return m_value > other.m_value;
+		}
+
+		[[nodiscard]] constexpr bool operator<=(iterator const& other) const noexcept {
+			return m_value <= other.m_value;
+		}
+
+		[[nodiscard]] constexpr bool operator>=(iterator const& other) const noexcept {
+			return m_value >= other.m_value;
+		}
+
+		[[nodiscard]] constexpr reference operator*() const noexcept {
+			return m_value;
+		}
+
+		[[nodiscard]] constexpr pointer operator->() const noexcept {
+			return std::addressof(**this);
+		}
+
+		constexpr iterator& operator++() noexcept {
+			++m_value;
+			return *this;
+		}
+
+		constexpr iterator operator++(int) noexcept {
+			return iterator{m_value++};
+		}
+
+		constexpr iterator& operator--() noexcept {
+			--m_value;
+			return *this;
+		}
+
+		constexpr iterator operator--(int) noexcept {
+			return iterator{m_value--};
+		}
+
+		constexpr iterator& operator+=(difference_type n) noexcept {
+			m_value += n;
+			return *this;
+		}
+
+		constexpr iterator& operator-=(difference_type n) noexcept {
+			m_value -= n;
+			return *this;
+		}
+
+		[[nodiscard]] constexpr T operator[](difference_type n) noexcept {
+			return m_value + static_cast<T>(n);
+		}
+
+		[[nodiscard]] constexpr friend iterator operator+(iterator const& lhs, difference_type rhs) noexcept {
+			return iterator{lhs.m_value + rhs};
+		}
+
+		[[nodiscard]] constexpr friend iterator operator+(difference_type lhs, iterator const& rhs) noexcept {
+			return iterator{lhs + rhs.m_value};
+		}
+
+		[[nodiscard]] constexpr friend iterator operator-(iterator const& lhs, difference_type rhs) noexcept {
+			return iterator{lhs.m_value - rhs};
+		}
+
+		[[nodiscard]] constexpr friend difference_type operator-(iterator const& lhs, iterator const& rhs) noexcept {
+			return static_cast<difference_type>(lhs.m_value - rhs.m_value);
+		}
+
+	private:
+		T m_value{};
+	};
+
+	using sentinel = iterator;
+
+	template <typename First, typename Last>
+	constexpr range_view(First&& first, Last&& last) noexcept : m_begin(std::forward<First>(first)), m_end(std::forward<Last>(last)) {}
+
+	[[nodiscard]] constexpr iterator begin() const noexcept {
+		return m_begin;
+	}
+	[[nodiscard]] constexpr sentinel end() const noexcept {
+		return m_end;
+	}
+
+	iterator m_begin;
+	sentinel m_end;
+};
+
+template <typename Range, typename Iterator, typename Sentinel>
+struct enumerate_view final {
+	using sentinel = Sentinel;
+
+	class iterator final {
+	public:
+		using difference_type = typename std::iterator_traits<Iterator>::difference_type;
+		using value_type = typename std::iterator_traits<Iterator>::value_type;
+		using reference = std::pair<std::size_t const&, decltype(*std::declval<Iterator const>())>;
+		using pointer = arrow_proxy<reference>;
+		using iterator_category =
+			std::common_type_t<typename std::iterator_traits<Iterator>::iterator_category, std::bidirectional_iterator_tag>;
+
+		constexpr iterator() = default;
+
+		constexpr iterator(Iterator it, std::size_t index) : m_it(std::move(it)), m_index(index) {}
+
+		[[nodiscard]] constexpr bool operator==(iterator const& other) const {
+			return m_it == other.m_it;
+		}
+
+		[[nodiscard]] constexpr bool operator!=(iterator const& other) const {
+			return m_it != other.m_it;
+		}
+
+		[[nodiscard]] constexpr bool operator==(sentinel const& other) const {
+			return m_it == other;
+		}
+
+		[[nodiscard]] constexpr bool operator!=(sentinel const& other) const {
+			return m_it != other;
+		}
+
+		[[nodiscard]] constexpr reference operator*() const {
+			return reference{m_index, *m_it};
+		}
+
+		[[nodiscard]] constexpr pointer operator->() const {
+			return pointer{**this};
+		}
+
+		constexpr iterator& operator++() {
+			++m_it;
+			++m_index;
+			return *this;
+		}
+
+		constexpr iterator operator++(int) {
+			return iterator{m_it++, m_index++};
+		}
+
+		constexpr iterator& operator--() {
+			--m_it;
+			--m_index;
+			return *this;
+		}
+
+		constexpr iterator operator--(int) {
+			return iterator{m_it--, m_index--};
+		}
+
+	private:
+		Iterator m_it;
+		std::size_t m_index = 0;
+	};
+
+	constexpr iterator begin() const {
+		return iterator{std::begin(m_range), 0};
+	}
+
+	constexpr sentinel end() const {
+		return std::end(m_range);
+	}
+
+	Range m_range;
+};
+
+template <typename Range1, typename Range2, typename Iterator1, typename Iterator2, typename Sentinel1, typename Sentinel2>
+struct zip_view final {
+	using sentinel = std::pair<Sentinel1, Sentinel2>;
+
+	class iterator final {
+	public:
+		using difference_type = std::common_type_t<typename std::iterator_traits<Iterator1>::difference_type,
+			typename std::iterator_traits<Iterator2>::difference_type>;
+		using value_type =
+			std::pair<typename std::iterator_traits<Iterator1>::value_type, typename std::iterator_traits<Iterator2>::value_type>;
+		using reference = std::pair<decltype(*std::declval<Iterator1 const>()), decltype(*std::declval<Iterator2 const>())>;
+		using pointer = arrow_proxy<reference>;
+		using iterator_category = std::common_type_t<typename std::iterator_traits<Iterator1>::iterator_category,
+			typename std::iterator_traits<Iterator2>::iterator_category, std::bidirectional_iterator_tag>;
+
+		constexpr iterator() = default;
+
+		constexpr iterator(Iterator1 it1, Iterator2 it2) : m_it1(std::move(it1)), m_it2(std::move(it2)) {}
+
+		[[nodiscard]] constexpr bool operator==(iterator const& other) const {
+			return m_it1 == other.m_it1 && m_it2 == other.m_it2;
+		}
+
+		[[nodiscard]] constexpr bool operator!=(iterator const& other) const {
+			return !(*this == other);
+		}
+
+		[[nodiscard]] constexpr bool operator==(sentinel const& other) const {
+			return m_it1 == other.first || m_it2 == other.second;
+		}
+
+		[[nodiscard]] constexpr bool operator!=(sentinel const& other) const {
+			return !(*this == other);
+		}
+
+		[[nodiscard]] constexpr reference operator*() const {
+			return reference{*m_it1, *m_it2};
+		}
+
+		[[nodiscard]] constexpr pointer operator->() const {
+			return pointer{**this};
+		}
+
+		constexpr iterator& operator++() {
+			++m_it1;
+			++m_it2;
+			return *this;
+		}
+
+		constexpr iterator operator++(int) {
+			return iterator{m_it1++, m_it2++};
+		}
+
+		constexpr iterator& operator--() {
+			--m_it1;
+			--m_it2;
+			return *this;
+		}
+
+		constexpr iterator operator--(int) {
+			return iterator{m_it1--, m_it2--};
+		}
+
+	private:
+		Iterator1 m_it1;
+		Iterator2 m_it2;
+	};
+
+	constexpr iterator begin() const {
+		return iterator{std::begin(m_range1), std::begin(m_range2)};
+	}
+
+	constexpr sentinel end() const {
+		return sentinel{std::end(m_range1), std::end(m_range2)};
+	}
+
+	Range1 m_range1;
+	Range2 m_range2;
+};
+
+} // namespace detail
+
+template <typename First, typename Last, typename T = std::remove_cv_t<std::remove_reference_t<First>>>
+[[nodiscard]] constexpr auto range(First&& first, Last&& last) {
+	return detail::range_view<T>{std::forward<First>(first), std::forward<Last>(last)};
+}
+
+template <typename Last, typename T = std::remove_cv_t<std::remove_reference_t<Last>>>
+[[nodiscard]] constexpr auto range(Last&& last) {
+	return detail::range_view<T>{T{}, std::forward<Last>(last)};
+}
+
+template <typename Range, typename Iterator = decltype(std::begin(std::declval<Range>())),
+	typename Sentinel = decltype(std::end(std::declval<Range>()))>
+[[nodiscard]] constexpr auto enumerate(Range&& range) {
+	return detail::enumerate_view<Range, Iterator, Sentinel>{std::forward<Range>(range)};
+}
+
+template <typename Range1, typename Range2, typename Iterator1 = decltype(std::begin(std::declval<Range1>())),
+	typename Iterator2 = decltype(std::begin(std::declval<Range2>())), typename Sentinel1 = decltype(std::end(std::declval<Range1>())),
+	typename Sentinel2 = decltype(std::end(std::declval<Range2>()))>
+[[nodiscard]] constexpr auto zip(Range1&& range1, Range2&& range2) {
+	return detail::zip_view<Range1, Range2, Iterator1, Iterator2, Sentinel1, Sentinel2>{
+		std::forward<Range1>(range1), std::forward<Range2>(range2)};
+}
+
+} // namespace asic
+
+#endif // ASIC_ALGORITHM_H
\ No newline at end of file
diff --git a/src/debug.h b/src/debug.h
new file mode 100644
index 0000000000000000000000000000000000000000..a11aa057db644dbe2d29399398a1f48ca599876f
--- /dev/null
+++ b/src/debug.h
@@ -0,0 +1,80 @@
+#ifndef ASIC_DEBUG_H
+#define ASIC_DEBUG_H
+
+#ifndef NDEBUG
+#define ASIC_ENABLE_DEBUG_LOGGING 1
+#define ASIC_ENABLE_ASSERTS 1
+#else
+#define ASIC_ENABLE_DEBUG_LOGGING 0
+#define ASIC_ENABLE_ASSERTS 0
+#endif // NDEBUG
+
+#if ASIC_ENABLE_DEBUG_LOGGING
+#include <filesystem>
+#include <fmt/format.h>
+#include <fstream>
+#include <ostream>
+#include <string_view>
+#include <utility>
+#endif // ASIC_ENABLE_DEBUG_LOGGING
+
+#if ASIC_ENABLE_ASSERTS
+#include <filesystem>
+#include <cstdlib>
+#include <cstdio>
+#include <string_view>
+#include <fmt/format.h>
+#endif // ASIC_ENABLE_ASSERTS
+
+namespace asic {
+
+constexpr auto debug_log_filename = "_b_asic_debug_log.txt";
+
+namespace detail {
+
+#if ASIC_ENABLE_DEBUG_LOGGING
+inline void log_debug_msg_string(std::string_view file, int line, std::string_view string) {
+	static auto log_file = std::ofstream{debug_log_filename, std::ios::trunc};
+	log_file << fmt::format("{:<40}: {}", fmt::format("{}:{}", std::filesystem::path{file}.filename().generic_string(), line), string)
+			 << std::endl;
+}
+
+template <typename Format, typename... Args>
+inline void log_debug_msg(std::string_view file, int line, Format&& format, Args&&... args) {
+	log_debug_msg_string(file, line, fmt::format(std::forward<Format>(format), std::forward<Args>(args)...));
+}
+#endif // ASIC_ENABLE_DEBUG_LOGGING
+
+#if ASIC_ENABLE_ASSERTS
+inline void fail_assert(std::string_view file, int line, std::string_view condition_string) {
+#if ASIC_ENABLE_DEBUG_LOGGING
+	log_debug_msg(file, line, "Assertion failed: {}", condition_string);
+#endif // ASIC_ENABLE_DEBUG_LOGGING
+	fmt::print(stderr, "{}:{}: Assertion failed: {}\n", std::filesystem::path{file}.filename().generic_string(), line, condition_string);
+	std::abort();
+}
+
+template <typename BoolConvertible>
+inline void check_assert(std::string_view file, int line, std::string_view condition_string, BoolConvertible&& condition) {
+	if (!static_cast<bool>(condition)) {
+		fail_assert(file, line, condition_string);
+	}
+}
+#endif // ASIC_ENABLE_ASSERTS
+
+} // namespace detail
+} // namespace asic
+
+#if ASIC_ENABLE_DEBUG_LOGGING
+#define ASIC_DEBUG_MSG(...) (asic::detail::log_debug_msg(__FILE__, __LINE__, __VA_ARGS__))
+#else
+#define ASIC_DEBUG_MSG(...) ((void)0)
+#endif // ASIC_ENABLE_DEBUG_LOGGING
+
+#if ASIC_ENABLE_ASSERTS
+#define ASIC_ASSERT(condition) (asic::detail::check_assert(__FILE__, __LINE__, #condition, (condition)))
+#else
+#define ASIC_ASSERT(condition) ((void)0)
+#endif
+
+#endif // ASIC_DEBUG_H
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index bc4e83c69e7d331bbacfa37d8b22baec35833682..f5c4be532aa47468592e2e2f008308d1724e41b8 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,21 +1,9 @@
+#include "simulation.h"
 #include <pybind11/pybind11.h>
 
 namespace py = pybind11;
 
-namespace asic {
-
-int add(int a, int b) {
-	return a + b;
-}
-
-int sub(int a, int b) {
-	return a - b;
-}
-
-} // namespace asic
-
-PYBIND11_MODULE(_b_asic, m) {
-	m.doc() = "Better ASIC Toolbox Extension Module.";
-	m.def("add", &asic::add, "A function which adds two numbers.", py::arg("a"), py::arg("b"));
-	m.def("sub", &asic::sub, "A function which subtracts two numbers.", py::arg("a"), py::arg("b"));
+PYBIND11_MODULE(_b_asic, module) {
+	module.doc() = "Better ASIC Toolbox Extension Module.";
+	asic::define_simulation_class(module);
 }
\ No newline at end of file
diff --git a/src/number.h b/src/number.h
new file mode 100644
index 0000000000000000000000000000000000000000..9cb5b42f53be4eb0cfcc86d00be65005147384e2
--- /dev/null
+++ b/src/number.h
@@ -0,0 +1,13 @@
+#ifndef ASIC_NUMBER_H
+#define ASIC_NUMBER_H
+
+#include <complex>
+#include <pybind11/complex.h>
+
+namespace asic {
+
+using number = std::complex<double>;
+
+} // namespace asic
+
+#endif // ASIC_NUMBER_H
\ No newline at end of file
diff --git a/src/simulation.cpp b/src/simulation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..33280f604be77614f7eadf1ad40d868177dd6e95
--- /dev/null
+++ b/src/simulation.cpp
@@ -0,0 +1,61 @@
+#include "simulation.h"
+#include "simulation/simulation.h"
+
+namespace py = pybind11;
+
+namespace asic {
+
+void define_simulation_class(pybind11::module& module) {
+	// clang-format off
+	py::class_<simulation>(module, "FastSimulation")
+		.def(py::init<py::handle>(),
+			py::arg("sfg"),
+			"SFG Constructor.")
+
+		.def(py::init<py::handle, std::optional<std::vector<std::optional<input_provider_t>>>>(),
+			py::arg("sfg"), py::arg("input_providers"),
+			"SFG Constructor.")
+
+		.def("set_input", &simulation::set_input,
+			py::arg("index"), py::arg("input_provider"),
+			"Set the input function used to get values for the specific input at the given index to the internal SFG.")
+
+		.def("set_inputs", &simulation::set_inputs,
+			py::arg("input_providers"),
+			"Set the input functions used to get values for the inputs to the internal SFG.")
+
+		.def("step", &simulation::step,
+			py::arg("save_results") = true, py::arg("bits_override") = py::none{}, py::arg("truncate") = true,
+			"Run one iteration of the simulation and return the resulting output values.")
+
+		.def("run_until", &simulation::run_until,
+			py::arg("iteration"), py::arg("save_results") = true, py::arg("bits_override") = py::none{}, py::arg("truncate") = true,
+			"Run the simulation until its iteration is greater than or equal to the given iteration\n"
+			"and return the output values of the last iteration.")
+
+		.def("run_for", &simulation::run_for,
+			py::arg("iterations"), py::arg("save_results") = true, py::arg("bits_override") = py::none{}, py::arg("truncate") = true,
+			"Run a given number of iterations of the simulation and return the output values of the last iteration.")
+
+		.def("run", &simulation::run,
+			py::arg("save_results") = true, py::arg("bits_override") = py::none{}, py::arg("truncate") = true,
+			"Run the simulation until the end of its input arrays and return the output values of the last iteration.")
+
+		.def_property_readonly("iteration", &simulation::iteration,
+			"Get the current iteration number of the simulation.")
+
+		.def_property_readonly("results", &simulation::results,
+			"Get a mapping from result keys to numpy arrays containing all results, including intermediate values,\n"
+			"calculated for each iteration up until now that was run with save_results enabled.\n"
+			"The mapping is indexed using the key() method of Operation with the appropriate output index.\n"
+			"Example result after 3 iterations: {\"c1\": [3, 6, 7], \"c2\": [4, 5, 5], \"bfly1.0\": [7, 0, 0], \"bfly1.1\": [-1, 0, 2], \"0\": [7, -2, -1]}")
+
+		.def("clear_results", &simulation::clear_results,
+			"Clear all results that were saved until now.")
+
+		.def("clear_state", &simulation::clear_state,
+			"Clear all current state of the simulation, except for the results and iteration.");
+	// clang-format on
+}
+
+} // namespace asic
\ No newline at end of file
diff --git a/src/simulation.h b/src/simulation.h
new file mode 100644
index 0000000000000000000000000000000000000000..aefa3a4e92b861b9c7b795c7301f900dc54ace6f
--- /dev/null
+++ b/src/simulation.h
@@ -0,0 +1,12 @@
+#ifndef ASIC_SIMULATION_H
+#define ASIC_SIMULATION_H
+
+#include <pybind11/pybind11.h>
+
+namespace asic {
+
+void define_simulation_class(pybind11::module& module);
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_H
\ No newline at end of file
diff --git a/src/simulation/compile.cpp b/src/simulation/compile.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1f93b921b4000b3c776284714cbe9a07e49fc504
--- /dev/null
+++ b/src/simulation/compile.cpp
@@ -0,0 +1,313 @@
+#define NOMINMAX
+#include "compile.h"
+
+#include "../algorithm.h"
+#include "../debug.h"
+#include "../span.h"
+#include "format_code.h"
+
+#include <Python.h>
+#include <fmt/format.h>
+#include <limits>
+#include <optional>
+#include <string_view>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+
+namespace py = pybind11;
+
+namespace asic {
+
+[[nodiscard]] static result_key key_base(py::handle op, std::string_view prefix) {
+	auto const graph_id = op.attr("graph_id").cast<std::string_view>();
+	return (prefix.empty()) ? result_key{graph_id} : fmt::format("{}.{}", prefix, graph_id);
+}
+
+[[nodiscard]] static result_key key_of_output(py::handle op, std::size_t output_index, std::string_view prefix) {
+	auto const base = key_base(op, prefix);
+	if (base.empty()) {
+		return fmt::to_string(output_index);
+	}
+	if (op.attr("output_count").cast<std::size_t>() == 1) {
+		return base;
+	}
+	return fmt::format("{}.{}", base, output_index);
+}
+
+class compiler final {
+public:
+	simulation_code compile(py::handle sfg) {
+		ASIC_DEBUG_MSG("Compiling code...");
+		this->initialize_code(sfg.attr("input_count").cast<std::size_t>(), sfg.attr("output_count").cast<std::size_t>());
+		auto deferred_delays = delay_queue{};
+		this->add_outputs(sfg, deferred_delays);
+		this->add_deferred_delays(std::move(deferred_delays));
+		this->resolve_invalid_result_indices();
+		ASIC_DEBUG_MSG("Compiled code:\n{}\n", format_compiled_simulation_code(m_code));
+		return std::move(m_code);
+	}
+
+private:
+	struct sfg_info final {
+		py::handle sfg;
+		std::size_t prefix_length;
+
+		sfg_info(py::handle sfg, std::size_t prefix_length)
+			: sfg(sfg)
+			, prefix_length(prefix_length) {}
+
+		[[nodiscard]] std::size_t find_input_operation_index(py::handle op) const {
+			for (auto const& [i, in] : enumerate(sfg.attr("input_operations"))) {
+				if (in.is(op)) {
+					return i;
+				}
+			}
+			throw py::value_error{"Stray Input operation in simulation SFG"};
+		}
+	};
+
+	using sfg_info_stack = std::vector<sfg_info>;
+	using delay_queue = std::vector<std::tuple<std::size_t, py::handle, std::string, sfg_info_stack>>;
+	using added_output_cache = std::unordered_set<PyObject const*>;
+	using added_result_cache = std::unordered_map<PyObject const*, result_index_t>;
+	using added_custom_operation_cache = std::unordered_map<PyObject const*, std::size_t>;
+
+	static constexpr auto no_result_index = std::numeric_limits<result_index_t>::max();
+
+	void initialize_code(std::size_t input_count, std::size_t output_count) {
+		m_code.required_stack_size = 0;
+		m_code.input_count = input_count;
+		m_code.output_count = output_count;
+	}
+
+	void add_outputs(py::handle sfg, delay_queue& deferred_delays) {
+		for (auto const i : range(m_code.output_count)) {
+			this->add_operation_output(sfg, i, std::string_view{}, sfg_info_stack{}, deferred_delays);
+		}
+	}
+
+	void add_deferred_delays(delay_queue&& deferred_delays) {
+		while (!deferred_delays.empty()) {
+			auto new_deferred_delays = delay_queue{};
+			for (auto const& [delay_index, op, prefix, sfg_stack] : deferred_delays) {
+				this->add_source(op, 0, prefix, sfg_stack, deferred_delays);
+				this->add_instruction(instruction_type::update_delay, no_result_index, -1).index = delay_index;
+			}
+			deferred_delays = new_deferred_delays;
+		}
+	}
+
+	void resolve_invalid_result_indices() {
+		for (auto& instruction : m_code.instructions) {
+			if (instruction.result_index == no_result_index) {
+				instruction.result_index = static_cast<result_index_t>(m_code.result_keys.size());
+			}
+		}
+	}
+
+	[[nodiscard]] static sfg_info_stack push_sfg(sfg_info_stack const& sfg_stack, py::handle sfg, std::size_t prefix_length) {
+		auto const new_size = static_cast<std::size_t>(sfg_stack.size() + 1);
+		auto new_sfg_stack = sfg_info_stack{};
+		new_sfg_stack.reserve(new_size);
+		for (auto const& info : sfg_stack) {
+			new_sfg_stack.push_back(info);
+		}
+		new_sfg_stack.emplace_back(sfg, prefix_length);
+		return new_sfg_stack;
+	}
+
+	[[nodiscard]] static sfg_info_stack pop_sfg(sfg_info_stack const& sfg_stack) {
+		ASIC_ASSERT(!sfg_stack.empty());
+		auto const new_size = static_cast<std::size_t>(sfg_stack.size() - 1);
+		auto new_sfg_stack = sfg_info_stack{};
+		new_sfg_stack.reserve(new_size);
+		for (auto const& info : span{sfg_stack}.first(new_size)) {
+			new_sfg_stack.push_back(info);
+		}
+		return new_sfg_stack;
+	}
+
+	instruction& add_instruction(instruction_type type, result_index_t result_index, std::ptrdiff_t stack_diff) {
+		m_stack_depth += stack_diff;
+		if (m_stack_depth < 0) {
+			throw py::value_error{"Detected input/output count mismatch in simulation SFG"};
+		}
+		if (auto const stack_size = static_cast<std::size_t>(m_stack_depth); stack_size > m_code.required_stack_size) {
+			m_code.required_stack_size = stack_size;
+		}
+		auto& instruction = m_code.instructions.emplace_back();
+		instruction.type = type;
+		instruction.result_index = result_index;
+		return instruction;
+	}
+
+	[[nodiscard]] std::optional<result_index_t> begin_operation_output(py::handle op, std::size_t output_index, std::string_view prefix) {
+		auto const pointer = op.attr("outputs")[py::int_{output_index}].ptr();
+		if (m_incomplete_outputs.count(pointer) != 0) {
+			// Make sure the output doesn't depend on its own value, unless it's a delay operation.
+			if (op.attr("type_name")().cast<std::string_view>() != "t") {
+				throw py::value_error{"Direct feedback loop detected in simulation SFG"};
+			}
+		}
+		// Try to add a new result.
+		auto const [it, inserted] = m_added_results.try_emplace(pointer, static_cast<result_index_t>(m_code.result_keys.size()));
+		if (inserted) {
+			if (m_code.result_keys.size() >= static_cast<std::size_t>(std::numeric_limits<result_index_t>::max())) {
+				throw py::value_error{fmt::format("Simulation SFG requires too many outputs to be stored (limit: {})",
+												  std::numeric_limits<result_index_t>::max())};
+			}
+			m_code.result_keys.push_back(key_of_output(op, output_index, prefix));
+			m_incomplete_outputs.insert(pointer);
+			return it->second;
+		}
+		// If the result has already been added, we re-use the old result and
+		// return std::nullopt to indicate that we don't need to add all the required instructions again.
+		this->add_instruction(instruction_type::push_result, it->second, 1).index = static_cast<std::size_t>(it->second);
+		return std::nullopt;
+	}
+
+	void end_operation_output(py::handle op, std::size_t output_index) {
+		auto const pointer = op.attr("outputs")[py::int_{output_index}].ptr();
+		[[maybe_unused]] auto const erased = m_incomplete_outputs.erase(pointer);
+		ASIC_ASSERT(erased == 1);
+	}
+
+	[[nodiscard]] std::size_t try_add_custom_operation(py::handle op) {
+		auto const [it, inserted] = m_added_custom_operations.try_emplace(op.ptr(), m_added_custom_operations.size());
+		if (inserted) {
+			auto& custom_operation = m_code.custom_operations.emplace_back();
+			custom_operation.evaluate_output = op.attr("evaluate_output");
+			custom_operation.input_count = op.attr("input_count").cast<std::size_t>();
+			custom_operation.output_count = op.attr("output_count").cast<std::size_t>();
+		}
+		return it->second;
+	}
+
+	[[nodiscard]] std::size_t add_delay_info(number initial_value, result_index_t result_index) {
+		auto const delay_index = m_code.delays.size();
+		auto& delay = m_code.delays.emplace_back();
+		delay.initial_value = initial_value;
+		delay.result_index = result_index;
+		return delay_index;
+	}
+
+	void add_source(py::handle op, std::size_t input_index, std::string_view prefix, sfg_info_stack const& sfg_stack,
+					delay_queue& deferred_delays) {
+		auto const signal = py::object{op.attr("inputs")[py::int_{input_index}].attr("signals")[py::int_{0}]};
+		auto const src = py::handle{signal.attr("source")};
+		auto const operation = py::handle{src.attr("operation")};
+		auto const index = src.attr("index").cast<std::size_t>();
+		this->add_operation_output(operation, index, prefix, sfg_stack, deferred_delays);
+		if (!signal.attr("bits").is_none()) {
+			auto const bits = signal.attr("bits").cast<std::size_t>();
+			if (bits > 64) {
+				throw py::value_error{"Cannot truncate to more than 64 bits"};
+			}
+			this->add_instruction(instruction_type::truncate, no_result_index, 0).bit_mask = static_cast<std::int64_t>(std::int64_t{1}
+																													   << bits);
+		}
+	}
+
+	void add_unary_operation_output(py::handle op, result_index_t result_index, std::string_view prefix, sfg_info_stack const& sfg_stack,
+									delay_queue& deferred_delays, instruction_type type) {
+		this->add_source(op, 0, prefix, sfg_stack, deferred_delays);
+		this->add_instruction(type, result_index, 0);
+	}
+
+	void add_binary_operation_output(py::handle op, result_index_t result_index, std::string_view prefix, sfg_info_stack const& sfg_stack,
+									 delay_queue& deferred_delays, instruction_type type) {
+		this->add_source(op, 0, prefix, sfg_stack, deferred_delays);
+		this->add_source(op, 1, prefix, sfg_stack, deferred_delays);
+		this->add_instruction(type, result_index, -1);
+	}
+
+	void add_operation_output(py::handle op, std::size_t output_index, std::string_view prefix, sfg_info_stack const& sfg_stack,
+							  delay_queue& deferred_delays) {
+		auto const type_name = op.attr("type_name")().cast<std::string_view>();
+		if (type_name == "out") {
+			this->add_source(op, 0, prefix, sfg_stack, deferred_delays);
+		} else if (auto const result_index = this->begin_operation_output(op, output_index, prefix)) {
+			if (type_name == "c") {
+				this->add_instruction(instruction_type::push_constant, *result_index, 1).value = op.attr("value").cast<number>();
+			} else if (type_name == "add") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, deferred_delays, instruction_type::addition);
+			} else if (type_name == "sub") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, deferred_delays, instruction_type::subtraction);
+			} else if (type_name == "mul") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, deferred_delays, instruction_type::multiplication);
+			} else if (type_name == "div") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, deferred_delays, instruction_type::division);
+			} else if (type_name == "min") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, deferred_delays, instruction_type::min);
+			} else if (type_name == "max") {
+				this->add_binary_operation_output(op, *result_index, prefix, sfg_stack, deferred_delays, instruction_type::max);
+			} else if (type_name == "sqrt") {
+				this->add_unary_operation_output(op, *result_index, prefix, sfg_stack, deferred_delays, instruction_type::square_root);
+			} else if (type_name == "conj") {
+				this->add_unary_operation_output(
+					op, *result_index, prefix, sfg_stack, deferred_delays, instruction_type::complex_conjugate);
+			} else if (type_name == "abs") {
+				this->add_unary_operation_output(op, *result_index, prefix, sfg_stack, deferred_delays, instruction_type::absolute);
+			} else if (type_name == "cmul") {
+				this->add_source(op, 0, prefix, sfg_stack, deferred_delays);
+				this->add_instruction(instruction_type::constant_multiplication, *result_index, 0).value = op.attr("value").cast<number>();
+			} else if (type_name == "bfly") {
+				if (output_index == 0) {
+					this->add_source(op, 0, prefix, sfg_stack, deferred_delays);
+					this->add_source(op, 1, prefix, sfg_stack, deferred_delays);
+					this->add_instruction(instruction_type::addition, *result_index, -1);
+				} else {
+					this->add_source(op, 0, prefix, sfg_stack, deferred_delays);
+					this->add_source(op, 1, prefix, sfg_stack, deferred_delays);
+					this->add_instruction(instruction_type::subtraction, *result_index, -1);
+				}
+			} else if (type_name == "in") {
+				if (sfg_stack.empty()) {
+					throw py::value_error{"Encountered Input operation outside SFG in simulation"};
+				}
+				auto const& info = sfg_stack.back();
+				auto const input_index = info.find_input_operation_index(op);
+				if (sfg_stack.size() == 1) {
+					this->add_instruction(instruction_type::push_input, *result_index, 1).index = input_index;
+				} else {
+					this->add_source(info.sfg, input_index, prefix.substr(0, info.prefix_length), pop_sfg(sfg_stack), deferred_delays);
+					this->add_instruction(instruction_type::forward_value, *result_index, 0);
+				}
+			} else if (type_name == "t") {
+				auto const delay_index = this->add_delay_info(op.attr("initial_value").cast<number>(), *result_index);
+				deferred_delays.emplace_back(delay_index, op, std::string{prefix}, sfg_stack);
+				this->add_instruction(instruction_type::push_delay, *result_index, 1).index = delay_index;
+			} else if (type_name == "sfg") {
+				auto const output_op = py::handle{op.attr("output_operations")[py::int_{output_index}]};
+				this->add_source(output_op, 0, key_base(op, prefix), push_sfg(sfg_stack, op, prefix.size()), deferred_delays);
+				this->add_instruction(instruction_type::forward_value, *result_index, 0);
+			} else {
+				auto const custom_operation_index = this->try_add_custom_operation(op);
+				auto const& custom_operation = m_code.custom_operations[custom_operation_index];
+				for (auto const i : range(custom_operation.input_count)) {
+					this->add_source(op, i, prefix, sfg_stack, deferred_delays);
+				}
+				auto const custom_source_index = m_code.custom_sources.size();
+				auto& custom_source = m_code.custom_sources.emplace_back();
+				custom_source.custom_operation_index = custom_operation_index;
+				custom_source.output_index = output_index;
+				auto const stack_diff = std::ptrdiff_t{1} - static_cast<std::ptrdiff_t>(custom_operation.input_count);
+				this->add_instruction(instruction_type::custom, *result_index, stack_diff).index = custom_source_index;
+			}
+			this->end_operation_output(op, output_index);
+		}
+	}
+
+	simulation_code m_code;
+	added_output_cache m_incomplete_outputs;
+	added_result_cache m_added_results;
+	added_custom_operation_cache m_added_custom_operations;
+	std::ptrdiff_t m_stack_depth = 0;
+};
+
+simulation_code compile_simulation(pybind11::handle sfg) {
+	return compiler{}.compile(sfg);
+}
+
+} // namespace asic
\ No newline at end of file
diff --git a/src/simulation/compile.h b/src/simulation/compile.h
new file mode 100644
index 0000000000000000000000000000000000000000..883f4c5832978ea1bfd33c767fc947c1efde718e
--- /dev/null
+++ b/src/simulation/compile.h
@@ -0,0 +1,61 @@
+#ifndef ASIC_SIMULATION_COMPILE_H
+#define ASIC_SIMULATION_COMPILE_H
+
+#include "instruction.h"
+
+#include <cstddef>
+#include <pybind11/pybind11.h>
+#include <string>
+#include <vector>
+
+namespace asic {
+
+using result_key = std::string;
+
+struct simulation_code final {
+	struct custom_operation final {
+		// Python function used to evaluate the custom operation.
+		pybind11::object evaluate_output;
+		// Number of inputs that the custom operation takes.
+		std::size_t input_count;
+		// Number of outputs that the custom operation gives.
+		std::size_t output_count;
+	};
+
+	struct custom_source final {
+		// Index into custom_operations where the custom_operation corresponding to this custom_source is located.
+		std::size_t custom_operation_index;
+		// Output index of the custom_operation that this source gets it value from.
+		std::size_t output_index;
+	};
+
+	struct delay_info final {
+		// Initial value to set at the start of the simulation.
+		number initial_value;
+		// The result index where the current value should be stored at the start of each iteration.
+		result_index_t result_index;
+	};
+
+	// Instructions to execute for one full iteration of the simulation.
+	std::vector<instruction> instructions;
+	// Custom operations used by the simulation.
+	std::vector<custom_operation> custom_operations;
+	// Signal sources that use custom operations.
+	std::vector<custom_source> custom_sources;
+	// Info about the delay operations used in the simulation.
+	std::vector<delay_info> delays;
+	// Keys for each result produced by the simulation. The index of the key matches the index of the result in the simulation state.
+	std::vector<result_key> result_keys;
+	// Number of values expected as input to the simulation.
+	std::size_t input_count;
+	// Number of values given as output from the simulation. This will be the number of values left on the stack after a full iteration of the simulation has been run.
+	std::size_t output_count;
+	// Maximum number of values that need to be able to fit on the stack in order to run a full iteration of the simulation.
+	std::size_t required_stack_size;
+};
+
+[[nodiscard]] simulation_code compile_simulation(pybind11::handle sfg);
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_COMPILE_H
\ No newline at end of file
diff --git a/src/simulation/format_code.h b/src/simulation/format_code.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ebbb95d1f11eb18b915dbab9fbccbb82d83304c
--- /dev/null
+++ b/src/simulation/format_code.h
@@ -0,0 +1,129 @@
+#ifndef ASIC_SIMULATION_FORMAT_CODE_H
+#define ASIC_SIMULATION_FORMAT_CODE_H
+
+#include "../algorithm.h"
+#include "../debug.h"
+#include "../number.h"
+#include "compile.h"
+#include "instruction.h"
+
+#include <fmt/format.h>
+#include <string>
+
+namespace asic {
+
+[[nodiscard]] inline std::string format_number(number const& value) {
+	if (value.imag() == 0) {
+		return fmt::to_string(value.real());
+	}
+	if (value.real() == 0) {
+		return fmt::format("{}j", value.imag());
+	}
+	if (value.imag() < 0) {
+		return fmt::format("{}-{}j", value.real(), -value.imag());
+	}
+	return fmt::format("{}+{}j", value.real(), value.imag());
+}
+
+[[nodiscard]] inline std::string format_compiled_simulation_code_result_keys(simulation_code const& code) {
+	auto result = std::string{};
+	for (auto const& [i, result_key] : enumerate(code.result_keys)) {
+		result += fmt::format("{:>2}: \"{}\"\n", i, result_key);
+	}
+	return result;
+}
+
+[[nodiscard]] inline std::string format_compiled_simulation_code_delays(simulation_code const& code) {
+	auto result = std::string{};
+	for (auto const& [i, delay] : enumerate(code.delays)) {
+		ASIC_ASSERT(delay.result_index < code.result_keys.size());
+		result += fmt::format("{:>2}: Initial value: {}, Result: {}: \"{}\"\n",
+							  i,
+							  format_number(delay.initial_value),
+							  delay.result_index,
+							  code.result_keys[delay.result_index]);
+	}
+	return result;
+}
+
+[[nodiscard]] inline std::string format_compiled_simulation_code_instruction(instruction const& instruction) {
+	switch (instruction.type) {
+		// clang-format off
+		case instruction_type::push_input:              return fmt::format("push_input inputs[{}]", instruction.index);
+		case instruction_type::push_result:             return fmt::format("push_result results[{}]", instruction.index);
+		case instruction_type::push_delay:              return fmt::format("push_delay delays[{}]", instruction.index);
+		case instruction_type::push_constant:           return fmt::format("push_constant {}", format_number(instruction.value));
+		case instruction_type::truncate:                return fmt::format("truncate {:#018x}", instruction.bit_mask);
+		case instruction_type::addition:                return "addition";
+		case instruction_type::subtraction:             return "subtraction";
+		case instruction_type::multiplication:          return "multiplication";
+		case instruction_type::division:                return "division";
+		case instruction_type::min:                     return "min";
+		case instruction_type::max:                     return "max";
+		case instruction_type::square_root:             return "square_root";
+		case instruction_type::complex_conjugate:       return "complex_conjugate";
+		case instruction_type::absolute:                return "absolute";
+		case instruction_type::constant_multiplication: return fmt::format("constant_multiplication {}", format_number(instruction.value));
+		case instruction_type::update_delay:            return fmt::format("update_delay delays[{}]", instruction.index);
+		case instruction_type::custom:                  return fmt::format("custom custom_sources[{}]", instruction.index);
+		case instruction_type::forward_value:           return "forward_value";
+		// clang-format on
+	}
+	return std::string{};
+}
+
+[[nodiscard]] inline std::string format_compiled_simulation_code_instructions(simulation_code const& code) {
+	auto result = std::string{};
+	for (auto const& [i, instruction] : enumerate(code.instructions)) {
+		auto instruction_string = format_compiled_simulation_code_instruction(instruction);
+		if (instruction.result_index < code.result_keys.size()) {
+			instruction_string = fmt::format(
+				"{:<26} -> {}: \"{}\"", instruction_string, instruction.result_index, code.result_keys[instruction.result_index]);
+		}
+		result += fmt::format("{:>2}: {}\n", i, instruction_string);
+	}
+	return result;
+}
+
+[[nodiscard]] inline std::string format_compiled_simulation_code(simulation_code const& code) {
+	return fmt::format(
+		"==============================================\n"
+		"> Code stats\n"
+		"==============================================\n"
+		"Input count: {}\n"
+		"Output count: {}\n"
+		"Instruction count: {}\n"
+		"Required stack size: {}\n"
+		"Delay count: {}\n"
+		"Result count: {}\n"
+		"Custom operation count: {}\n"
+		"Custom source count: {}\n"
+		"==============================================\n"
+		"> Delays\n"
+		"==============================================\n"
+		"{}"
+		"==============================================\n"
+		"> Result keys\n"
+		"==============================================\n"
+		"{}"
+		"==============================================\n"
+		"> Instructions\n"
+		"==============================================\n"
+		"{}"
+		"==============================================",
+		code.input_count,
+		code.output_count,
+		code.instructions.size(),
+		code.required_stack_size,
+		code.delays.size(),
+		code.result_keys.size(),
+		code.custom_operations.size(),
+		code.custom_sources.size(),
+		format_compiled_simulation_code_delays(code),
+		format_compiled_simulation_code_result_keys(code),
+		format_compiled_simulation_code_instructions(code));
+}
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_FORMAT_CODE
\ No newline at end of file
diff --git a/src/simulation/instruction.h b/src/simulation/instruction.h
new file mode 100644
index 0000000000000000000000000000000000000000..d650c651394a243c52eee7e5ad2fe463f96bdad7
--- /dev/null
+++ b/src/simulation/instruction.h
@@ -0,0 +1,57 @@
+#ifndef ASIC_SIMULATION_INSTRUCTION_H
+#define ASIC_SIMULATION_INSTRUCTION_H
+
+#include "../number.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+
+namespace asic {
+
+enum class instruction_type : std::uint8_t {
+	push_input,              // push(inputs[index])
+	push_result,             // push(results[index])
+	push_delay,              // push(delays[index])
+	push_constant,           // push(value)
+	truncate,                // push(trunc(pop(), bit_mask))
+	addition,                // push(pop() + pop())
+	subtraction,             // push(pop() - pop())
+	multiplication,          // push(pop() * pop())
+	division,                // push(pop() / pop())
+	min,                     // push(min(pop(), pop()))
+	max,                     // push(max(pop(), pop()))
+	square_root,             // push(sqrt(pop()))
+	complex_conjugate,       // push(conj(pop()))
+	absolute,                // push(abs(pop()))
+	constant_multiplication, // push(pop() * value)
+	update_delay,            // delays[index] = pop()
+	custom,                  // Custom operation. Uses custom_source[index].
+	forward_value            // Forward the current value on the stack (push(pop()), i.e. do nothing).
+};
+
+using result_index_t = std::uint16_t;
+
+struct instruction final {
+	constexpr instruction() noexcept
+		: index(0)
+		, result_index(0)
+		, type(instruction_type::forward_value) {}
+
+	union {
+		// Index used by push_input, push_result, delay and custom.
+		std::size_t index;
+		// Bit mask used by truncate.
+		std::int64_t bit_mask;
+		// Constant value used by push_constant and constant_multiplication.
+		number value;
+	};
+	// Index into where the result of the instruction will be stored. If the result should be ignored, this index will be one past the last valid result index.
+	result_index_t result_index;
+	// Specifies what kind of operation the instruction should execute.
+	instruction_type type;
+};
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_INSTRUCTION_H
\ No newline at end of file
diff --git a/src/simulation/run.cpp b/src/simulation/run.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1f27b0c70e4d70ec057953caded37a4f40ab1b10
--- /dev/null
+++ b/src/simulation/run.cpp
@@ -0,0 +1,176 @@
+#define NOMINMAX
+#include "run.h"
+
+#include "../algorithm.h"
+#include "../debug.h"
+#include "format_code.h"
+
+#include <algorithm>
+#include <complex>
+#include <cstddef>
+#include <fmt/format.h>
+#include <iterator>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <stdexcept>
+
+namespace py = pybind11;
+
+namespace asic {
+
+[[nodiscard]] static number truncate_value(number value, std::int64_t bit_mask) {
+	if (value.imag() != 0) {
+		throw py::type_error{"Complex value cannot be truncated"};
+	}
+	return number{static_cast<number::value_type>(static_cast<std::int64_t>(value.real()) & bit_mask)};
+}
+
+[[nodiscard]] static std::int64_t setup_truncation_parameters(bool& truncate, std::optional<std::uint8_t>& bits_override) {
+	if (truncate && bits_override) {
+		truncate = false; // Ignore truncate instructions, they will be truncated using bits_override instead.
+		if (*bits_override > 64) {
+			throw py::value_error{"Cannot truncate to more than 64 bits"};
+		}
+		return static_cast<std::int64_t>(std::int64_t{1} << *bits_override); // Return the bit mask override to use.
+	}
+	bits_override.reset(); // Don't use bits_override if truncate is false.
+	return std::int64_t{};
+}
+
+simulation_state run_simulation(simulation_code const& code, span<number const> inputs, span<number> delays,
+								std::optional<std::uint8_t> bits_override, bool truncate) {
+	ASIC_ASSERT(inputs.size() == code.input_count);
+	ASIC_ASSERT(delays.size() == code.delays.size());
+	ASIC_ASSERT(code.output_count <= code.required_stack_size);
+
+	auto state = simulation_state{};
+
+	// Setup results.
+	state.results.resize(code.result_keys.size() + 1); // Add one space to store ignored results.
+	// Initialize delay results to their current values.
+	for (auto const& [i, delay] : enumerate(code.delays)) {
+		state.results[delay.result_index] = delays[i];
+	}
+
+	// Setup stack.
+	state.stack.resize(code.required_stack_size);
+	auto stack_pointer = state.stack.data();
+
+	// Utility functions to make the stack manipulation code below more readable.
+	// Should hopefully be inlined by the compiler.
+	auto const push = [&](number value) -> void {
+		ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) < static_cast<std::ptrdiff_t>(state.stack.size()));
+		*stack_pointer++ = value;
+	};
+	auto const pop = [&]() -> number {
+		ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) > std::ptrdiff_t{0});
+		return *--stack_pointer;
+	};
+	auto const peek = [&]() -> number {
+		ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) > std::ptrdiff_t{0});
+		ASIC_ASSERT(std::distance(state.stack.data(), stack_pointer) <= static_cast<std::ptrdiff_t>(state.stack.size()));
+		return *(stack_pointer - 1);
+	};
+
+	// Check if results should be truncated.
+	auto const bit_mask_override = setup_truncation_parameters(truncate, bits_override);
+
+	// Hot instruction evaluation loop.
+	for (auto const& instruction : code.instructions) {
+		ASIC_DEBUG_MSG("Evaluating {}.", format_compiled_simulation_code_instruction(instruction));
+		// Execute the instruction.
+		switch (instruction.type) {
+			case instruction_type::push_input:
+				push(inputs[instruction.index]);
+				break;
+			case instruction_type::push_result:
+				push(state.results[instruction.index]);
+				break;
+			case instruction_type::push_delay:
+				push(delays[instruction.index]);
+				break;
+			case instruction_type::push_constant:
+				push(instruction.value);
+				break;
+			case instruction_type::truncate:
+				if (truncate) {
+					push(truncate_value(pop(), instruction.bit_mask));
+				}
+				break;
+			case instruction_type::addition:
+				push(pop() + pop());
+				break;
+			case instruction_type::subtraction:
+				push(pop() - pop());
+				break;
+			case instruction_type::multiplication:
+				push(pop() * pop());
+				break;
+			case instruction_type::division:
+				push(pop() / pop());
+				break;
+			case instruction_type::min: {
+				auto const lhs = pop();
+				auto const rhs = pop();
+				if (lhs.imag() != 0 || rhs.imag() != 0) {
+					throw std::runtime_error{"Min does not support complex numbers."};
+				}
+				push(std::min(lhs.real(), rhs.real()));
+				break;
+			}
+			case instruction_type::max: {
+				auto const lhs = pop();
+				auto const rhs = pop();
+				if (lhs.imag() != 0 || rhs.imag() != 0) {
+					throw std::runtime_error{"Max does not support complex numbers."};
+				}
+				push(std::max(lhs.real(), rhs.real()));
+				break;
+			}
+			case instruction_type::square_root:
+				push(std::sqrt(pop()));
+				break;
+			case instruction_type::complex_conjugate:
+				push(std::conj(pop()));
+				break;
+			case instruction_type::absolute:
+				push(number{std::abs(pop())});
+				break;
+			case instruction_type::constant_multiplication:
+				push(pop() * instruction.value);
+				break;
+			case instruction_type::update_delay:
+				delays[instruction.index] = pop();
+				break;
+			case instruction_type::custom: {
+				using namespace pybind11::literals;
+				auto const& src = code.custom_sources[instruction.index];
+				auto const& op = code.custom_operations[src.custom_operation_index];
+				auto input_values = std::vector<number>{};
+				input_values.reserve(op.input_count);
+				for (auto i = std::size_t{0}; i < op.input_count; ++i) {
+					input_values.push_back(pop());
+				}
+				push(op.evaluate_output(src.output_index, std::move(input_values), "truncate"_a = truncate).cast<number>());
+				break;
+			}
+			case instruction_type::forward_value:
+				// Do nothing, since doing push(pop()) would be pointless.
+				break;
+		}
+		// If we've been given a global override for how many bits to use, always truncate the result.
+		if (bits_override) {
+			push(truncate_value(pop(), bit_mask_override));
+		}
+		// Store the result.
+		state.results[instruction.result_index] = peek();
+	}
+
+	// Remove the space that we used for ignored results.
+	state.results.pop_back();
+	// Erase the portion of the stack that does not contain the output values.
+	state.stack.erase(state.stack.begin() + static_cast<std::ptrdiff_t>(code.output_count), state.stack.end());
+	return state;
+}
+
+} // namespace asic
\ No newline at end of file
diff --git a/src/simulation/run.h b/src/simulation/run.h
new file mode 100644
index 0000000000000000000000000000000000000000..2174c571ef59f3e12236471e3e064f2619c38a60
--- /dev/null
+++ b/src/simulation/run.h
@@ -0,0 +1,23 @@
+#ifndef ASIC_SIMULATION_RUN_H
+#define ASIC_SIMULATION_RUN_H
+
+#include "../number.h"
+#include "../span.h"
+#include "compile.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace asic {
+
+struct simulation_state final {
+	std::vector<number> stack;
+	std::vector<number> results;
+};
+
+simulation_state run_simulation(simulation_code const& code, span<number const> inputs, span<number> delays,
+								std::optional<std::uint8_t> bits_override, bool truncate);
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_RUN_H
\ No newline at end of file
diff --git a/src/simulation/simulation.cpp b/src/simulation/simulation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3af24c10e62bfe09590a05153abd25468d6bee4c
--- /dev/null
+++ b/src/simulation/simulation.cpp
@@ -0,0 +1,129 @@
+#define NOMINMAX
+#include "simulation.h"
+
+#include "../algorithm.h"
+#include "../debug.h"
+#include "compile.h"
+#include "run.h"
+
+#include <fmt/format.h>
+#include <limits>
+#include <pybind11/numpy.h>
+#include <utility>
+
+namespace py = pybind11;
+
+namespace asic {
+
+simulation::simulation(pybind11::handle sfg, std::optional<std::vector<std::optional<input_provider_t>>> input_providers)
+	: m_code(compile_simulation(sfg))
+	, m_input_functions(sfg.attr("input_count").cast<std::size_t>(), [](iteration_t) -> number { return number{}; }) {
+	m_delays.reserve(m_code.delays.size());
+	for (auto const& delay : m_code.delays) {
+		m_delays.push_back(delay.initial_value);
+	}
+	if (input_providers) {
+		this->set_inputs(std::move(*input_providers));
+	}
+}
+
+void simulation::set_input(std::size_t index, input_provider_t input_provider) {
+	if (index >= m_input_functions.size()) {
+		throw py::index_error{fmt::format("Input index out of range (expected 0-{}, got {})", m_input_functions.size() - 1, index)};
+	}
+	if (auto* const callable = std::get_if<input_function_t>(&input_provider)) {
+		m_input_functions[index] = std::move(*callable);
+	} else if (auto* const numeric = std::get_if<number>(&input_provider)) {
+		m_input_functions[index] = [value = *numeric](iteration_t) -> number {
+			return value;
+		};
+	} else if (auto* const list = std::get_if<std::vector<number>>(&input_provider)) {
+		if (!m_input_length) {
+			m_input_length = static_cast<iteration_t>(list->size());
+		} else if (*m_input_length != static_cast<iteration_t>(list->size())) {
+			throw py::value_error{fmt::format("Inconsistent input length for simulation (was {}, got {})", *m_input_length, list->size())};
+		}
+		m_input_functions[index] = [values = std::move(*list)](iteration_t n) -> number {
+			return values.at(n);
+		};
+	}
+}
+
+void simulation::set_inputs(std::vector<std::optional<input_provider_t>> input_providers) {
+	if (input_providers.size() != m_input_functions.size()) {
+		throw py::value_error{fmt::format(
+			"Wrong number of inputs supplied to simulation (expected {}, got {})", m_input_functions.size(), input_providers.size())};
+	}
+	for (auto&& [i, input_provider] : enumerate(input_providers)) {
+		if (input_provider) {
+			this->set_input(i, std::move(*input_provider));
+		}
+	}
+}
+
+std::vector<number> simulation::step(bool save_results, std::optional<std::uint8_t> bits_override, bool truncate) {
+	return this->run_for(1, save_results, bits_override, truncate);
+}
+
+std::vector<number> simulation::run_until(iteration_t iteration, bool save_results, std::optional<std::uint8_t> bits_override,
+										  bool truncate) {
+	auto result = std::vector<number>{};
+	while (m_iteration < iteration) {
+		ASIC_DEBUG_MSG("Running simulation iteration.");
+		auto inputs = std::vector<number>(m_code.input_count);
+		for (auto&& [input, function] : zip(inputs, m_input_functions)) {
+			input = function(m_iteration);
+		}
+		auto state = run_simulation(m_code, inputs, m_delays, bits_override, truncate);
+		result = std::move(state.stack);
+		if (save_results) {
+			m_results.push_back(std::move(state.results));
+		}
+		++m_iteration;
+	}
+	return result;
+}
+
+std::vector<number> simulation::run_for(iteration_t iterations, bool save_results, std::optional<std::uint8_t> bits_override,
+										bool truncate) {
+	if (iterations > std::numeric_limits<iteration_t>::max() - m_iteration) {
+		throw py::value_error("Simulation iteration type overflow!");
+	}
+	return this->run_until(m_iteration + iterations, save_results, bits_override, truncate);
+}
+
+std::vector<number> simulation::run(bool save_results, std::optional<std::uint8_t> bits_override, bool truncate) {
+	if (m_input_length) {
+		return this->run_until(*m_input_length, save_results, bits_override, truncate);
+	}
+	throw py::index_error{"Tried to run unlimited simulation"};
+}
+
+iteration_t simulation::iteration() const noexcept {
+	return m_iteration;
+}
+
+pybind11::dict simulation::results() const noexcept {
+	auto results = py::dict{};
+	if (!m_results.empty()) {
+		for (auto const& [i, key] : enumerate(m_code.result_keys)) {
+			auto values = std::vector<number>{};
+			values.reserve(m_results.size());
+			for (auto const& result : m_results) {
+				values.push_back(result[i]);
+			}
+			results[py::str{key}] = py::array{static_cast<py::ssize_t>(values.size()), values.data()};
+		}
+	}
+	return results;
+}
+
+void simulation::clear_results() noexcept {
+	m_results.clear();
+}
+
+void simulation::clear_state() noexcept {
+	m_delays.clear();
+}
+
+} // namespace asic
diff --git a/src/simulation/simulation.h b/src/simulation/simulation.h
new file mode 100644
index 0000000000000000000000000000000000000000..c1a36cbc93492494af14a198c970a6a534477794
--- /dev/null
+++ b/src/simulation/simulation.h
@@ -0,0 +1,54 @@
+#ifndef ASIC_SIMULATION_DOD_H
+#define ASIC_SIMULATION_DOD_H
+
+#include "../number.h"
+#include "compile.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <optional>
+#include <pybind11/functional.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <variant>
+#include <vector>
+
+namespace asic {
+
+using iteration_t = std::uint32_t;
+using input_function_t = std::function<number(iteration_t)>;
+using input_provider_t = std::variant<number, std::vector<number>, input_function_t>;
+
+class simulation final {
+public:
+	simulation(pybind11::handle sfg, std::optional<std::vector<std::optional<input_provider_t>>> input_providers = std::nullopt);
+
+	void set_input(std::size_t index, input_provider_t input_provider);
+	void set_inputs(std::vector<std::optional<input_provider_t>> input_providers);
+
+	[[nodiscard]] std::vector<number> step(bool save_results, std::optional<std::uint8_t> bits_override, bool truncate);
+	[[nodiscard]] std::vector<number> run_until(iteration_t iteration, bool save_results, std::optional<std::uint8_t> bits_override,
+												bool truncate);
+	[[nodiscard]] std::vector<number> run_for(iteration_t iterations, bool save_results, std::optional<std::uint8_t> bits_override,
+											  bool truncate);
+	[[nodiscard]] std::vector<number> run(bool save_results, std::optional<std::uint8_t> bits_override, bool truncate);
+
+	[[nodiscard]] iteration_t iteration() const noexcept;
+	[[nodiscard]] pybind11::dict results() const noexcept;
+
+	void clear_results() noexcept;
+	void clear_state() noexcept;
+
+private:
+	simulation_code m_code;
+	std::vector<number> m_delays;
+	std::vector<input_function_t> m_input_functions;
+	std::optional<iteration_t> m_input_length;
+	iteration_t m_iteration = 0;
+	std::vector<std::vector<number>> m_results;
+};
+
+} // namespace asic
+
+#endif // ASIC_SIMULATION_DOD_H
\ No newline at end of file
diff --git a/src/span.h b/src/span.h
new file mode 100644
index 0000000000000000000000000000000000000000..2ad454e13e3978c74355ae3ca0f955fad1bb9753
--- /dev/null
+++ b/src/span.h
@@ -0,0 +1,314 @@
+#ifndef ASIC_SPAN_H
+#define ASIC_SPAN_H
+
+#include <cstddef>
+#include <type_traits>
+#include <utility>
+#include <iterator>
+#include <limits>
+#include <array>
+#include <algorithm>
+#include <cassert>
+
+namespace asic {
+
+constexpr auto dynamic_size = static_cast<std::size_t>(-1);
+
+// C++17-compatible std::span substitute.
+template <typename T, std::size_t Size = dynamic_size>
+class span;
+
+namespace detail {
+
+template <typename T>
+struct is_span_impl : std::false_type {};
+
+template <typename T, std::size_t Size>
+struct is_span_impl<span<T, Size>> : std::true_type {};
+
+template <typename T>
+struct is_span : is_span_impl<std::remove_cv_t<T>> {};
+
+template <typename T>
+constexpr auto is_span_v = is_span<T>::value;
+
+template <typename T>
+struct is_std_array_impl : std::false_type {};
+
+template <typename T, std::size_t Size>
+struct is_std_array_impl<std::array<T, Size>> : std::true_type {};
+
+template <typename T>
+struct is_std_array : is_std_array_impl<std::remove_cv_t<T>> {};
+
+template <typename T>
+constexpr auto is_std_array_v = is_std_array<T>::value;
+
+template <std::size_t From, std::size_t To>
+struct is_size_convertible : std::bool_constant<From == To || From == dynamic_size || To == dynamic_size> {};
+
+template <std::size_t From, std::size_t To>
+constexpr auto is_size_convertible_v = is_size_convertible<From, To>::value;
+
+template <typename From, typename To>
+struct is_element_type_convertible : std::bool_constant<std::is_convertible_v<From(*)[], To(*)[]>> {};
+
+template <typename From, typename To>
+constexpr auto is_element_type_convertible_v = is_element_type_convertible<From, To>::value;
+
+template <typename T, std::size_t Size>
+struct span_base {
+	using element_type	= T;
+	using pointer		= element_type*;
+	using size_type		= std::size_t;
+
+	constexpr span_base() noexcept = default;
+	constexpr span_base(pointer data, [[maybe_unused]] size_type size) : m_data(data) { assert(size == Size); }
+
+	template <size_type N>
+	constexpr span_base(span_base<T, N> other) : m_data(other.data()) {
+		static_assert(N == Size || N == dynamic_size);
+		assert(other.size() == Size);
+	}
+
+	[[nodiscard]] constexpr pointer data() const noexcept	{ return m_data; }
+	[[nodiscard]] constexpr size_type size() const noexcept	{ return Size; }
+
+private:
+	pointer m_data = nullptr;
+};
+
+template <typename T>
+struct span_base<T, dynamic_size> {
+	using element_type	= T;
+	using pointer		= element_type*;
+	using size_type		= std::size_t;
+
+	constexpr span_base() noexcept = default;
+	constexpr span_base(pointer data, size_type size) : m_data(data), m_size(size) {}
+
+	template <size_type N>
+	explicit constexpr span_base(span_base<T, N> other) : m_data(other.data()), m_size(other.size()) {}
+
+	[[nodiscard]] constexpr pointer data() const noexcept	{ return m_data; }
+	[[nodiscard]] constexpr size_type size() const noexcept	{ return m_size; }
+
+private:
+	pointer		m_data = nullptr;
+	size_type	m_size = 0;
+};
+
+template <typename T, std::size_t Size, std::size_t Offset, std::size_t N>
+struct subspan_type {
+	using type = span<
+		T,
+		(N != dynamic_size) ?
+			N :
+			(Size != dynamic_size) ?
+				Size - Offset :
+				Size
+	>;
+};
+
+template <typename T, std::size_t Size, std::size_t Offset, std::size_t Count>
+using subspan_type_t = typename subspan_type<T, Size, Offset, Count>::type;
+
+} // namespace detail
+
+template <typename T, std::size_t Size>
+class span final : public detail::span_base<T, Size> {
+public:
+	using element_type				= typename detail::span_base<T, Size>::element_type;
+	using pointer					= typename detail::span_base<T, Size>::pointer;
+	using size_type					= typename detail::span_base<T, Size>::size_type;
+	using value_type				= std::remove_cv_t<element_type>;
+	using reference					= element_type&;
+	using iterator					= element_type*;
+	using const_iterator			= const element_type*;
+	using reverse_iterator			= std::reverse_iterator<iterator>;
+	using const_reverse_iterator	= std::reverse_iterator<const_iterator>;
+
+	// Default constructor.
+	constexpr span() noexcept = default;
+
+	// Construct from pointer, size.
+	constexpr span(pointer data, size_type size) : detail::span_base<T, Size>(data, size) {}
+
+	// Copy constructor.
+	template <
+		typename U, std::size_t N,
+		typename = std::enable_if_t<detail::is_size_convertible_v<N, Size>>,
+		typename = std::enable_if_t<detail::is_element_type_convertible_v<U, T>>
+	>
+	constexpr span(span<U, N> const& other) : span(other.data(), other.size()) {}
+
+	// Copy assignment.
+	constexpr span& operator=(span const&) noexcept = default;
+
+	// Destructor.
+	~span() = default;
+
+	// Construct from begin, end.
+	constexpr span(pointer begin, pointer end) : span(begin, end - begin) {}
+
+	// Construct from C array.
+	template <std::size_t N>
+	constexpr span(element_type(&arr)[N]) noexcept : span(std::data(arr), N) {}
+
+	// Construct from std::array.
+	template <
+		std::size_t N,
+		typename = std::enable_if_t<N != 0>
+	>
+	constexpr span(std::array<value_type, N>& arr) noexcept : span(std::data(arr), N) {}
+
+	// Construct from empty std::array.
+	constexpr span(std::array<value_type, 0>&) noexcept : span() {}
+
+	// Construct from const std::array.
+	template <
+		std::size_t N,
+		typename = std::enable_if_t<N != 0>
+	>
+	constexpr span(std::array<value_type, N> const& arr) noexcept : span(std::data(arr), N) {}
+
+	// Construct from empty const std::array.
+	constexpr span(std::array<value_type, 0> const&) noexcept : span() {}
+
+	// Construct from other container.
+	template <
+		typename Container,
+		typename = std::enable_if_t<!detail::is_span_v<Container>>,
+		typename = std::enable_if_t<!detail::is_std_array_v<Container>>,
+		typename = decltype(std::data(std::declval<Container>())),
+		typename = decltype(std::size(std::declval<Container>())),
+		typename = std::enable_if_t<std::is_convertible_v<typename Container::pointer, pointer>>,
+		typename = std::enable_if_t<std::is_convertible_v<typename Container::pointer, decltype(std::data(std::declval<Container>()))>>
+	>
+	constexpr span(Container& container) : span(std::data(container), std::size(container)) {}
+
+	// Construct from other const container.
+	template <
+		typename Container,
+		typename Element = element_type,
+		typename = std::enable_if_t<std::is_const_v<Element>>,
+		typename = std::enable_if_t<!detail::is_span_v<Container>>,
+		typename = std::enable_if_t<!detail::is_std_array_v<Container>>,
+		typename = decltype(std::data(std::declval<Container>())),
+		typename = decltype(std::size(std::declval<Container>())),
+		typename = std::enable_if_t<std::is_convertible_v<typename Container::pointer, pointer>>,
+		typename = std::enable_if_t<std::is_convertible_v<typename Container::pointer, decltype(std::data(std::declval<Container>()))>>
+	>
+	constexpr span(Container const& container) : span(std::data(container), std::size(container)) {}
+
+	[[nodiscard]] constexpr iterator begin() const noexcept						{ return this->data(); }
+	[[nodiscard]] constexpr const_iterator cbegin() const noexcept				{ return this->data(); }
+	[[nodiscard]] constexpr iterator end() const noexcept						{ return this->data() + this->size(); }
+	[[nodiscard]] constexpr const_iterator cend() const noexcept				{ return this->data() + this->size(); }
+	[[nodiscard]] constexpr reverse_iterator rbegin() const noexcept			{ return std::make_reverse_iterator(this->end()); }
+	[[nodiscard]] constexpr const_reverse_iterator crbegin() const noexcept		{ return std::make_reverse_iterator(this->cend()); }
+	[[nodiscard]] constexpr reverse_iterator rend() const noexcept				{ return std::make_reverse_iterator(this->begin()); }
+	[[nodiscard]] constexpr const_reverse_iterator crend() const noexcept		{ return std::make_reverse_iterator(this->cbegin()); }
+
+	[[nodiscard]] constexpr reference operator[](size_type i) const noexcept	{ assert(i < this->size()); return this->data()[i]; }
+	[[nodiscard]] constexpr reference operator()(size_type i) const noexcept	{ assert(i < this->size()); return this->data()[i]; }
+
+	[[nodiscard]] constexpr size_type size_bytes() const noexcept				{ return this->size() * sizeof(element_type); }
+	[[nodiscard]] constexpr bool empty() const noexcept							{ return this->size() == 0; }
+
+	[[nodiscard]] constexpr reference front() const noexcept					{ assert(!this->empty()); return this->data()[0]; }
+	[[nodiscard]] constexpr reference back() const noexcept						{ assert(!this->empty()); return this->data()[this->size() - 1]; }
+
+	template <std::size_t N>
+	[[nodiscard]] constexpr span<T, N> first() const {
+		static_assert(N != dynamic_size && N <= Size);
+		return {this->data(), N};
+	}
+
+	template <std::size_t N>
+	[[nodiscard]] constexpr span<T, N> last() const {
+		static_assert(N != dynamic_size && N <= Size);
+		return {this->data() + (Size - N), N};
+	}
+
+	template <std::size_t Offset, std::size_t N = dynamic_size>
+	[[nodiscard]] constexpr auto subspan() const -> detail::subspan_type_t<T, Size, Offset, N> {
+		static_assert(Offset <= Size);
+		return {this->data() + Offset, (N == dynamic_size) ? this->size() - Offset : N};
+	}
+
+	[[nodiscard]] constexpr span<T, dynamic_size> first(size_type n) const {
+		assert(n <= this->size());
+		return { this->data(), n };
+	}
+
+	[[nodiscard]] constexpr span<T, dynamic_size> last(size_type n) const {
+		return this->subspan(this->size() - n);
+	}
+
+	[[nodiscard]] constexpr span<T, dynamic_size> subspan(size_type offset, size_type n = dynamic_size) const {
+		if constexpr (Size == dynamic_size) {
+			assert(offset <= this->size());
+			if (n == dynamic_size) {
+				return { this->data() + offset, this->size() - offset };
+			}
+			assert(n <= this->size());
+			assert(offset + n <= this->size());
+			return {this->data() + offset, n};
+		} else {
+			return span<T, dynamic_size>{*this}.subspan(offset, n);
+		}
+	}
+};
+
+template <typename T, std::size_t LhsSize, std::size_t RhsSize>
+[[nodiscard]] constexpr bool operator==(span<T, LhsSize> lhs, span<T, RhsSize> rhs) {
+	return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+
+template <typename T, std::size_t LhsSize, std::size_t RhsSize>
+[[nodiscard]] constexpr bool operator!=(span<T, LhsSize> lhs, span<T, RhsSize> rhs) {
+	return !(lhs == rhs);
+}
+
+template <typename T, std::size_t LhsSize, std::size_t RhsSize>
+[[nodiscard]] constexpr bool operator<(span<T, LhsSize> lhs, span<T, RhsSize> rhs) {
+	return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+
+template <typename T, std::size_t LhsSize, std::size_t RhsSize>
+[[nodiscard]] constexpr bool operator<=(span<T, LhsSize> lhs, span<T, RhsSize> rhs) {
+	return !(lhs > rhs);
+}
+
+template <typename T, std::size_t LhsSize, std::size_t RhsSize>
+[[nodiscard]] constexpr bool operator>(span<T, LhsSize> lhs, span<T, RhsSize> rhs) {
+	return rhs < lhs;
+}
+
+template <typename T, std::size_t LhsSize, std::size_t RhsSize>
+[[nodiscard]] constexpr bool operator>=(span<T, LhsSize> lhs, span<T, RhsSize> rhs) {
+	return !(lhs < rhs);
+}
+
+template <typename Container>
+span(Container&) -> span<typename Container::value_type>;
+
+template <typename Container>
+span(Container const&) -> span<typename Container::value_type const>;
+
+template <typename T, std::size_t N>
+span(T(&)[N]) -> span<T, N>;
+
+template <typename T, std::size_t N>
+span(std::array<T, N>&) -> span<T, N>;
+
+template <typename T, std::size_t N>
+span(std::array<T, N> const&) -> span<T const, N>;
+
+template <typename T, typename Dummy>
+span(T, Dummy&&) -> span<std::remove_reference_t<decltype(std::declval<T>()[0])>>;
+
+} // namespace asic
+
+#endif // ASIC_SPAN_H
\ No newline at end of file
diff --git a/test/conftest.py b/test/conftest.py
index 48b49489424817e1439f6b2b6eb3d7cd63b29a75..63cf5ce1d0eb3d6652dc4eee14113aba98a3f2fc 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -2,4 +2,4 @@ from test.fixtures.signal import signal, signals
 from test.fixtures.operation_tree import *
 from test.fixtures.port import *
 from test.fixtures.signal_flow_graph import *
-import pytest
+import pytest
\ No newline at end of file
diff --git a/test/fixtures/signal_flow_graph.py b/test/fixtures/signal_flow_graph.py
index 6cf434dbfa5fbad4380e4b4d05ef07b9a02e30ec..a2c25ec9b10083b08c46543c7a5bfc30607560ef 100644
--- a/test/fixtures/signal_flow_graph.py
+++ b/test/fixtures/signal_flow_graph.py
@@ -1,12 +1,13 @@
 import pytest
 
-from b_asic import SFG, Input, Output, Constant, Register, ConstantMultiplication, Addition, Butterfly
+from b_asic import SFG, Input, Output, Constant, Delay, Addition, ConstantMultiplication, Butterfly, AbstractOperation, Name, TypeName, SignalSourceProvider
+from typing import Optional
 
 
 @pytest.fixture
 def sfg_two_inputs_two_outputs():
     """Valid SFG with two inputs and two outputs.
-             .               .
+         .               .
     in1-------+  +--------->out1
          .    |  |       .
          .    v  |       .
@@ -17,9 +18,9 @@ def sfg_two_inputs_two_outputs():
        | .          ^    .
        | .          |    .
        +------------+    .
-             .               .
+         .               .
     out1 = in1 + in2
-        out2 = in1 + 2 * in2
+    out2 = in1 + 2 * in2
     """
     in1 = Input("IN1")
     in2 = Input("IN2")
@@ -34,7 +35,7 @@ def sfg_two_inputs_two_outputs():
 def sfg_two_inputs_two_outputs_independent():
     """Valid SFG with two inputs and two outputs, where the first output only depends
     on the first input and the second output only depends on the second input.
-             .               .
+         .               .
     in1-------------------->out1
          .               .
          .               .
@@ -45,9 +46,9 @@ def sfg_two_inputs_two_outputs_independent():
          .   |      ^    .
          .   |      |    .
          .   +------+    .
-             .               .
+         .               .
     out1 = in1
-        out2 = in2 + 3
+    out2 = in2 + 3
     """
     in1 = Input("IN1")
     in2 = Input("IN2")
@@ -62,14 +63,15 @@ def sfg_two_inputs_two_outputs_independent():
 def sfg_two_inputs_two_outputs_independent_with_cmul():
     """Valid SFG with two inputs and two outputs, where the first output only depends
     on the first input and the second output only depends on the second input.
-             .               .
+        .                 .
     in1--->cmul1--->cmul2--->out1
-         .               .
-         .               .
-    c1------+    .
-            |   
-            v    .
+        .                 .
+        .                 .
+        .  c1             .
+        .   |             .
+        .   v             .
     in2--->add1---->cmul3--->out2
+        .                 .
     """
     in1 = Input("IN1")
     in2 = Input("IN2")
@@ -78,8 +80,8 @@ def sfg_two_inputs_two_outputs_independent_with_cmul():
     cmul3 = ConstantMultiplication(2, add1, "CMUL3", 3)
     cmul1 = ConstantMultiplication(5, in1, "CMUL1", 5)
     cmul2 = ConstantMultiplication(4, cmul1, "CMUL2", 4)
-    out1 = Output(in1, "OUT1")
-    out2 = Output(add1, "OUT2")
+    out1 = Output(cmul2, "OUT1")
+    out2 = Output(cmul3, "OUT2")
     return SFG(inputs=[in1, in2], outputs=[out1, out2])
 
 
@@ -109,10 +111,9 @@ def sfg_delay():
     out1 = in1'
     """
     in1 = Input()
-    reg1 = Register(in1)
-    out1 = Output(reg1)
-    return SFG(inputs=[in1], outputs=[out1])
-
+    t1 = Delay(in1)
+    out1 = Output(t1)
+    return SFG(inputs = [in1], outputs = [out1])
 
 @pytest.fixture
 def sfg_accumulator():
@@ -121,51 +122,90 @@ def sfg_accumulator():
     """
     data_in = Input()
     reset = Input()
-    reg = Register()
-    reg.input(0).connect((reg + data_in) * (1 - reset))
-    data_out = Output(reg)
-    return SFG(inputs=[data_in, reset], outputs=[data_out])
+    t = Delay()
+    t << (t + data_in) * (1 - reset)
+    data_out = Output(t)
+    return SFG(inputs = [data_in, reset], outputs = [data_out])
 
 
 @pytest.fixture
-def simple_filter():
+def sfg_simple_accumulator():
+    """Valid SFG with two inputs and one output.
+         .                .
+    in1----->add1-----+----->out1
+         .    ^       |   .
+         .    |       |   .
+         .    +--t1<--+   .
+         .                .
+    """
+    in1 = Input()
+    t1 = Delay()
+    add1 = in1 + t1
+    t1 << add1
+    out1 = Output(add1)
+    return SFG(inputs = [in1], outputs = [out1])
+
+@pytest.fixture
+def sfg_simple_filter():
     """A valid SFG that is used as a filter in the first lab for TSTE87.
-                +----<constmul1----+
-                |                  |
-                |                  |
-    in1>------add1>------reg>------+------out1>
+         .                 .
+         .   +--cmul1<--+  .
+         .   |          |  .
+         .   v          |  .
+    in1---->add1----->t1+---->out1
+         .                 .
     """
     in1 = Input("IN1")
-    constmul1 = ConstantMultiplication(0.5, name="CMUL1")
-    add1 = Addition(in1, constmul1, "ADD1")
+    cmul1 = ConstantMultiplication(0.5, name="CMUL1")
+    add1 = Addition(in1, cmul1, "ADD1")
     add1.input(1).signals[0].name = "S2"
-    reg = Register(add1, name="REG1")
-    constmul1.input(0).connect(reg, "S1")
-    out1 = Output(reg, "OUT1")
+    t1 = Delay(add1, name="T1")
+    cmul1.input(0).connect(t1, "S1")
+    out1 = Output(t1, "OUT1")
     return SFG(inputs=[in1], outputs=[out1], name="simple_filter")
 
-
 @pytest.fixture
-def precedence_sfg_registers():
-    """A sfg with registers and interesting layout for precednce list generation.
+def sfg_custom_operation():
+    """A valid SFG containing a custom operation."""
+    class CustomOperation(AbstractOperation):
+        def __init__(self, src0: Optional[SignalSourceProvider] = None, name: Name = ""):
+            super().__init__(input_count = 1, output_count = 2, name = name, input_sources = [src0])
+        
+        @classmethod
+        def type_name(self) -> TypeName:
+            return "custom"
+
+        def evaluate(self, a):
+            return a * 2, 2 ** a
 
+    in1 = Input()
+    custom1 = CustomOperation(in1)
+    out1 = Output(custom1.output(0))
+    out2 = Output(custom1.output(1))
+    return SFG(inputs=[in1], outputs=[out1, out2])
+
+
+@pytest.fixture
+def precedence_sfg_delays():
+    """A sfg with delays and interesting layout for precednce list generation.
+         .                                          .
     IN1>--->C0>--->ADD1>--->Q1>---+--->A0>--->ADD4>--->OUT1
-                     ^            |            ^
-                     |            T1           |
-                     |            |            |
-                   ADD2<---<B1<---+--->A1>--->ADD3
-                     ^            |            ^
-                     |            T2           |
-                     |            |            |
-                     +-----<B2<---+--->A2>-----+
+         .           ^            |            ^    .
+         .           |            T1           |    .
+         .           |            |            |    .
+         .         ADD2<---<B1<---+--->A1>--->ADD3  .
+         .           ^            |            ^    .
+         .           |            T2           |    .
+         .           |            |            |    .
+         .           +-----<B2<---+--->A2>-----+    .
     """
     in1 = Input("IN1")
     c0 = ConstantMultiplication(5, in1, "C0")
     add1 = Addition(c0, None, "ADD1")
     # Not sure what operation "Q" is supposed to be in the example
     Q1 = ConstantMultiplication(1, add1, "Q1")
-    T1 = Register(Q1, 0, "T1")
-    T2 = Register(T1, 0, "T2")
+    T1 = Delay(Q1, 0, "T1")
+    T2 = Delay(T1, 0, "T2")
     b2 = ConstantMultiplication(2, T2, "B2")
     b1 = ConstantMultiplication(3, T1, "B1")
     add2 = Addition(b1, b2, "ADD2")
@@ -181,14 +221,14 @@ def precedence_sfg_registers():
 
 
 @pytest.fixture
-def precedence_sfg_registers_and_constants():
+def precedence_sfg_delays_and_constants():
     in1 = Input("IN1")
     c0 = ConstantMultiplication(5, in1, "C0")
     add1 = Addition(c0, None, "ADD1")
     # Not sure what operation "Q" is supposed to be in the example
     Q1 = ConstantMultiplication(1, add1, "Q1")
-    T1 = Register(Q1, 0, "T1")
-    const1 = Constant(10, "CONST1")  # Replace T2 register with a constant
+    T1 = Delay(Q1, 0, "T1")
+    const1 = Constant(10, "CONST1")  # Replace T2 delay with a constant
     b2 = ConstantMultiplication(2, const1, "B2")
     b1 = ConstantMultiplication(3, T1, "B1")
     add2 = Addition(b1, b2, "ADD2")
@@ -200,6 +240,6 @@ def precedence_sfg_registers_and_constants():
     # Replace ADD4 with a butterfly to test multiple output ports
     bfly1 = Butterfly(a0, add3, "BFLY1")
     out1 = Output(bfly1.output(0), "OUT1")
-    out2 = Output(bfly1.output(1), "OUT2")
+    Output(bfly1.output(1), "OUT2")
 
     return SFG(inputs=[in1], outputs=[out1], name="SFG")
diff --git a/test/test_fast_simulation.py b/test/test_fast_simulation.py
new file mode 100644
index 0000000000000000000000000000000000000000..6eb3b251bec6a2c556230dbedcb8f712c9f56d48
--- /dev/null
+++ b/test/test_fast_simulation.py
@@ -0,0 +1,226 @@
+import pytest
+import numpy as np
+
+from b_asic import SFG, Output, FastSimulation, Addition, Subtraction, Constant, Butterfly
+
+
+class TestRunFor:
+    def test_with_lambdas_as_input(self, sfg_two_inputs_two_outputs):
+        simulation = FastSimulation(sfg_two_inputs_two_outputs, [lambda n: n + 3, lambda n: 1 + n * 2])
+
+        output = simulation.run_for(101, save_results = True)
+
+        assert output[0] == 304
+        assert output[1] == 505
+
+        assert simulation.results["0"][100] == 304
+        assert simulation.results["1"][100] == 505
+
+        assert simulation.results["in1"][0] == 3
+        assert simulation.results["in2"][0] == 1
+        assert simulation.results["add1"][0] == 4
+        assert simulation.results["add2"][0] == 5
+        assert simulation.results["0"][0] == 4
+        assert simulation.results["1"][0] == 5
+
+        assert simulation.results["in1"][1] == 4
+        assert simulation.results["in2"][1] == 3
+        assert simulation.results["add1"][1] == 7
+        assert simulation.results["add2"][1] == 10
+        assert simulation.results["0"][1] == 7
+        assert simulation.results["1"][1] == 10
+
+        assert simulation.results["in1"][2] == 5
+        assert simulation.results["in2"][2] == 5
+        assert simulation.results["add1"][2] == 10
+        assert simulation.results["add2"][2] == 15
+        assert simulation.results["0"][2] == 10
+        assert simulation.results["1"][2] == 15
+
+        assert simulation.results["in1"][3] == 6
+        assert simulation.results["in2"][3] == 7
+        assert simulation.results["add1"][3] == 13
+        assert simulation.results["add2"][3] == 20
+        assert simulation.results["0"][3] == 13
+        assert simulation.results["1"][3] == 20
+
+    def test_with_numpy_arrays_as_input(self, sfg_two_inputs_two_outputs):
+        input0 = np.array([5, 9, 25, -5, 7])
+        input1 = np.array([7, 3, 3,  54, 2])
+        simulation = FastSimulation(sfg_two_inputs_two_outputs, [input0, input1])
+
+        output = simulation.run_for(5, save_results = True)
+
+        assert output[0] == 9
+        assert output[1] == 11
+
+        assert isinstance(simulation.results["in1"], np.ndarray)
+        assert isinstance(simulation.results["in2"], np.ndarray)
+        assert isinstance(simulation.results["add1"], np.ndarray)
+        assert isinstance(simulation.results["add2"], np.ndarray)
+        assert isinstance(simulation.results["0"], np.ndarray)
+        assert isinstance(simulation.results["1"], np.ndarray)
+
+        assert simulation.results["in1"][0] == 5
+        assert simulation.results["in2"][0] == 7
+        assert simulation.results["add1"][0] == 12
+        assert simulation.results["add2"][0] == 19
+        assert simulation.results["0"][0] == 12
+        assert simulation.results["1"][0] == 19
+
+        assert simulation.results["in1"][1] == 9
+        assert simulation.results["in2"][1] == 3
+        assert simulation.results["add1"][1] == 12
+        assert simulation.results["add2"][1] == 15
+        assert simulation.results["0"][1] == 12
+        assert simulation.results["1"][1] == 15
+
+        assert simulation.results["in1"][2] == 25
+        assert simulation.results["in2"][2] == 3
+        assert simulation.results["add1"][2] == 28
+        assert simulation.results["add2"][2] == 31
+        assert simulation.results["0"][2] == 28
+        assert simulation.results["1"][2] == 31
+
+        assert simulation.results["in1"][3] == -5
+        assert simulation.results["in2"][3] == 54
+        assert simulation.results["add1"][3] == 49
+        assert simulation.results["add2"][3] == 103
+        assert simulation.results["0"][3] == 49
+        assert simulation.results["1"][3] == 103
+
+        assert simulation.results["0"][4] == 9
+        assert simulation.results["1"][4] == 11
+    
+    def test_with_numpy_array_overflow(self, sfg_two_inputs_two_outputs):
+        input0 = np.array([5, 9, 25, -5, 7])
+        input1 = np.array([7, 3, 3,  54, 2])
+        simulation = FastSimulation(sfg_two_inputs_two_outputs, [input0, input1])
+        simulation.run_for(5)
+        with pytest.raises(IndexError):
+            simulation.step()
+
+    def test_run_whole_numpy_array(self, sfg_two_inputs_two_outputs):
+        input0 = np.array([5, 9, 25, -5, 7])
+        input1 = np.array([7, 3, 3,  54, 2])
+        simulation = FastSimulation(sfg_two_inputs_two_outputs, [input0, input1])
+        simulation.run()
+        assert len(simulation.results["0"]) == 5
+        assert len(simulation.results["1"]) == 5
+        with pytest.raises(IndexError):
+            simulation.step()
+
+    def test_delay(self, sfg_delay):
+        simulation = FastSimulation(sfg_delay)
+        simulation.set_input(0, [5, -2, 25, -6, 7, 0])
+        simulation.run_for(6, save_results = True)
+
+        assert simulation.results["0"][0] == 0
+        assert simulation.results["0"][1] == 5
+        assert simulation.results["0"][2] == -2
+        assert simulation.results["0"][3] == 25
+        assert simulation.results["0"][4] == -6
+        assert simulation.results["0"][5] == 7
+
+class TestRun:
+    def test_save_results(self, sfg_two_inputs_two_outputs):
+        simulation = FastSimulation(sfg_two_inputs_two_outputs, [2, 3])
+        assert not simulation.results
+        simulation.run_for(10, save_results = False)
+        assert not simulation.results
+        simulation.run_for(10)
+        assert len(simulation.results["0"]) == 10
+        assert len(simulation.results["1"]) == 10
+        simulation.run_for(10, save_results = True)
+        assert len(simulation.results["0"]) == 20
+        assert len(simulation.results["1"]) == 20
+        simulation.run_for(10, save_results = False)
+        assert len(simulation.results["0"]) == 20
+        assert len(simulation.results["1"]) == 20
+        simulation.run_for(13, save_results = True)
+        assert len(simulation.results["0"]) == 33
+        assert len(simulation.results["1"]) == 33
+        simulation.step(save_results = False)
+        assert len(simulation.results["0"]) == 33
+        assert len(simulation.results["1"]) == 33
+        simulation.step()
+        assert len(simulation.results["0"]) == 34
+        assert len(simulation.results["1"]) == 34
+        simulation.clear_results()
+        assert not simulation.results
+
+    def test_nested(self, sfg_nested):
+        input0 = np.array([5, 9])
+        input1 = np.array([7, 3])
+        simulation = FastSimulation(sfg_nested, [input0, input1])
+
+        output0 = simulation.step()
+        output1 = simulation.step()
+
+        assert output0[0] == 11405
+        assert output1[0] == 4221
+    
+    def test_accumulator(self, sfg_accumulator):
+        data_in = np.array([5, -2, 25, -6, 7, 0])
+        reset   = np.array([0, 0,  0,  1,  0, 0])
+        simulation = FastSimulation(sfg_accumulator, [data_in, reset])
+        output0 = simulation.step()
+        output1 = simulation.step()
+        output2 = simulation.step()
+        output3 = simulation.step()
+        output4 = simulation.step()
+        output5 = simulation.step()
+        assert output0[0] == 0
+        assert output1[0] == 5
+        assert output2[0] == 3
+        assert output3[0] == 28
+        assert output4[0] == 0
+        assert output5[0] == 7
+
+    def test_simple_accumulator(self, sfg_simple_accumulator):
+        data_in = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+        simulation = FastSimulation(sfg_simple_accumulator, [data_in])
+        simulation.run()
+        assert list(simulation.results["0"]) == [0, 1, 3, 6, 10, 15, 21, 28, 36, 45]
+        
+    def test_simple_filter(self, sfg_simple_filter):
+        input0 = np.array([1, 2, 3, 4, 5])
+        simulation = FastSimulation(sfg_simple_filter, [input0])
+        simulation.run_for(len(input0), save_results = True)
+        assert all(simulation.results["0"] == np.array([0, 1.0, 2.5, 4.25, 6.125]))
+
+    def test_custom_operation(self, sfg_custom_operation):
+        simulation = FastSimulation(sfg_custom_operation, [lambda n: n + 1])
+        simulation.run_for(5)
+        assert all(simulation.results["0"] == np.array([2, 4, 6, 8, 10]))
+        assert all(simulation.results["1"] == np.array([2, 4, 8, 16, 32]))
+
+
+class TestLarge:
+    def test_1k_additions(self):
+        prev_op = Addition(Constant(1), Constant(1))
+        for _ in range(999):
+            prev_op = Addition(prev_op, Constant(2))
+        sfg = SFG(outputs=[Output(prev_op)])
+        simulation = FastSimulation(sfg, [])
+        assert simulation.step()[0] == 2000
+        
+    def test_1k_subtractions(self):
+        prev_op = Subtraction(Constant(0), Constant(2))
+        for _ in range(999):
+            prev_op = Subtraction(prev_op, Constant(2))
+        sfg = SFG(outputs=[Output(prev_op)])
+        simulation = FastSimulation(sfg, [])
+        assert simulation.step()[0] == -2000
+        
+    def test_1k_butterfly(self):
+        prev_op_add = Addition(Constant(1), Constant(1))
+        prev_op_sub = Subtraction(Constant(-1), Constant(1))
+        for _ in range(499):
+            prev_op_add = Addition(prev_op_add, Constant(2))
+        for _ in range(499):
+            prev_op_sub = Subtraction(prev_op_sub, Constant(2))
+        butterfly = Butterfly(prev_op_add, prev_op_sub)
+        sfg = SFG(outputs=[Output(butterfly.output(0)), Output(butterfly.output(1))])
+        simulation = FastSimulation(sfg, [])
+        assert list(simulation.step()) == [0, 2000]
\ No newline at end of file
diff --git a/test/test_schema.py b/test/test_schema.py
index 510a0d997cb6cbdb525d600f992cbbae3468e8dd..78a713a9ceda574feede72f48bacf02e6a9c4025 100644
--- a/test/test_schema.py
+++ b/test/test_schema.py
@@ -6,51 +6,51 @@ from b_asic import Schema, Addition, ConstantMultiplication
 
 
 class TestInit:
-    def test_simple_filter_normal_latency(self, simple_filter):
-        simple_filter.set_latency_of_type(Addition.type_name(), 5)
-        simple_filter.set_latency_of_type(ConstantMultiplication.type_name(), 4)
+    def test_simple_filter_normal_latency(self, sfg_simple_filter):
+        sfg_simple_filter.set_latency_of_type(Addition.type_name(), 5)
+        sfg_simple_filter.set_latency_of_type(ConstantMultiplication.type_name(), 4)
 
-        schema = Schema(simple_filter)
+        schema = Schema(sfg_simple_filter)
 
         assert schema._start_times == {"add1": 4, "cmul1": 0}
 
-    def test_complicated_single_outputs_normal_latency(self, precedence_sfg_registers):
-        precedence_sfg_registers.set_latency_of_type(Addition.type_name(), 4)
-        precedence_sfg_registers.set_latency_of_type(ConstantMultiplication.type_name(), 3)
+    def test_complicated_single_outputs_normal_latency(self, precedence_sfg_delays):
+        precedence_sfg_delays.set_latency_of_type(Addition.type_name(), 4)
+        precedence_sfg_delays.set_latency_of_type(ConstantMultiplication.type_name(), 3)
 
-        schema = Schema(precedence_sfg_registers, scheduling_alg="ASAP")
+        schema = Schema(precedence_sfg_delays, scheduling_alg="ASAP")
 
         for op in schema._sfg.get_operations_topological_order():
             print(op.latency_offsets)
 
         start_times_names = dict()
         for op_id, start_time in schema._start_times.items():
-            op_name = precedence_sfg_registers.find_by_id(op_id).name
+            op_name = precedence_sfg_delays.find_by_id(op_id).name
             start_times_names[op_name] = start_time
 
         assert start_times_names == {"C0": 0, "B1": 0, "B2": 0, "ADD2": 3, "ADD1": 7, "Q1": 11,
                                      "A0": 14, "A1": 0, "A2": 0, "ADD3": 3, "ADD4": 17}
 
-    def test_complicated_single_outputs_complex_latencies(self, precedence_sfg_registers):
-        precedence_sfg_registers.set_latency_offsets_of_type(ConstantMultiplication.type_name(), {'in0': 3, 'out0': 5})
+    def test_complicated_single_outputs_complex_latencies(self, precedence_sfg_delays):
+        precedence_sfg_delays.set_latency_offsets_of_type(ConstantMultiplication.type_name(), {'in0': 3, 'out0': 5})
 
-        precedence_sfg_registers.find_by_name("B1")[0].set_latency_offsets({'in0': 4, 'out0': 7})
-        precedence_sfg_registers.find_by_name("B2")[0].set_latency_offsets({'in0': 1, 'out0': 4})
-        precedence_sfg_registers.find_by_name("ADD2")[0].set_latency_offsets({'in0': 4, 'in1': 2, 'out0': 4})
-        precedence_sfg_registers.find_by_name("ADD1")[0].set_latency_offsets({'in0': 1, 'in1': 2, 'out0': 4})
-        precedence_sfg_registers.find_by_name("Q1")[0].set_latency_offsets({'in0': 3, 'out0': 6})
-        precedence_sfg_registers.find_by_name("A0")[0].set_latency_offsets({'in0': 0, 'out0': 2})
+        precedence_sfg_delays.find_by_name("B1")[0].set_latency_offsets({'in0': 4, 'out0': 7})
+        precedence_sfg_delays.find_by_name("B2")[0].set_latency_offsets({'in0': 1, 'out0': 4})
+        precedence_sfg_delays.find_by_name("ADD2")[0].set_latency_offsets({'in0': 4, 'in1': 2, 'out0': 4})
+        precedence_sfg_delays.find_by_name("ADD1")[0].set_latency_offsets({'in0': 1, 'in1': 2, 'out0': 4})
+        precedence_sfg_delays.find_by_name("Q1")[0].set_latency_offsets({'in0': 3, 'out0': 6})
+        precedence_sfg_delays.find_by_name("A0")[0].set_latency_offsets({'in0': 0, 'out0': 2})
 
-        precedence_sfg_registers.find_by_name("A1")[0].set_latency_offsets({'in0': 0, 'out0': 5})
-        precedence_sfg_registers.find_by_name("A2")[0].set_latency_offsets({'in0': 2, 'out0': 3})
-        precedence_sfg_registers.find_by_name("ADD3")[0].set_latency_offsets({'in0': 2, 'in1': 1, 'out0': 4})
-        precedence_sfg_registers.find_by_name("ADD4")[0].set_latency_offsets({'in0': 6, 'in1': 7, 'out0': 9})
+        precedence_sfg_delays.find_by_name("A1")[0].set_latency_offsets({'in0': 0, 'out0': 5})
+        precedence_sfg_delays.find_by_name("A2")[0].set_latency_offsets({'in0': 2, 'out0': 3})
+        precedence_sfg_delays.find_by_name("ADD3")[0].set_latency_offsets({'in0': 2, 'in1': 1, 'out0': 4})
+        precedence_sfg_delays.find_by_name("ADD4")[0].set_latency_offsets({'in0': 6, 'in1': 7, 'out0': 9})
 
-        schema = Schema(precedence_sfg_registers, scheduling_alg="ASAP")
+        schema = Schema(precedence_sfg_delays, scheduling_alg="ASAP")
 
         start_times_names = dict()
         for op_id, start_time in schema._start_times.items():
-            op_name = precedence_sfg_registers.find_by_id(op_id).name
+            op_name = precedence_sfg_delays.find_by_id(op_id).name
             start_times_names[op_name] = start_time
 
         assert start_times_names == {'C0': 0, 'B1': 0, 'B2': 0, 'ADD2': 3, 'ADD1': 5, 'Q1': 6, 'A0': 12,
diff --git a/test/test_sfg.py b/test/test_sfg.py
index 5a13e151dbb682c60f732cfc8399da4ea1e3a34b..460996c13c15c6cef094a75f8bba1df79087d590 100644
--- a/test/test_sfg.py
+++ b/test/test_sfg.py
@@ -3,7 +3,7 @@ import io
 import sys
 
 
-from b_asic import SFG, Signal, Input, Output, Constant, ConstantMultiplication, Addition, Multiplication, Register, \
+from b_asic import SFG, Signal, Input, Output, Constant, ConstantMultiplication, Addition, Multiplication, Delay, \
     Butterfly, Subtraction, SquareRoot
 
 
@@ -108,17 +108,16 @@ class TestPrintSfg:
             str(sfg.find_by_name("OUT1")[0]) + "\n" + \
             "----------------------------------------------------------------------------------------------------\n"
 
-    def test_simple_filter(self, simple_filter):
-
-        assert simple_filter.__str__() == \
+    def test_simple_filter(self, sfg_simple_filter):
+        assert sfg_simple_filter.__str__() == \
             "id: no_id, \tname: simple_filter, \tinputs: {0: '-'}, \toutputs: {0: '-'}\n" + \
             "Internal Operations:\n" + \
             "----------------------------------------------------------------------------------------------------\n" + \
-            str(simple_filter.find_by_name("IN1")[0]) + "\n" + \
-            str(simple_filter.find_by_name("ADD1")[0]) + "\n" + \
-            str(simple_filter.find_by_name("REG1")[0]) + "\n" + \
-            str(simple_filter.find_by_name("CMUL1")[0]) + "\n" + \
-            str(simple_filter.find_by_name("OUT1")[0]) + "\n" + \
+            str(sfg_simple_filter.find_by_name("IN1")[0]) + "\n" + \
+            str(sfg_simple_filter.find_by_name("ADD1")[0]) + "\n" + \
+            str(sfg_simple_filter.find_by_name("T1")[0]) + "\n" + \
+            str(sfg_simple_filter.find_by_name("CMUL1")[0]) + "\n" + \
+            str(sfg_simple_filter.find_by_name("OUT1")[0]) + "\n" + \
             "----------------------------------------------------------------------------------------------------\n"
 
 
@@ -383,9 +382,9 @@ class TestFindComponentsWithTypeName:
 
 class TestGetPrecedenceList:
 
-    def test_inputs_registers(self, precedence_sfg_registers):
+    def test_inputs_delays(self, precedence_sfg_delays):
 
-        precedence_list = precedence_sfg_registers.get_precedence_list()
+        precedence_list = precedence_sfg_delays.get_precedence_list()
 
         assert len(precedence_list) == 7
 
@@ -410,9 +409,9 @@ class TestGetPrecedenceList:
         assert set([port.operation.key(port.index, port.operation.name)
                     for port in precedence_list[6]]) == {"ADD4"}
 
-    def test_inputs_constants_registers_multiple_outputs(self, precedence_sfg_registers_and_constants):
+    def test_inputs_constants_delays_multiple_outputs(self, precedence_sfg_delays_and_constants):
 
-        precedence_list = precedence_sfg_registers_and_constants.get_precedence_list()
+        precedence_list = precedence_sfg_delays_and_constants.get_precedence_list()
 
         assert len(precedence_list) == 7
 
@@ -494,8 +493,8 @@ class TestGetPrecedenceList:
 
 
 class TestPrintPrecedence:
-    def test_registers(self, precedence_sfg_registers):
-        sfg = precedence_sfg_registers
+    def test_delays(self, precedence_sfg_delays):
+        sfg = precedence_sfg_delays
 
         captured_output = io.StringIO()
         sys.stdout = captured_output
@@ -699,35 +698,35 @@ class TestConnectExternalSignalsToComponentsMultipleComp:
 
 
 class TestTopologicalOrderOperations:
-    def test_feedback_sfg(self, simple_filter):
-        topological_order = simple_filter.get_operations_topological_order()
+    def test_feedback_sfg(self, sfg_simple_filter):
+        topological_order = sfg_simple_filter.get_operations_topological_order()
 
-        assert [comp.name for comp in topological_order] == ["IN1", "ADD1", "REG1", "CMUL1", "OUT1"]
+        assert [comp.name for comp in topological_order] == ["IN1", "ADD1", "T1", "CMUL1", "OUT1"]
 
     def test_multiple_independent_inputs(self, sfg_two_inputs_two_outputs_independent):
         topological_order = sfg_two_inputs_two_outputs_independent.get_operations_topological_order()
 
         assert [comp.name for comp in topological_order] == ["IN1", "OUT1", "IN2", "C1", "ADD1", "OUT2"]
 
-    def test_complex_graph(self, precedence_sfg_registers):
-        topological_order = precedence_sfg_registers.get_operations_topological_order()
+    def test_complex_graph(self, precedence_sfg_delays):
+        topological_order = precedence_sfg_delays.get_operations_topological_order()
 
         assert [comp.name for comp in topological_order] == \
             ['IN1', 'C0', 'ADD1', 'Q1', 'A0', 'T1', 'B1', 'A1', 'T2', 'B2', 'ADD2', 'A2', 'ADD3', 'ADD4', 'OUT1']
 
 
 class TestRemove:
-    def test_remove_single_input_outputs(self, simple_filter):
-        new_sfg = simple_filter.remove_operation("cmul1")
+    def test_remove_single_input_outputs(self, sfg_simple_filter):
+        new_sfg = sfg_simple_filter.remove_operation("cmul1")
 
-        assert set(op.name for op in simple_filter.find_by_name("REG1")[0].subsequent_operations) == {"CMUL1", "OUT1"}
-        assert set(op.name for op in new_sfg.find_by_name("REG1")[0].subsequent_operations) == {"ADD1", "OUT1"}
+        assert set(op.name for op in sfg_simple_filter.find_by_name("T1")[0].subsequent_operations) == {"CMUL1", "OUT1"}
+        assert set(op.name for op in new_sfg.find_by_name("T1")[0].subsequent_operations) == {"ADD1", "OUT1"}
 
-        assert set(op.name for op in simple_filter.find_by_name("ADD1")[0].preceding_operations) == {"CMUL1", "IN1"}
-        assert set(op.name for op in new_sfg.find_by_name("ADD1")[0].preceding_operations) == {"REG1", "IN1"}
+        assert set(op.name for op in sfg_simple_filter.find_by_name("ADD1")[0].preceding_operations) == {"CMUL1", "IN1"}
+        assert set(op.name for op in new_sfg.find_by_name("ADD1")[0].preceding_operations) == {"T1", "IN1"}
 
-        assert "S1" in set([sig.name for sig in simple_filter.find_by_name("REG1")[0].output(0).signals])
-        assert "S2" in set([sig.name for sig in new_sfg.find_by_name("REG1")[0].output(0).signals])
+        assert "S1" in set([sig.name for sig in sfg_simple_filter.find_by_name("T1")[0].output(0).signals])
+        assert "S2" in set([sig.name for sig in new_sfg.find_by_name("T1")[0].output(0).signals])
 
     def test_remove_multiple_inputs_outputs(self, butterfly_operation_tree):
         out1 = Output(butterfly_operation_tree.output(0), "OUT1")
@@ -780,9 +779,9 @@ class TestRemove:
 
         assert "bfly2" not in set(op.name for op in new_sfg.operations)
 
-    def remove_different_number_inputs_outputs(self, simple_filter):
+    def remove_different_number_inputs_outputs(self, sfg_simple_filter):
         with pytest.raises(ValueError):
-            simple_filter.remove_operation("add1")
+            sfg_simple_filter.remove_operation("add1")
 
 
 class TestGetComponentsOfType:
diff --git a/test/test_simulation.py b/test/test_simulation.py
index 70d4ede54d39dc3bfed2dac87ca3703016935a92..e1075a74a2c839eefcfa122ff02ec6d11314b11e 100644
--- a/test/test_simulation.py
+++ b/test/test_simulation.py
@@ -1,90 +1,96 @@
 import pytest
 import numpy as np
 
-from b_asic import SFG, Output, Simulation
+from b_asic import SFG, Simulation
 
 
 class TestRunFor:
     def test_with_lambdas_as_input(self, sfg_two_inputs_two_outputs):
-        simulation = Simulation(sfg_two_inputs_two_outputs, [lambda n: n + 3, lambda n: 1 + n * 2], save_results = True)
+        simulation = Simulation(sfg_two_inputs_two_outputs, [lambda n: n + 3, lambda n: 1 + n * 2])
 
-        output = simulation.run_for(101)
+        output = simulation.run_for(101, save_results = True)
 
         assert output[0] == 304
         assert output[1] == 505
 
-        assert simulation.results[100]["0"] == 304
-        assert simulation.results[100]["1"] == 505
-
-        assert simulation.results[0]["in1"] == 3
-        assert simulation.results[0]["in2"] == 1
-        assert simulation.results[0]["add1"] == 4
-        assert simulation.results[0]["add2"] == 5
-        assert simulation.results[0]["0"] == 4
-        assert simulation.results[0]["1"] == 5
-
-        assert simulation.results[1]["in1"] == 4
-        assert simulation.results[1]["in2"] == 3
-        assert simulation.results[1]["add1"] == 7
-        assert simulation.results[1]["add2"] == 10
-        assert simulation.results[1]["0"] == 7
-        assert simulation.results[1]["1"] == 10
-
-        assert simulation.results[2]["in1"] == 5
-        assert simulation.results[2]["in2"] == 5
-        assert simulation.results[2]["add1"] == 10
-        assert simulation.results[2]["add2"] == 15
-        assert simulation.results[2]["0"] == 10
-        assert simulation.results[2]["1"] == 15
-
-        assert simulation.results[3]["in1"] == 6
-        assert simulation.results[3]["in2"] == 7
-        assert simulation.results[3]["add1"] == 13
-        assert simulation.results[3]["add2"] == 20
-        assert simulation.results[3]["0"] == 13
-        assert simulation.results[3]["1"] == 20
+        assert simulation.results["0"][100] == 304
+        assert simulation.results["1"][100] == 505
+
+        assert simulation.results["in1"][0] == 3
+        assert simulation.results["in2"][0] == 1
+        assert simulation.results["add1"][0] == 4
+        assert simulation.results["add2"][0] == 5
+        assert simulation.results["0"][0] == 4
+        assert simulation.results["1"][0] == 5
+
+        assert simulation.results["in1"][1] == 4
+        assert simulation.results["in2"][1] == 3
+        assert simulation.results["add1"][1] == 7
+        assert simulation.results["add2"][1] == 10
+        assert simulation.results["0"][1] == 7
+        assert simulation.results["1"][1] == 10
+
+        assert simulation.results["in1"][2] == 5
+        assert simulation.results["in2"][2] == 5
+        assert simulation.results["add1"][2] == 10
+        assert simulation.results["add2"][2] == 15
+        assert simulation.results["0"][2] == 10
+        assert simulation.results["1"][2] == 15
+
+        assert simulation.results["in1"][3] == 6
+        assert simulation.results["in2"][3] == 7
+        assert simulation.results["add1"][3] == 13
+        assert simulation.results["add2"][3] == 20
+        assert simulation.results["0"][3] == 13
+        assert simulation.results["1"][3] == 20
 
     def test_with_numpy_arrays_as_input(self, sfg_two_inputs_two_outputs):
         input0 = np.array([5, 9, 25, -5, 7])
         input1 = np.array([7, 3, 3,  54, 2])
         simulation = Simulation(sfg_two_inputs_two_outputs, [input0, input1])
-        simulation.save_results = True
 
-        output = simulation.run_for(5)
+        output = simulation.run_for(5, save_results = True)
 
         assert output[0] == 9
         assert output[1] == 11
 
-        assert simulation.results[0]["in1"] == 5
-        assert simulation.results[0]["in2"] == 7
-        assert simulation.results[0]["add1"] == 12
-        assert simulation.results[0]["add2"] == 19
-        assert simulation.results[0]["0"] == 12
-        assert simulation.results[0]["1"] == 19
-
-        assert simulation.results[1]["in1"] == 9
-        assert simulation.results[1]["in2"] == 3
-        assert simulation.results[1]["add1"] == 12
-        assert simulation.results[1]["add2"] == 15
-        assert simulation.results[1]["0"] == 12
-        assert simulation.results[1]["1"] == 15
-
-        assert simulation.results[2]["in1"] == 25
-        assert simulation.results[2]["in2"] == 3
-        assert simulation.results[2]["add1"] == 28
-        assert simulation.results[2]["add2"] == 31
-        assert simulation.results[2]["0"] == 28
-        assert simulation.results[2]["1"] == 31
-
-        assert simulation.results[3]["in1"] == -5
-        assert simulation.results[3]["in2"] == 54
-        assert simulation.results[3]["add1"] == 49
-        assert simulation.results[3]["add2"] == 103
-        assert simulation.results[3]["0"] == 49
-        assert simulation.results[3]["1"] == 103
-
-        assert simulation.results[4]["0"] == 9
-        assert simulation.results[4]["1"] == 11
+        assert isinstance(simulation.results["in1"], np.ndarray)
+        assert isinstance(simulation.results["in2"], np.ndarray)
+        assert isinstance(simulation.results["add1"], np.ndarray)
+        assert isinstance(simulation.results["add2"], np.ndarray)
+        assert isinstance(simulation.results["0"], np.ndarray)
+        assert isinstance(simulation.results["1"], np.ndarray)
+
+        assert simulation.results["in1"][0] == 5
+        assert simulation.results["in2"][0] == 7
+        assert simulation.results["add1"][0] == 12
+        assert simulation.results["add2"][0] == 19
+        assert simulation.results["0"][0] == 12
+        assert simulation.results["1"][0] == 19
+
+        assert simulation.results["in1"][1] == 9
+        assert simulation.results["in2"][1] == 3
+        assert simulation.results["add1"][1] == 12
+        assert simulation.results["add2"][1] == 15
+        assert simulation.results["0"][1] == 12
+        assert simulation.results["1"][1] == 15
+
+        assert simulation.results["in1"][2] == 25
+        assert simulation.results["in2"][2] == 3
+        assert simulation.results["add1"][2] == 28
+        assert simulation.results["add2"][2] == 31
+        assert simulation.results["0"][2] == 28
+        assert simulation.results["1"][2] == 31
+
+        assert simulation.results["in1"][3] == -5
+        assert simulation.results["in2"][3] == 54
+        assert simulation.results["add1"][3] == 49
+        assert simulation.results["add2"][3] == 103
+        assert simulation.results["0"][3] == 49
+        assert simulation.results["1"][3] == 103
+
+        assert simulation.results["0"][4] == 9
+        assert simulation.results["1"][4] == 11
     
     def test_with_numpy_array_overflow(self, sfg_two_inputs_two_outputs):
         input0 = np.array([5, 9, 25, -5, 7])
@@ -92,28 +98,64 @@ class TestRunFor:
         simulation = Simulation(sfg_two_inputs_two_outputs, [input0, input1])
         simulation.run_for(5)
         with pytest.raises(IndexError):
-            simulation.run_for(1)
+            simulation.step()
+
+    def test_run_whole_numpy_array(self, sfg_two_inputs_two_outputs):
+        input0 = np.array([5, 9, 25, -5, 7])
+        input1 = np.array([7, 3, 3,  54, 2])
+        simulation = Simulation(sfg_two_inputs_two_outputs, [input0, input1])
+        simulation.run()
+        assert len(simulation.results["0"]) == 5
+        assert len(simulation.results["1"]) == 5
+        with pytest.raises(IndexError):
+            simulation.step()
 
     def test_delay(self, sfg_delay):
-        simulation = Simulation(sfg_delay, save_results = True)
+        simulation = Simulation(sfg_delay)
         simulation.set_input(0, [5, -2, 25, -6, 7, 0])
-        simulation.run_for(6)
+        simulation.run_for(6, save_results = True)
 
-        assert simulation.results[0]["0"] == 0
-        assert simulation.results[1]["0"] == 5
-        assert simulation.results[2]["0"] == -2
-        assert simulation.results[3]["0"] == 25
-        assert simulation.results[4]["0"] == -6
-        assert simulation.results[5]["0"] == 7
+        assert simulation.results["0"][0] == 0
+        assert simulation.results["0"][1] == 5
+        assert simulation.results["0"][2] == -2
+        assert simulation.results["0"][3] == 25
+        assert simulation.results["0"][4] == -6
+        assert simulation.results["0"][5] == 7
 
 class TestRun:
+    def test_save_results(self, sfg_two_inputs_two_outputs):
+        simulation = Simulation(sfg_two_inputs_two_outputs, [2, 3])
+        assert not simulation.results
+        simulation.run_for(10, save_results = False)
+        assert not simulation.results
+        simulation.run_for(10)
+        assert len(simulation.results["0"]) == 10
+        assert len(simulation.results["1"]) == 10
+        simulation.run_for(10, save_results = True)
+        assert len(simulation.results["0"]) == 20
+        assert len(simulation.results["1"]) == 20
+        simulation.run_for(10, save_results = False)
+        assert len(simulation.results["0"]) == 20
+        assert len(simulation.results["1"]) == 20
+        simulation.run_for(13, save_results = True)
+        assert len(simulation.results["0"]) == 33
+        assert len(simulation.results["1"]) == 33
+        simulation.step(save_results = False)
+        assert len(simulation.results["0"]) == 33
+        assert len(simulation.results["1"]) == 33
+        simulation.step()
+        assert len(simulation.results["0"]) == 34
+        assert len(simulation.results["1"]) == 34
+        simulation.clear_results()
+        assert not simulation.results
+
     def test_nested(self, sfg_nested):
         input0 = np.array([5, 9])
         input1 = np.array([7, 3])
         simulation = Simulation(sfg_nested, [input0, input1])
 
-        output0 = simulation.run()
-        output1 = simulation.run()
+        output0 = simulation.step()
+        output1 = simulation.step()
 
         assert output0[0] == 11405
         assert output1[0] == 4221
@@ -122,21 +164,33 @@ class TestRun:
         data_in = np.array([5, -2, 25, -6, 7, 0])
         reset   = np.array([0, 0,  0,  1,  0, 0])
         simulation = Simulation(sfg_accumulator, [data_in, reset])
-        output0 = simulation.run()
-        output1 = simulation.run()
-        output2 = simulation.run()
-        output3 = simulation.run()
-        output4 = simulation.run()
-        output5 = simulation.run()
+        output0 = simulation.step()
+        output1 = simulation.step()
+        output2 = simulation.step()
+        output3 = simulation.step()
+        output4 = simulation.step()
+        output5 = simulation.step()
         assert output0[0] == 0
         assert output1[0] == 5
         assert output2[0] == 3
         assert output3[0] == 28
         assert output4[0] == 0
         assert output5[0] == 7
+
+    def test_simple_accumulator(self, sfg_simple_accumulator):
+        data_in = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+        simulation = Simulation(sfg_simple_accumulator, [data_in])
+        simulation.run()
+        assert list(simulation.results["0"]) == [0, 1, 3, 6, 10, 15, 21, 28, 36, 45]
         
-    def test_simple_filter(self, simple_filter):
+    def test_simple_filter(self, sfg_simple_filter):
         input0 = np.array([1, 2, 3, 4, 5])
-        simulation = Simulation(simple_filter, [input0], save_results=True)
-        output0 = [simulation.run()[0] for _ in range(len(input0))]
-        assert output0 == [0, 1.0, 2.5, 4.25, 6.125]
+        simulation = Simulation(sfg_simple_filter, [input0])
+        simulation.run_for(len(input0), save_results = True)
+        assert all(simulation.results["0"] == np.array([0, 1.0, 2.5, 4.25, 6.125]))
+
+    def test_custom_operation(self, sfg_custom_operation):
+        simulation = Simulation(sfg_custom_operation, [lambda n: n + 1])
+        simulation.run_for(5)
+        assert all(simulation.results["0"] == np.array([2, 4, 6, 8, 10]))
+        assert all(simulation.results["1"] == np.array([2, 4, 8, 16, 32]))