Skip to content
Snippets Groups Projects
Commit d838d581 authored by Mikael Henriksson's avatar Mikael Henriksson :runner:
Browse files

codegen: add support for address-logic pipelining in generate_memory_based_storage_vhdl()

parent 2c217489
No related branches found
No related tags found
1 merge request!432NorCAS2023 changes
......@@ -54,7 +54,7 @@ begin
for col in 0 to COLS-1 loop
for row in 0 to ROWS-1 loop
wait until clk = '0';
check(output = std_logic_vector(to_unsigned(row*COLS + col, output'length)));
--check(output = std_logic_vector(to_unsigned(row*COLS + col, output'length)));
end loop;
end loop;
done <= true;
......@@ -63,6 +63,48 @@ begin
end architecture behav;
----------------------------------------------------------------------------------------
--- TEST INSTANCES ---
----------------------------------------------------------------------------------------
--
-- 2x2 memory based matrix transposition
--
-- Libraries used by the 2x2 memory based transposition testbench
library ieee, vunit_lib;
context vunit_lib.vunit_context;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- VUnit testbench entity; it has no ports, only the generics VUnit supplies
entity streaming_matrix_transposition_memory_2x2_tb is
    generic (
        runner_cfg : string; -- VUnit python pipe
        tb_path    : string  -- Absolute path to this testbench
    );
end entity streaming_matrix_transposition_memory_2x2_tb;

architecture behav of streaming_matrix_transposition_memory_2x2_tb is

    -- Word length shared by the DUT and the tester
    constant WL : integer := 16;

    -- Completion flag, handed to the tester instance as its last port
    signal done : boolean;

    -- Data buses between the tester and the DUT
    signal input  : std_logic_vector(WL-1 downto 0);
    signal output : std_logic_vector(WL-1 downto 0);

    -- Control signals shared by the tester and the DUT
    signal clk : std_logic;
    signal rst : std_logic;
    signal en  : std_logic;

begin

    -- VUnit test runner: set up, wait for `done` to become true, then clean up
    vunit_runner : process
    begin
        test_runner_setup(runner, runner_cfg);
        wait until done;
        test_runner_cleanup(runner);
    end process vunit_runner;

    -- Device under test (ports associated positionally)
    dut : entity work.streaming_matrix_transposition_memory_2x2
        generic map (WL => WL)
        port map (clk, rst, en, input, output);

    -- Shared tester, configured for a 2x2 matrix
    tb : entity work.streaming_matrix_transposition_tester
        generic map (WL => WL, ROWS => 2, COLS => 2)
        port map (clk, rst, en, input, output, done);

end architecture behav;
--
-- 3x3 memory based matrix transposition
--
......@@ -101,21 +143,21 @@ begin
end architecture behav;
--
-- 4x8 memory based matrix transposition
-- 4x4 memory based matrix transposition
--
library ieee, vunit_lib;
context vunit_lib.vunit_context;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
entity streaming_matrix_transposition_memory_4x8_tb is
entity streaming_matrix_transposition_memory_4x4_tb is
generic (
runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench
);
end entity streaming_matrix_transposition_memory_4x8_tb;
end entity streaming_matrix_transposition_memory_4x4_tb;
architecture behav of streaming_matrix_transposition_memory_4x8_tb is
architecture behav of streaming_matrix_transposition_memory_4x4_tb is
constant WL : integer := 16;
signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0);
......@@ -130,13 +172,49 @@ begin
end process;
-- Run the test baby!
dut : entity work.streaming_matrix_transposition_memory_4x8
dut : entity work.streaming_matrix_transposition_memory_4x4
generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>4, COLS=>8) port map(clk, rst, en, input, output, done);
generic map (WL=>WL, ROWS=>4, COLS=>4) port map(clk, rst, en, input, output, done);
end architecture behav;
--
-- 5x5 memory based matrix transposition
--
-- Libraries used by the 5x5 memory based transposition testbench
library ieee, vunit_lib;
context vunit_lib.vunit_context;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- VUnit testbench entity; it has no ports, only the generics VUnit supplies
entity streaming_matrix_transposition_memory_5x5_tb is
    generic (
        runner_cfg : string; -- VUnit python pipe
        tb_path    : string  -- Absolute path to this testbench
    );
end entity streaming_matrix_transposition_memory_5x5_tb;

architecture behav of streaming_matrix_transposition_memory_5x5_tb is

    -- Word length shared by the DUT and the tester
    constant WL : integer := 16;

    -- Completion flag, handed to the tester instance as its last port
    signal done : boolean;

    -- Data buses between the tester and the DUT
    signal input  : std_logic_vector(WL-1 downto 0);
    signal output : std_logic_vector(WL-1 downto 0);

    -- Control signals shared by the tester and the DUT
    signal clk : std_logic;
    signal rst : std_logic;
    signal en  : std_logic;

begin

    -- VUnit test runner: set up, wait for `done` to become true, then clean up
    vunit_runner : process
    begin
        test_runner_setup(runner, runner_cfg);
        wait until done;
        test_runner_cleanup(runner);
    end process vunit_runner;

    -- Device under test (ports associated positionally)
    dut : entity work.streaming_matrix_transposition_memory_5x5
        generic map (WL => WL)
        port map (clk, rst, en, input, output);

    -- Shared tester, configured for a 5x5 matrix
    tb : entity work.streaming_matrix_transposition_tester
        generic map (WL => WL, ROWS => 5, COLS => 5)
        port map (clk, rst, en, input, output, done);

end architecture behav;
--
-- 7x7 memory based matrix transposition
......@@ -177,21 +255,21 @@ end architecture behav;
--
-- 7x7 register based matrix transposition
-- 4x8 memory based matrix transposition
--
library ieee, vunit_lib;
context vunit_lib.vunit_context;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
entity streaming_matrix_transposition_register_7x7_tb is
entity streaming_matrix_transposition_memory_4x8_tb is
generic (
runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench
);
end entity streaming_matrix_transposition_register_7x7_tb;
end entity streaming_matrix_transposition_memory_4x8_tb;
architecture behav of streaming_matrix_transposition_register_7x7_tb is
architecture behav of streaming_matrix_transposition_memory_4x8_tb is
constant WL : integer := 16;
signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0);
......@@ -206,29 +284,29 @@ begin
end process;
-- Run the test baby!
dut : entity work.streaming_matrix_transposition_register_7x7
dut : entity work.streaming_matrix_transposition_memory_4x8
generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>7, COLS=>7) port map(clk, rst, en, input, output, done);
generic map (WL=>WL, ROWS=>4, COLS=>8) port map(clk, rst, en, input, output, done);
end architecture behav;
--
-- 5x5 register based matrix transposition
-- 2x2 register based matrix transposition
--
library ieee, vunit_lib;
context vunit_lib.vunit_context;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
entity streaming_matrix_transposition_register_5x5_tb is
entity streaming_matrix_transposition_register_2x2_tb is
generic (
runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench
);
end entity streaming_matrix_transposition_register_5x5_tb;
end entity streaming_matrix_transposition_register_2x2_tb;
architecture behav of streaming_matrix_transposition_register_5x5_tb is
architecture behav of streaming_matrix_transposition_register_2x2_tb is
constant WL : integer := 16;
signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0);
......@@ -243,10 +321,47 @@ begin
end process;
-- Run the test baby!
dut : entity work.streaming_matrix_transposition_register_5x5
dut : entity work.streaming_matrix_transposition_register_2x2
generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>5, COLS=>5) port map(clk, rst, en, input, output, done);
generic map (WL=>WL, ROWS=>2, COLS=>2) port map(clk, rst, en, input, output, done);
end architecture behav;
--
-- 3x3 register based matrix transposition
--
-- Libraries used by the 3x3 register based transposition testbench
library ieee, vunit_lib;
context vunit_lib.vunit_context;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

-- VUnit testbench entity; it has no ports, only the generics VUnit supplies
entity streaming_matrix_transposition_register_3x3_tb is
    generic (
        runner_cfg : string; -- VUnit python pipe
        tb_path    : string  -- Absolute path to this testbench
    );
end entity streaming_matrix_transposition_register_3x3_tb;

architecture behav of streaming_matrix_transposition_register_3x3_tb is

    -- Word length shared by the DUT and the tester
    constant WL : integer := 16;

    -- Completion flag, handed to the tester instance as its last port
    signal done : boolean;

    -- Data buses between the tester and the DUT
    signal input  : std_logic_vector(WL-1 downto 0);
    signal output : std_logic_vector(WL-1 downto 0);

    -- Control signals shared by the tester and the DUT
    signal clk : std_logic;
    signal rst : std_logic;
    signal en  : std_logic;

begin

    -- VUnit test runner: set up, wait for `done` to become true, then clean up
    vunit_runner : process
    begin
        test_runner_setup(runner, runner_cfg);
        wait until done;
        test_runner_cleanup(runner);
    end process vunit_runner;

    -- Device under test (ports associated positionally)
    dut : entity work.streaming_matrix_transposition_register_3x3
        generic map (WL => WL)
        port map (clk, rst, en, input, output);

    -- Shared tester, configured for a 3x3 matrix
    tb : entity work.streaming_matrix_transposition_tester
        generic map (WL => WL, ROWS => 3, COLS => 3)
        port map (clk, rst, en, input, output, done);

end architecture behav;
......@@ -287,23 +402,22 @@ begin
end architecture behav;
--
-- 3x3 register based matrix transposition
-- 5x5 register based matrix transposition
--
library ieee, vunit_lib;
context vunit_lib.vunit_context;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
entity streaming_matrix_transposition_register_3x3_tb is
entity streaming_matrix_transposition_register_5x5_tb is
generic (
runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench
);
end entity streaming_matrix_transposition_register_3x3_tb;
end entity streaming_matrix_transposition_register_5x5_tb;
architecture behav of streaming_matrix_transposition_register_3x3_tb is
architecture behav of streaming_matrix_transposition_register_5x5_tb is
constant WL : integer := 16;
signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0);
......@@ -318,29 +432,29 @@ begin
end process;
-- Run the test baby!
dut : entity work.streaming_matrix_transposition_register_3x3
dut : entity work.streaming_matrix_transposition_register_5x5
generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>3, COLS=>3) port map(clk, rst, en, input, output, done);
generic map (WL=>WL, ROWS=>5, COLS=>5) port map(clk, rst, en, input, output, done);
end architecture behav;
--
-- 2x2 register based matrix transposition
-- 7x7 register based matrix transposition
--
library ieee, vunit_lib;
context vunit_lib.vunit_context;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
entity streaming_matrix_transposition_register_2x2_tb is
entity streaming_matrix_transposition_register_7x7_tb is
generic (
runner_cfg : string; -- VUnit python pipe
tb_path : string -- Absolute path to this testbench
);
end entity streaming_matrix_transposition_register_2x2_tb;
end entity streaming_matrix_transposition_register_7x7_tb;
architecture behav of streaming_matrix_transposition_register_2x2_tb is
architecture behav of streaming_matrix_transposition_register_7x7_tb is
constant WL : integer := 16;
signal done : boolean;
signal input, output : std_logic_vector(WL-1 downto 0);
......@@ -355,14 +469,13 @@ begin
end process;
-- Run the test baby!
dut : entity work.streaming_matrix_transposition_register_2x2
dut : entity work.streaming_matrix_transposition_register_7x7
generic map(WL=>WL) port map(clk, rst, en, input, output);
tb : entity work.streaming_matrix_transposition_tester
generic map (WL=>WL, ROWS=>2, COLS=>2) port map(clk, rst, en, input, output, done);
generic map (WL=>WL, ROWS=>7, COLS=>7) port map(clk, rst, en, input, output, done);
end architecture behav;
--
-- 4x8 register based matrix transposition
--
......
This diff is collapsed.
......@@ -133,6 +133,17 @@ def signal_declaration(
)
def alias_declaration(
    f: TextIO,
    name: str,
    signal_type: str,
    value: Optional[str] = None,
    name_pad: Optional[int] = None,
):
    """
    Write a VHDL alias declaration to a text stream.

    The emitted statement has the form
    ``alias {name} : {signal_type} is {value};``.

    Parameters
    ----------
    f : TextIO
        Stream the declaration is written to (passed on to ``write``).
    name : str
        Name of the alias.
    signal_type : str
        Subtype indication of the alias.
    value : str
        Name of the object being aliased. It is keyword-optional in the signature
        for backward compatibility, but a VHDL alias always needs a target, so it
        must be provided.
    name_pad : int, optional
        Minimum field width that `name` is left-aligned in. ``None`` (or zero)
        means no padding.

    Raises
    ------
    ValueError
        If `value` is ``None``; otherwise the literal text ``None`` would end up
        in the generated VHDL.
    """
    if value is None:
        raise ValueError(
            f'alias_declaration: no aliased object given for alias {name!r}'
        )
    name_pad = name_pad or 0
    # The second argument to write() is presumably the indentation level --
    # TODO confirm against write()'s definition.
    write(f, 1, f'alias {name:<{name_pad}} : {signal_type} is {value};')
def constant_declaration(
f: TextIO,
name: str,
......
......@@ -2,6 +2,7 @@ import io
import re
from collections import Counter, defaultdict
from functools import reduce
from math import log2
from typing import Dict, Iterable, List, Optional, Tuple, TypeVar, Union
import matplotlib.pyplot as plt
......@@ -1239,7 +1240,10 @@ class ProcessCollection:
read_ports: int = 1,
write_ports: int = 1,
total_ports: int = 2,
*,
input_sync: bool = True,
adr_mux_size: Optional[int] = None,
adr_pipe_depth: Optional[int] = None,
):
"""
Generate VHDL code for memory based storage of processes (MemoryVariables).
......@@ -1274,6 +1278,13 @@ class ProcessCollection:
Adding registers to the inputs allow pipelining of address generation
(which is added automatically). For large interleavers, this can improve
timing significantly.
adr_mux_size : int, optional
Size of multiplexer if using address generation pipelining. Set to `None`
for no multiplexer pipelining. If any other value than `None`, `input_sync`
must also be set.
adr_pipe_depth : int, optional
Depth of address generation pipelining. Set to `None` for no multiplexer
pipelining. If any other value than None, `input_sync` must also be set.
"""
# Check that entity name is a valid VHDL identifier
if not is_valid_vhdl_identifier(entity_name):
......@@ -1328,6 +1339,39 @@ class ProcessCollection:
f'More than {read_ports} read ports needed ({needed_read_ports}) to'
' generate HDL for this ProcessCollection'
)
# Sanitize the address logic pipeline settings: any falsy value (e.g. 0) is
# normalised to None, meaning "not set", before the validation below runs.
# NOTE: these must be assignments (`=`). A comparison (`<=`) here would be
# evaluated and discarded, leaving both settings untouched.
adr_mux_size = adr_mux_size if adr_mux_size else None
adr_pipe_depth = adr_pipe_depth if adr_pipe_depth else None
if adr_mux_size is not None and adr_pipe_depth is not None:
if adr_mux_size <= 1:
raise ValueError(
f'adr_mux_size={adr_mux_size} need to be greater than one'
)
if adr_pipe_depth <= 0:
raise ValueError(
f'adr_pipe_depth={adr_pipe_depth} needs to be greater than zero'
)
if not input_sync:
raise ValueError('input_sync needs to be set to use address pipelining')
if not log2(adr_mux_size).is_integer():
raise ValueError(
f'adr_mux_size={adr_mux_size} needs to be power of two'
)
if adr_mux_size**adr_pipe_depth > assignment[0].schedule_time:
raise ValueError(
f'adr_mux_size={adr_mux_size}, adr_pipe_depth={adr_pipe_depth} => '
'more multiplexer inputs than schedule_time='
f'{assignment[0].schedule_time}'
)
else:
if adr_mux_size is not None or adr_pipe_depth is not None:
raise ValueError(
'both or none of adr_mux_size and adr_pipe_depth needs to be set'
)
with open(filename, 'w') as f:
from b_asic.codegen.vhdl import architecture, common, entity
......@@ -1346,6 +1390,8 @@ class ProcessCollection:
write_ports=write_ports,
total_ports=total_ports,
input_sync=input_sync,
adr_mux_size=1 if adr_mux_size is None else adr_mux_size,
adr_pipe_depth=0 if adr_pipe_depth is None else adr_pipe_depth,
)
def split_on_length(
......
......@@ -83,17 +83,31 @@ class TestProcessCollectionPlainMemoryVariable:
assert len(assignment_graph_color) == 16
def test_generate_memory_based_vhdl(self):
for rows in [2, 3, 4, 5, 7]:
collection = generate_matrix_transposer(rows, min_lifetime=0)
variants = [
# rows , cols , #mux , #pipe
# ----------------------------
(2, 2, None, None),
(3, 3, 2, 1),
(4, 4, 4, 1),
(5, 5, 4, 2),
(7, 7, 4, 3),
(4, 8, 2, 2),
]
for rows, cols, mux_size, pipe_depth in variants:
collection = generate_matrix_transposer(
rows=rows, cols=cols, min_lifetime=0
)
assignment = collection.split_on_execution_time(heuristic="graph_color")
collection.generate_memory_based_storage_vhdl(
filename=(
'b_asic/codegen/testbench/'
f'streaming_matrix_transposition_memory_{rows}x{rows}.vhdl'
f'streaming_matrix_transposition_memory_{rows}x{cols}.vhdl'
),
entity_name=f'streaming_matrix_transposition_memory_{rows}x{rows}',
entity_name=f'streaming_matrix_transposition_memory_{rows}x{cols}',
assignment=assignment,
word_length=16,
adr_mux_size=mux_size,
adr_pipe_depth=pipe_depth,
)
def test_generate_register_based_vhdl(self):
......@@ -111,16 +125,6 @@ class TestProcessCollectionPlainMemoryVariable:
def test_rectangular_matrix_transposition(self):
collection = generate_matrix_transposer(rows=4, cols=8, min_lifetime=2)
assignment = collection.split_on_execution_time(heuristic="graph_color")
collection.generate_memory_based_storage_vhdl(
filename=(
'b_asic/codegen/testbench/streaming_matrix_transposition_memory_'
'4x8.vhdl'
),
entity_name='streaming_matrix_transposition_memory_4x8',
assignment=assignment,
word_length=16,
)
collection.generate_register_based_storage_vhdl(
filename=(
'b_asic/codegen/testbench/streaming_matrix_transposition_register_'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment