From d83254c92c4d6dbf43b6131d6e051ed55834163c Mon Sep 17 00:00:00 2001
From: Mikael Henriksson <mike.zx@hotmail.com>
Date: Thu, 24 Aug 2023 17:28:19 +0200
Subject: [PATCH] codegen: add support for generic enable-delay in
 transposition testbench

---
 .../streaming_matrix_transposition_tb.vhdl    | 20 ++++++++-----
 b_asic/codegen/vhdl/architecture.py           | 30 ++++---------------
 2 files changed, 19 insertions(+), 31 deletions(-)

diff --git a/b_asic/codegen/testbench/streaming_matrix_transposition_tb.vhdl b/b_asic/codegen/testbench/streaming_matrix_transposition_tb.vhdl
index d14338a8..5fe48325 100644
--- a/b_asic/codegen/testbench/streaming_matrix_transposition_tb.vhdl
+++ b/b_asic/codegen/testbench/streaming_matrix_transposition_tb.vhdl
@@ -10,9 +10,10 @@ use ieee.numeric_std.all;
 
 entity streaming_matrix_transposition_tester is
     generic(
-        WL          : integer;
-        ROWS        : integer;
-        COLS        : integer
+        WL              : integer;
+        ROWS            : integer;
+        COLS            : integer;
+        ENABLE_DEL_CC   : integer := 0  -- Clock cycles to wait after enable goes high before feeding input (timed as 10 ns per cycle)
     );
     port(
         clk, rst, en : out std_logic;
@@ -40,6 +41,7 @@ begin
     -- Input generation
     input_gen_proc: process begin
         wait until en = '1';
+        wait for ENABLE_DEL_CC*10 ns;
         for i in 0 to 4*ROWS*COLS-1 loop
             wait until clk = '0';
             input <= std_logic_vector(to_unsigned(i, input'length));
@@ -189,7 +191,8 @@ begin
     dut : entity work.streaming_matrix_transposition_memory_4x4
         generic map(WL=>WL) port map(clk, rst, en, input, output);
     tb : entity work.streaming_matrix_transposition_tester
-        generic map (WL=>WL, ROWS=>4, COLS=>4) port map(clk, rst, en, input, output, done);
+        generic map (WL=>WL, ROWS=>4, COLS=>4, ENABLE_DEL_CC=>1)
+        port map(clk, rst, en, input, output, done);
 
 end architecture behav;
 
@@ -226,7 +229,8 @@ begin
     dut : entity work.streaming_matrix_transposition_memory_5x5
         generic map(WL=>WL) port map(clk, rst, en, input, output);
     tb : entity work.streaming_matrix_transposition_tester
-        generic map (WL=>WL, ROWS=>5, COLS=>5) port map(clk, rst, en, input, output, done);
+        generic map (WL=>WL, ROWS=>5, COLS=>5, ENABLE_DEL_CC=>2)
+        port map(clk, rst, en, input, output, done);
 
 end architecture behav;
 
@@ -263,7 +267,8 @@ begin
     dut : entity work.streaming_matrix_transposition_memory_7x7
         generic map(WL=>WL) port map(clk, rst, en, input, output);
     tb : entity work.streaming_matrix_transposition_tester
-        generic map (WL=>WL, ROWS=>7, COLS=>7) port map(clk, rst, en, input, output, done);
+        generic map (WL=>WL, ROWS=>7, COLS=>7, ENABLE_DEL_CC=>3)
+        port map(clk, rst, en, input, output, done);
 
 end architecture behav;
 
@@ -301,7 +306,8 @@ begin
     dut : entity work.streaming_matrix_transposition_memory_4x8
         generic map(WL=>WL) port map(clk, rst, en, input, output);
     tb : entity work.streaming_matrix_transposition_tester
-        generic map (WL=>WL, ROWS=>4, COLS=>8) port map(clk, rst, en, input, output, done);
+        generic map (WL=>WL, ROWS=>4, COLS=>8, ENABLE_DEL_CC=>2)
+        port map(clk, rst, en, input, output, done);
 
 end architecture behav;
 
diff --git a/b_asic/codegen/vhdl/architecture.py b/b_asic/codegen/vhdl/architecture.py
index ce134ed0..880b8b01 100644
--- a/b_asic/codegen/vhdl/architecture.py
+++ b/b_asic/codegen/vhdl/architecture.py
@@ -143,7 +143,7 @@ def memory_based_storage(
             f, f'read_port_{i}', 'std_logic_vector(MEM_WL-1 downto 0)', name_pad=18
         )
         common.signal_declaration(
-            f, f'read_adr_{i}', f'integer range 0 to {schedule_time}-1', name_pad=18
+            f, f'read_adr_{i}', 'integer range 0 to MEM_DEPTH-1', name_pad=18
         )
         common.signal_declaration(f, f'read_en_{i}', 'std_logic', name_pad=18)
     for i in range(write_ports):
@@ -151,7 +151,7 @@ def memory_based_storage(
             f, f'write_port_{i}', 'std_logic_vector(MEM_WL-1 downto 0)', name_pad=18
         )
         common.signal_declaration(
-            f, f'write_adr_{i}', f'integer range 0 to {schedule_time}-1', name_pad=18
+            f, f'write_adr_{i}', 'integer range 0 to MEM_DEPTH-1', name_pad=18
         )
         common.signal_declaration(f, f'write_en_{i}', 'std_logic', name_pad=18)
 
@@ -163,7 +163,7 @@ def memory_based_storage(
                 common.signal_declaration(
                     f,
                     f'write_adr_{write_port_idx}_{depth}_{rom}',
-                    signal_type=f'integer range 0 to {schedule_time}-1',
+                    signal_type='integer range 0 to MEM_DEPTH-1',
                     name_pad=18,
                 )
     for write_port_idx in range(write_ports):
@@ -181,7 +181,7 @@ def memory_based_storage(
                 common.signal_declaration(
                     f,
                     f'read_adr_{read_port_idx}_{depth}_{rom}',
-                    signal_type=f'integer range 0 to {schedule_time}-1',
+                    signal_type='integer range 0 to MEM_DEPTH-1',
                     name_pad=18,
                 )
 
@@ -192,13 +192,6 @@ def memory_based_storage(
             common.signal_declaration(
                 f, f'p_{i}_in_sync', 'std_logic_vector(WL-1 downto 0)', name_pad=18
             )
-            for pipe_idx in range(adr_pipe_depth):
-                common.signal_declaration(
-                    f,
-                    f'p_{i}_{pipe_idx}',
-                    'std_logic_vector(WL-1 downto 0)',
-                    name_pad=18,
-                )
 
     #
     # Architecture body begin
@@ -245,11 +238,6 @@ def memory_based_storage(
         )
         for i in range(read_ports):
             write(f, 3, f'p_{i}_in_sync <= p_{i}_in;')
-            for pipe_idx in range(adr_pipe_depth):
-                if pipe_idx == 0:
-                    write(f, 3, f'p_{i}_{pipe_idx} <= p_{i}_in_sync;')
-                else:
-                    write(f, 3, f'p_{i}_{pipe_idx} <= p_{i}_{pipe_idx-1};')
         common.synchronous_process_epilogue(
             f=f,
             name='input_sync_proc',
@@ -275,10 +263,7 @@ def memory_based_storage(
     write(f, 1, f'write_adr_0 <= write_adr_0_{adr_pipe_depth}_0;')
     write(f, 1, f'write_en_0 <= write_en_0_{adr_pipe_depth}_0;')
     if input_sync:
-        if adr_pipe_depth == 0:
-            write(f, 1, 'write_port_0 <= p_0_in_sync;')
-        else:
-            write(f, 1, f'write_port_0 <= p_0_{adr_pipe_depth-1};')
+        write(f, 1, 'write_port_0 <= p_0_in_sync;')
     else:
         write(f, 1, 'write_port_0 <= p_0_in;')
 
@@ -300,10 +285,7 @@ def memory_based_storage(
                 write(
                     f,
                     4,
-                    (
-                        f'when {write_time}+{adr_pipe_depth} => '
-                        f'p_0_out <= p_0_{adr_pipe_depth-1};'
-                    ),
+                    f'when {write_time}+{adr_pipe_depth} => p_0_out <= p_0_in_sync;',
                 )
             else:
                 write(f, 4, f'when {write_time} => p_0_out <= p_0_in_sync;')
-- 
GitLab