潮声隔雨深 · 2021年12月07日

delayline设计demo

写在前面

前方高能,建议先把上篇文章《一种数字delayline的设计方案》学透,再来看下面的内容。

本文给出了一个delayline的设计demo,包括RTL及其check脚本。本文涉及的代码、脚本和报告,都可以到公众号后台回复“delayline”获取。

1.端口说明

图片.png

2.电路结构说明

为了提高占空比质量,将delayline分成两级,每一级后面加个反相器:

图片.png

每一级里有14个coarse档、4个fine档和一个补偿MUX:

图片.png

每个fine档里是一个MUX:

图片.png

每个coarse档里是5个MUX:

图片.png

3.RTL实例

// Top level of the digitally controlled delay line.
//
// The input travels through two identical delay_line_half stages that share
// the same delay_ctrl word. Each stage is followed by an inverter cell, so the
// signal is inverted twice on its way from i_pre_delay to o_post_delay.
//
// Ports:
//   delay_ctrl   - 18-bit tap control, forwarded unchanged to both stages.
//   i_pre_delay  - signal to be delayed.
//   o_post_delay - delayed signal.
//
// Define DLY_SIM to instantiate the SIM_* behavioural cells instead of the
// library cells.
module    delay_line
(
    input        [17:0]    delay_ctrl,
    input                  i_pre_delay,
    output                 o_post_delay
);

// Internal nets are declared explicitly so the module does not rely on
// implicit net creation (and still compiles under `default_nettype none).
wire    post_delay_state0;      // output of the first half
wire    post_delay_state0_inv;  // first half output after inverter 0
wire    post_delay_state1;      // output of the second half

// First half
delay_line_half U_delay_line_state0 (
     .delay_ctrl(delay_ctrl),
     .i_pre_delay(i_pre_delay),
     .o_post_delay(post_delay_state0)
);

`ifdef DLY_SIM
SIM_INVCKLBRMX10 U_cell_inv0 (.I(post_delay_state0), .O(post_delay_state0_inv));
`else
INVCKLBRMX10 U_cell_inv0 (.I(post_delay_state0), .O(post_delay_state0_inv));
`endif

// Second half, driven by the inverted output of the first half
delay_line_half U_delay_line_state1 (
     .delay_ctrl(delay_ctrl),
     .i_pre_delay(post_delay_state0_inv),
     .o_post_delay(post_delay_state1)
);

`ifdef DLY_SIM
SIM_INVCKLBRMX10 U_cell_inv1 (.I(post_delay_state1), .O(o_post_delay));
`else
INVCKLBRMX10 U_cell_inv1 (.I(post_delay_state1), .O(o_post_delay));
`endif

endmodule

// One half of the delay line: 14 coarse taps followed by 4 fine taps.
//
// Ports:
//   delay_ctrl   - [17:4] select the coarse taps (bit 4 is nearest the fine
//                  section, bit 17 is the deepest tap); [3:0] select the fine
//                  taps (bit 0 is nearest the output, bit 3 is the deepest).
//   i_pre_delay  - signal to be delayed.
//   o_post_delay - delayed signal.
//
// Both sections are chains in which every tap either takes the undelayed
// source (sel = 0) or the output of the next deeper tap (sel = 1). The
// effective depth is therefore the run of consecutive 1s counted from bit 4
// (coarse) or bit 0 (fine); bits above the first 0 in a run have no effect.
module    delay_line_half
(
    input        [17:0]    delay_ctrl,
    input                  i_pre_delay,
    output                 o_post_delay
);

// Explicit declarations: the module must not depend on implicit nets.
wire    cdly13, cdly12, cdly11, cdly10, cdly09, cdly08, cdly07;
wire    cdly06, cdly05, cdly04, cdly03, cdly02, cdly01;  // coarse chain taps
wire    pre_fine;                                        // coarse section output
wire    pre_fine_d0;                                     // pre_fine after the compensation mux
wire    fdly03, fdly02, fdly01;                          // fine chain taps

//coarse delay part
// NOTE: the deepest tap (13) has no deeper stage to take its delayed input
// from, so both of its inputs are tied to i_pre_delay.
coarse_delay U_dly13_coarse     (.sel(delay_ctrl[17]), .sig_in1(i_pre_delay), .sig_in2(i_pre_delay), .sig_out(cdly13));
coarse_delay U_dly12_coarse     (.sel(delay_ctrl[16]), .sig_in1(i_pre_delay), .sig_in2(cdly13),      .sig_out(cdly12));
coarse_delay U_dly11_coarse     (.sel(delay_ctrl[15]), .sig_in1(i_pre_delay), .sig_in2(cdly12),      .sig_out(cdly11));
coarse_delay U_dly10_coarse     (.sel(delay_ctrl[14]), .sig_in1(i_pre_delay), .sig_in2(cdly11),      .sig_out(cdly10));
coarse_delay U_dly09_coarse     (.sel(delay_ctrl[13]), .sig_in1(i_pre_delay), .sig_in2(cdly10),      .sig_out(cdly09));
coarse_delay U_dly08_coarse     (.sel(delay_ctrl[12]), .sig_in1(i_pre_delay), .sig_in2(cdly09),      .sig_out(cdly08));
coarse_delay U_dly07_coarse     (.sel(delay_ctrl[11]), .sig_in1(i_pre_delay), .sig_in2(cdly08),      .sig_out(cdly07));
coarse_delay U_dly06_coarse     (.sel(delay_ctrl[10]), .sig_in1(i_pre_delay), .sig_in2(cdly07),      .sig_out(cdly06));
coarse_delay U_dly05_coarse     (.sel(delay_ctrl[9]),  .sig_in1(i_pre_delay), .sig_in2(cdly06),      .sig_out(cdly05));
coarse_delay U_dly04_coarse     (.sel(delay_ctrl[8]),  .sig_in1(i_pre_delay), .sig_in2(cdly05),      .sig_out(cdly04));
coarse_delay U_dly03_coarse     (.sel(delay_ctrl[7]),  .sig_in1(i_pre_delay), .sig_in2(cdly04),      .sig_out(cdly03));
coarse_delay U_dly02_coarse     (.sel(delay_ctrl[6]),  .sig_in1(i_pre_delay), .sig_in2(cdly03),      .sig_out(cdly02));
coarse_delay U_dly01_coarse     (.sel(delay_ctrl[5]),  .sig_in1(i_pre_delay), .sig_in2(cdly02),      .sig_out(cdly01));
coarse_delay U_dly00_coarse     (.sel(delay_ctrl[4]),  .sig_in1(i_pre_delay), .sig_in2(cdly01),      .sig_out(pre_fine));

//fine delay part
// Compensation mux: permanently selects input A, so pre_fine_d0 is pre_fine
// delayed by one mux. It feeds the delayed input of the deepest fine tap.
`ifdef DLY_SIM
SIM_MUX2CKLBRMX8   U_cell_fdt0   (.A(pre_fine), .B(1'b0), .S(1'b0),  .O(pre_fine_d0));
`else
MUX2CKLBRMX8   U_cell_fdt0   (.A(pre_fine), .B(1'b0), .S(1'b0),  .O(pre_fine_d0));
`endif
fine_delay U_dly03_fine (.sel(delay_ctrl[3]), .sig_in1(pre_fine), .sig_in2(pre_fine_d0), .sig_out(fdly03));
fine_delay U_dly02_fine (.sel(delay_ctrl[2]), .sig_in1(pre_fine), .sig_in2(fdly03),      .sig_out(fdly02));
fine_delay U_dly01_fine (.sel(delay_ctrl[1]), .sig_in1(pre_fine), .sig_in2(fdly02),      .sig_out(fdly01));
fine_delay U_dly00_fine (.sel(delay_ctrl[0]), .sig_in1(pre_fine), .sig_in2(fdly01),      .sig_out(o_post_delay));

endmodule

// One coarse tap: a chain of four small delay muxes followed by a select mux.
//
// Ports:
//   sel     - 0: sig_out takes sig_in1 through the select mux only.
//             1: sig_out takes sig_in2 after the four delay muxes plus the
//                select mux.
//   sig_in1 - undelayed signal.
//   sig_in2 - signal coming from the deeper part of the chain.
//   sig_out - output of this tap.
//
// The four delay muxes are hard-wired to their B input (S = 1'b1, A = 1'b0),
// so they act purely as delay elements. Define DLY_SIM to use the SIM_*
// behavioural cells instead of the library cells.
module coarse_delay
(
    input        sel,
    input        sig_in1,//original sig in, not be delayed
    input        sig_in2,//delayed sig in
    output       sig_out
);

// Declared explicitly instead of relying on implicit net creation.
wire    dly4;   // sig_in2 after 1 delay mux
wire    dly3;   // sig_in2 after 2 delay muxes
wire    dly2;   // sig_in2 after 3 delay muxes
wire    dly1;   // sig_in2 after 4 delay muxes

`ifdef DLY_SIM
SIM_MUX2CKLBRMX2   U_cell_dt4 (.A(1'b0),    .B(sig_in2),  .S(1'b1), .O(dly4));
SIM_MUX2CKLBRMX2   U_cell_dt3 (.A(1'b0),    .B(dly4),     .S(1'b1), .O(dly3));
SIM_MUX2CKLBRMX2   U_cell_dt2 (.A(1'b0),    .B(dly3),     .S(1'b1), .O(dly2));
SIM_MUX2CKLBRMX2   U_cell_dt1 (.A(1'b0),    .B(dly2),     .S(1'b1), .O(dly1));
SIM_MUX2CKLBRMX8   U_cell_dt0 (.A(sig_in1), .B(dly1),     .S(sel),  .O(sig_out));
`else
MUX2CKLBRMX2   U_cell_dt4 (.A(1'b0),    .B(sig_in2),  .S(1'b1), .O(dly4));
MUX2CKLBRMX2   U_cell_dt3 (.A(1'b0),    .B(dly4),     .S(1'b1), .O(dly3));
MUX2CKLBRMX2   U_cell_dt2 (.A(1'b0),    .B(dly3),     .S(1'b1), .O(dly2));
MUX2CKLBRMX2   U_cell_dt1 (.A(1'b0),    .B(dly2),     .S(1'b1), .O(dly1));
MUX2CKLBRMX8   U_cell_dt0 (.A(sig_in1), .B(dly1),     .S(sel),  .O(sig_out));
`endif

endmodule

// One fine tap: a single 2:1 mux.
//   sel = 0 -> sig_out follows sig_in1 (the undelayed signal)
//   sel = 1 -> sig_out follows sig_in2 (the signal from the deeper fine tap)
module fine_delay
(
    input        sel,
    input        sig_in1,   // undelayed signal
    input        sig_in2,   // signal already delayed by the deeper taps
    output       sig_out
);

// Only the cell name differs between simulation and implementation, so the
// conditional wraps just that and the connections are written once.
`ifdef DLY_SIM
SIM_MUX2CKLBRMX8
`else
MUX2CKLBRMX8
`endif
    U_cell_dt0 (
        .A(sig_in1),
        .B(sig_in2),
        .S(sel),
        .O(sig_out)
    );

endmodule


//only for simulation
`ifdef DLY_SIM

    // Behavioural 2:1 mux used in place of MUX2CKLBRMX2 when DLY_SIM is defined.
    // O follows B while S is 1 and A otherwise, 0.12 time units after a change
    // (0.12ns when the active time unit is 1ns; the precision must be fine
    // enough that the value is not rounded away).
    module SIM_MUX2CKLBRMX2 (
        input  A,
        input  B,
        input  S,
        output O
    );
        assign #0.12 O = S ? B : A;
    endmodule

    // Behavioural 2:1 mux used in place of MUX2CKLBRMX8 when DLY_SIM is defined.
    // O follows B while S is 1 and A otherwise, 0.11 time units after a change
    // (0.11ns when the active time unit is 1ns; the precision must be fine
    // enough that the value is not rounded away).
    module SIM_MUX2CKLBRMX8 (
        input  A,
        input  B,
        input  S,
        output O
    );
        assign #0.11 O = S ? B : A;
    endmodule

    // Behavioural inverter used in place of INVCKLBRMX10 when DLY_SIM is defined.
    // O is the complement of I, 0.06 time units after a change (0.06ns when the
    // active time unit is 1ns and the precision is fine enough).
    module SIM_INVCKLBRMX10 (
        input  I,
        output O
    );
        assign #0.06 O = ~I;
    endmodule

`endif  

看完代码,请思考以下问题:

  • 1.为什么每个coarse档里是5个MUX?
  • 2.为什么fine档的MUX和coarse的最后一个MUX的驱动比其它MUX的驱动大?
  • 3.为什么反相器的驱动比较大?

4.check脚本说明

要去check自己撸出来的delayline是否正确,无非就是想办法确认其每一档的delay是否符合预期,最大量程是多少。下面给出一个tcl脚本,可用于dc和pt中。


# Tap counts of one delay_line_half; both are passed to get_delayline_info.
set corse_bit_num 14 ;# coarse taps
set fine_bit_num  4  ;# fine taps


# Return the smallest numeric value found in a text file.
#
# The file is read line by line. Lines that are not a plain number after
# trimming whitespace (blank lines, "#..." labels, "name=value" summaries)
# are ignored, so the result is always the result of a numeric comparison.
#
# Arguments:
#   filename - path of the file to scan.
# Returns:
#   The minimum value, as the (trimmed) text found in the file.
# Errors:
#   Raised when the file cannot be opened or read, or holds no numeric line.
proc get_fminval {filename} {
    set file_handle [open $filename r]
    # Always release the handle, even if the read itself fails.
    if {[catch {read $file_handle} content]} {
        close $file_handle
        error $content
    }
    close $file_handle

    set found 0
    foreach line [split $content "\n"] {
        set cur_value [string trim $line]
        if {![string is double -strict $cur_value]} {
            continue
        }
        if {!$found || $cur_value < $min_value} {
            set min_value $cur_value
            set found 1
        }
    }
    if {!$found} {
        error "get_fminval: no numeric value found in \"$filename\""
    }
    return $min_value
}


# Return the largest numeric value found in a text file.
#
# The file is read line by line. Lines that are not a plain number after
# trimming whitespace (blank lines, "#..." labels, "name=value" summaries)
# are ignored, so the result is always the result of a numeric comparison.
#
# Arguments:
#   filename - path of the file to scan.
# Returns:
#   The maximum value, as the (trimmed) text found in the file.
# Errors:
#   Raised when the file cannot be opened or read, or holds no numeric line.
proc get_fmaxval {filename} {
    set file_handle [open $filename r]
    # Always release the handle, even if the read itself fails.
    if {[catch {read $file_handle} content]} {
        close $file_handle
        error $content
    }
    close $file_handle

    set found 0
    foreach line [split $content "\n"] {
        set cur_value [string trim $line]
        if {![string is double -strict $cur_value]} {
            continue
        }
        if {!$found || $cur_value > $max_value} {
            set max_value $cur_value
            set found 1
        }
    }
    if {!$found} {
        error "get_fmaxval: no numeric value found in \"$filename\""
    }
    return $max_value
}


# Helper: time the i_pre_delay -> o_post_delay path under the case analysis
# currently in force and return the "data arrival time" of the first path in
# the report. The report itself is written to tmp_rpt.
proc _delayline_measure {tmp_rpt} {
    report_timing -from i_pre_delay -to o_post_delay > $tmp_rpt
    set arrival [exec awk "/data arrival time/{print \$4;exit}" $tmp_rpt]
    if {$arrival eq ""} {
        error "_delayline_measure: no \"data arrival time\" found in $tmp_rpt"
    }
    return $arrival
}

# Sweep the delay line through its tap settings and write two reports.
#
# Sweep order (each bit is forced with set_case_analysis):
#   1. all delay_ctrl bits 0 (default);
#   2. fine bits 0..fine_bit_num-1 switched on one after another;
#   3. for every coarse bit, from the lowest up: fine bits back to 0, the
#      coarse bit switched on (lower coarse bits stay on), then the fine bits
#      switched on one after another again.
#
# Reports, written to ./rpt/<rc_corner>_<v_corner>_<t_corner>_*:
#   ..._delay_value.rpt - a "#delay_ctrl=..." label (one binary digit per bit)
#                         followed by the arrival time, for every setting,
#                         then "measuring range=<largest arrival time>".
#   ..._delay_step.rpt  - difference between each setting and the previous
#                         one, then "max_step=" and "min_step=".
#
# Arguments:
#   corse_bit_num - number of coarse control bits (above the fine bits).
#   fine_bit_num  - number of fine control bits (delay_ctrl[0] upwards).
#   rc_corner, v_corner, t_corner - only used to build the report file names.
proc get_delayline_info {corse_bit_num fine_bit_num {rc_corner CMAX} {v_corner 0.99} {t_corner -40}} {
    set total_bits [expr {$corse_bit_num + $fine_bit_num}]

    # The redirections below fail if the report directory is missing.
    file mkdir ./rpt
    set rpt_prefix ./rpt/${rc_corner}_${v_corner}_${t_corner}
    set tmp_rpt    ${rpt_prefix}_tmp.rpt
    set value_rpt  ${rpt_prefix}_delay_value.rpt
    set step_rpt   ${rpt_prefix}_delay_step.rpt

    # Start the step report empty so that steps are never appended to the
    # output of an earlier run, whatever the bit counts are.
    close [open $step_rpt w]

    # --- default setting: every tap off --------------------------------
    set_case_analysis 0 delay_ctrl*
    set prev_delay [_delayline_measure $tmp_rpt]
    echo "#delay_ctrl=${total_bits}'b[string repeat 0 $total_bits](default)" > $value_rpt
    echo $prev_delay >> $value_rpt

    # --- fine taps only -------------------------------------------------
    for {set i 0} {$i < $fine_bit_num} {incr i} {
        # Brackets are escaped so Tcl passes the literal bus index.
        set_case_analysis 1 "delay_ctrl\[$i\]"
        set cur_delay [_delayline_measure $tmp_rpt]

        echo "#delay_ctrl=${total_bits}'b[string repeat 0 [expr {$total_bits - $i - 1}]][string repeat 1 [expr {$i + 1}]]" >> $value_rpt
        echo $cur_delay >> $value_rpt
        echo [expr {$cur_delay - $prev_delay}] >> $step_rpt
        set prev_delay $cur_delay
    }

    # --- coarse taps, each followed by a full fine sweep ----------------
    set_case_analysis 0 delay_ctrl*

    for {set i 0} {$i < $corse_bit_num} {incr i} {
        # Fine taps back to 0 before the next coarse tap is switched on.
        for {set j 0} {$j < $fine_bit_num} {incr j} {
            set_case_analysis 0 "delay_ctrl\[$j\]"
        }

        set coarse_label "[string repeat 0 [expr {$corse_bit_num - $i - 1}]][string repeat 1 [expr {$i + 1}]]"

        set_case_analysis 1 "delay_ctrl\[[expr {$i + $fine_bit_num}]\]"
        set cur_delay [_delayline_measure $tmp_rpt]
        echo "#delay_ctrl=${total_bits}'b${coarse_label}[string repeat 0 $fine_bit_num]" >> $value_rpt
        echo $cur_delay >> $value_rpt
        echo [expr {$cur_delay - $prev_delay}] >> $step_rpt
        set prev_delay $cur_delay

        for {set j 0} {$j < $fine_bit_num} {incr j} {
            set_case_analysis 1 "delay_ctrl\[$j\]"
            set cur_delay [_delayline_measure $tmp_rpt]
            echo "#delay_ctrl=${total_bits}'b${coarse_label}[string repeat 0 [expr {$fine_bit_num - $j - 1}]][string repeat 1 [expr {$j + 1}]]" >> $value_rpt
            echo $cur_delay >> $value_rpt
            echo [expr {$cur_delay - $prev_delay}] >> $step_rpt
            set prev_delay $cur_delay
        }
    }

    # --- summaries --------------------------------------------------------
    # Both extremes are taken before the summary lines are appended.
    set min_step [get_fminval $step_rpt]
    set max_step [get_fmaxval $step_rpt]
    echo "max_step=$max_step" >> $step_rpt
    echo "min_step=$min_step" >> $step_rpt

    set measuring_range [get_fmaxval $value_rpt]
    echo "measuring range=${measuring_range}" >> $value_rpt

    file delete $tmp_rpt
}
# The corner labels are normally set by the surrounding flow. Fall back to the
# defaults of get_delayline_info when they are not, instead of failing with
# "no such variable".
if {![info exists rc_corner]} { set rc_corner CMAX }
if {![info exists v_corner]}  { set v_corner  0.99 }
if {![info exists t_corner]}  { set t_corner  -40 }
get_delayline_info $corse_bit_num $fine_bit_num $rc_corner $v_corner $t_corner

脚本可以输出两个报告,如下图。左边列出了每两个相邻档位之间的step,右边列出了每档的delay以及最大量程。从报告里可以发现,上篇文章中提到的结构有一个缺陷:当最高的一个coarse档打开时,量程的线性关系(这里的线性关系指的是相邻两档的delay只相差0.02ns左右)被打破,这主要是因为最高的coarse档前面没有加补偿MUX引起的。

图片.png

5.仿真

通过PT报出路径的delay,并根据实际值修改下面的红框位置的值即可。

图片.png

6.通过hardcore保证P&R的一致性

我们知道,同一个RTL在每次综合和P&R之后,结果都有可能不一样。那么现在RTL写完了,要如何保证delayline的值在每次P&R之后是固定的?又如何减小每一档之间的误差呢?

我们可以考虑把delayline做成hardcore,通过调整P&R,来保证每个cell之间的delay基本一致(这种小电路,基本上工具自动做就可以了,无需手动调整)。至于什么是hardcore,怎么做hardcore,这里就不啰嗦了,有疑问的去请教后端工程师吧。


END

作者:七点班车
原文链接:IC小迷弟

推荐阅读

更多IC设计技术干货请关注IC设计技术专栏。
推荐阅读
关注数
11372
内容数
1224
主要交流IC以及SoC设计流程相关的技术和知识
目录
极术微信服务号
关注极术微信号
实时接收点赞提醒和评论通知
安谋科技学堂公众号
关注安谋科技学堂
实时获取安谋科技及 Arm 教学资源
安谋科技招聘公众号
关注安谋科技招聘
实时获取安谋科技中国职位信息