写在前面
前方高能,建议先把上篇文章《一种数字delayline的设计方案》学透,再来看下面的内容。
本文给出了一个delayline的设计demo,包括RTL及其check脚本。本文涉及的代码,脚本和报告,都可以到公众号后台回复“delayline”获取。
1.端口说明
2.电路结构说明
为了提高占空比质量,将delayline分成两级,每一级后面加个反相器:
每一级里有14个coarse档、4个fine档和一个补偿MUX:
每个fine档里是一个MUX:
每个coarse档里是5个MUX:
3.RTL实例
// Top level of the delay line.
// The input passes through two delay_line_half stages in series; each stage
// is followed by one inverter cell. Both stages receive the same delay_ctrl
// word, so a given setting is applied twice.
//
// Ports:
//   delay_ctrl   - tap selects, forwarded unchanged to both half stages
//                  ([17:4] coarse taps, [3:0] fine taps inside delay_line_half)
//   i_pre_delay  - signal to be delayed
//   o_post_delay - delayed signal (output of the second inverter)
//
// Define DLY_SIM to replace the library inverters with the behavioural
// SIM_INVCKLBRMX10 model.
module delay_line
(
    input  [17:0] delay_ctrl,
    input         i_pre_delay,
    output        o_post_delay
);

    // Internal nets are declared explicitly: relying on implicit nets hides
    // typos in net names and does not compile under `default_nettype none.
    wire post_delay_state0;      // output of the first half stage
    wire post_delay_state0_inv;  // first half stage output after inversion
    wire post_delay_state1;      // output of the second half stage

    // First half stage
    delay_line_half U_delay_line_state0 (
        .delay_ctrl   (delay_ctrl),
        .i_pre_delay  (i_pre_delay),
        .o_post_delay (post_delay_state0)
    );

`ifdef DLY_SIM
    SIM_INVCKLBRMX10 U_cell_inv0 (.I(post_delay_state0), .O(post_delay_state0_inv));
`else
    INVCKLBRMX10 U_cell_inv0 (.I(post_delay_state0), .O(post_delay_state0_inv));
`endif

    // Second half stage, fed by the inverted output of the first one
    delay_line_half U_delay_line_state1 (
        .delay_ctrl   (delay_ctrl),
        .i_pre_delay  (post_delay_state0_inv),
        .o_post_delay (post_delay_state1)
    );

    // The second inverter drives the module output
`ifdef DLY_SIM
    SIM_INVCKLBRMX10 U_cell_inv1 (.I(post_delay_state1), .O(o_post_delay));
`else
    INVCKLBRMX10 U_cell_inv1 (.I(post_delay_state1), .O(o_post_delay));
`endif

endmodule
// One half of the delay line: a chain of 14 coarse taps followed by a chain
// of 4 fine taps.
//
// Every tap is a 2:1 selection between the undelayed signal (sig_in1) and the
// output of the previous tap (sig_in2); sel = 1 picks the delayed path.
// The tap closest to the output has the lowest index, so the delayed path is
// as long as the run of consecutive 1s starting at delay_ctrl[4] (coarse) or
// delay_ctrl[0] (fine).
//
// Ports:
//   delay_ctrl   - [17:4] coarse tap selects, [3:0] fine tap selects
//   i_pre_delay  - signal to be delayed
//   o_post_delay - delayed signal
module delay_line_half
(
    input  [17:0] delay_ctrl,
    input         i_pre_delay,
    output        o_post_delay
);

    // Internal nets are declared explicitly: relying on implicit nets hides
    // typos in net names and does not compile under `default_nettype none.
    wire cdly13, cdly12, cdly11, cdly10, cdly09, cdly08, cdly07;
    wire cdly06, cdly05, cdly04, cdly03, cdly02, cdly01;
    wire pre_fine;      // output of the coarse chain, input of the fine chain
    wire pre_fine_d0;   // pre_fine after the always-selected mux U_cell_fdt0
    wire fdly03, fdly02, fdly01;

    //coarse delay part
    // The topmost tap has no previous tap, so both of its inputs are i_pre_delay.
    coarse_delay U_dly13_coarse (.sel(delay_ctrl[17]), .sig_in1(i_pre_delay), .sig_in2(i_pre_delay), .sig_out(cdly13));
    coarse_delay U_dly12_coarse (.sel(delay_ctrl[16]), .sig_in1(i_pre_delay), .sig_in2(cdly13), .sig_out(cdly12));
    coarse_delay U_dly11_coarse (.sel(delay_ctrl[15]), .sig_in1(i_pre_delay), .sig_in2(cdly12), .sig_out(cdly11));
    coarse_delay U_dly10_coarse (.sel(delay_ctrl[14]), .sig_in1(i_pre_delay), .sig_in2(cdly11), .sig_out(cdly10));
    coarse_delay U_dly09_coarse (.sel(delay_ctrl[13]), .sig_in1(i_pre_delay), .sig_in2(cdly10), .sig_out(cdly09));
    coarse_delay U_dly08_coarse (.sel(delay_ctrl[12]), .sig_in1(i_pre_delay), .sig_in2(cdly09), .sig_out(cdly08));
    coarse_delay U_dly07_coarse (.sel(delay_ctrl[11]), .sig_in1(i_pre_delay), .sig_in2(cdly08), .sig_out(cdly07));
    coarse_delay U_dly06_coarse (.sel(delay_ctrl[10]), .sig_in1(i_pre_delay), .sig_in2(cdly07), .sig_out(cdly06));
    coarse_delay U_dly05_coarse (.sel(delay_ctrl[09]), .sig_in1(i_pre_delay), .sig_in2(cdly06), .sig_out(cdly05));
    coarse_delay U_dly04_coarse (.sel(delay_ctrl[08]), .sig_in1(i_pre_delay), .sig_in2(cdly05), .sig_out(cdly04));
    coarse_delay U_dly03_coarse (.sel(delay_ctrl[07]), .sig_in1(i_pre_delay), .sig_in2(cdly04), .sig_out(cdly03));
    coarse_delay U_dly02_coarse (.sel(delay_ctrl[06]), .sig_in1(i_pre_delay), .sig_in2(cdly03), .sig_out(cdly02));
    coarse_delay U_dly01_coarse (.sel(delay_ctrl[05]), .sig_in1(i_pre_delay), .sig_in2(cdly02), .sig_out(cdly01));
    coarse_delay U_dly00_coarse (.sel(delay_ctrl[4]),  .sig_in1(i_pre_delay), .sig_in2(cdly01), .sig_out(pre_fine));

    //fine delay part
    // U_cell_fdt0 has S tied to 0 and therefore always passes pre_fine (input A);
    // it inserts one mux delay in front of the delayed input of the topmost fine tap.
`ifdef DLY_SIM
    SIM_MUX2CKLBRMX8 U_cell_fdt0 (.A(pre_fine), .B(1'b0), .S(1'b0), .O(pre_fine_d0));
`else
    MUX2CKLBRMX8 U_cell_fdt0 (.A(pre_fine), .B(1'b0), .S(1'b0), .O(pre_fine_d0));
`endif
    fine_delay U_dly03_fine (.sel(delay_ctrl[03]), .sig_in1(pre_fine), .sig_in2(pre_fine_d0), .sig_out(fdly03));
    fine_delay U_dly02_fine (.sel(delay_ctrl[02]), .sig_in1(pre_fine), .sig_in2(fdly03), .sig_out(fdly02));
    fine_delay U_dly01_fine (.sel(delay_ctrl[01]), .sig_in1(pre_fine), .sig_in2(fdly02), .sig_out(fdly01));
    fine_delay U_dly00_fine (.sel(delay_ctrl[00]), .sig_in1(pre_fine), .sig_in2(fdly01), .sig_out(o_post_delay));

endmodule
// One coarse tap of the delay line, built from five 2:1 mux cells.
// Four muxes with S tied to 1 always pass their B input and form a fixed
// delay chain on sig_in2; the fifth mux selects between the undelayed input
// and the end of that chain.
//
// Ports:
//   sel     - 0: sig_out follows sig_in1, 1: sig_out follows the delayed sig_in2
//   sig_in1 - original signal, not delayed
//   sig_in2 - signal coming from the previous tap
//   sig_out - tap output
//
// Define DLY_SIM to use the behavioural SIM_* mux models instead of the
// library cells.
module coarse_delay
(
    input  sel,
    input  sig_in1, //original sig in, not be delayed
    input  sig_in2, //delayed sig in
    output sig_out
);

    // Nets of the fixed delay chain. Declared explicitly (and once, ahead of
    // both `ifdef branches) instead of relying on implicit nets, which hide
    // typos and do not compile under `default_nettype none.
    wire dly4, dly3, dly2, dly1;

`ifdef DLY_SIM
    SIM_MUX2CKLBRMX2 U_cell_dt4 (.A(1'b0), .B(sig_in2), .S(1'b1), .O(dly4));
    SIM_MUX2CKLBRMX2 U_cell_dt3 (.A(1'b0), .B(dly4), .S(1'b1), .O(dly3));
    SIM_MUX2CKLBRMX2 U_cell_dt2 (.A(1'b0), .B(dly3), .S(1'b1), .O(dly2));
    SIM_MUX2CKLBRMX2 U_cell_dt1 (.A(1'b0), .B(dly2), .S(1'b1), .O(dly1));
    // Output mux: the only cell in the tap whose select is controllable
    SIM_MUX2CKLBRMX8 U_cell_dt0 (.A(sig_in1), .B(dly1), .S(sel), .O(sig_out));
`else
    MUX2CKLBRMX2 U_cell_dt4 (.A(1'b0), .B(sig_in2), .S(1'b1), .O(dly4));
    MUX2CKLBRMX2 U_cell_dt3 (.A(1'b0), .B(dly4), .S(1'b1), .O(dly3));
    MUX2CKLBRMX2 U_cell_dt2 (.A(1'b0), .B(dly3), .S(1'b1), .O(dly2));
    MUX2CKLBRMX2 U_cell_dt1 (.A(1'b0), .B(dly2), .S(1'b1), .O(dly1));
    // Output mux: the only cell in the tap whose select is controllable
    MUX2CKLBRMX8 U_cell_dt0 (.A(sig_in1), .B(dly1), .S(sel), .O(sig_out));
`endif

endmodule
// One fine tap of the delay line: a single 2:1 mux cell.
//   sel = 0 : sig_out follows sig_in1 (original signal, not delayed)
//   sel = 1 : sig_out follows sig_in2 (signal from the previous tap)
// The library cell is instantiated unless DLY_SIM is defined, in which case
// the behavioural SIM_MUX2CKLBRMX8 model is used.
module fine_delay
(
    input  sel,
    input  sig_in1,  // undelayed signal
    input  sig_in2,  // signal delayed by the previous tap
    output sig_out
);

`ifndef DLY_SIM
    MUX2CKLBRMX8 U_cell_dt0 (
        .A (sig_in1),
        .B (sig_in2),
        .S (sel),
        .O (sig_out)
    );
`else
    SIM_MUX2CKLBRMX8 U_cell_dt0 (
        .A (sig_in1),
        .B (sig_in2),
        .S (sel),
        .O (sig_out)
    );
`endif

endmodule
//only for simulation
`ifdef DLY_SIM
// Behavioural stand-in for the MUX2CKLBRMX2 cell (simulation only).
// O follows A when S is 0 and B when S is 1, with a delay of 0.12 time units.
// NOTE(review): the 0.12 is meant as nanoseconds, which assumes a `timescale
// with a 1ns unit and sub-ns precision is in effect when this is compiled -
// confirm in the testbench.
module SIM_MUX2CKLBRMX2 (
    input  A,
    input  B,
    input  S,
    output O
);

    assign #0.12 O = S ? B : A;

endmodule
// Behavioural stand-in for the MUX2CKLBRMX8 cell (simulation only).
// O follows A when S is 0 and B when S is 1, with a delay of 0.11 time units.
// NOTE(review): the 0.11 is meant as nanoseconds, which assumes a `timescale
// with a 1ns unit and sub-ns precision is in effect when this is compiled -
// confirm in the testbench.
module SIM_MUX2CKLBRMX8 (
    input  A,
    input  B,
    input  S,
    output O
);

    assign #0.11 O = S ? B : A;

endmodule
// Behavioural stand-in for the INVCKLBRMX10 cell (simulation only).
// O is the inverse of I, with a delay of 0.06 time units.
// NOTE(review): the 0.06 is meant as nanoseconds, which assumes a `timescale
// with a 1ns unit and sub-ns precision is in effect when this is compiled -
// confirm in the testbench.
module SIM_INVCKLBRMX10 (
    input  I,
    output O
);

    assign #0.06 O = ~I;

endmodule
`endif
看完代码,请思考以下问题:
- 1.为什么每个coarse档里是5个MUX?
- 2.为什么fine档的MUX和coarse的最后一个MUX的驱动比其它MUX的驱动大?
- 3.为什么反相器的驱动比较大?
4.check脚本说明
要去check自己撸出来的delayline是否正确,无非就是想办法确认其每一档的delay是否符合预期,最大量程是多少。下面给出一个tcl脚本,可用于dc和pt中。
# Number of coarse and fine control bits of one delay_line_half stage.
# They have to match the RTL: delay_ctrl[17:4] drives the 14 coarse taps and
# delay_ctrl[3:0] drives the 4 fine taps.
set corse_bit_num 14
set fine_bit_num 4
# get_fminval --
#   Return the smallest numeric value found in a text file holding one value
#   per line.
#
# Arguments:
#   filename - path of the file to scan
#
# Result:
#   The smallest number in the file (surrounding whitespace removed).
#   Lines that are not plain numbers - blank lines, "#..." headers,
#   "name=value" summary lines - are ignored, so they can neither be
#   compared as strings nor be returned as the "minimum".
#
# Errors:
#   Raises an error when the file cannot be opened or contains no numeric line.
proc get_fminval {filename} {
    set file_handle [open $filename r]
    set found 0
    set min_value ""
    # catch the read loop so that the handle is closed even if reading fails
    set status [catch {
        while {[gets $file_handle raw_line] >= 0} {
            set cur_value [string trim $raw_line]
            if {[string is double -strict $cur_value]} {
                if {!$found || $cur_value < $min_value} {
                    set min_value $cur_value
                    set found 1
                }
            }
        }
    } read_error]
    close $file_handle
    if {$status} {
        error $read_error
    }
    if {!$found} {
        error "get_fminval: no numeric value found in \"$filename\""
    }
    return $min_value
}
# get_fmaxval --
#   Return the largest numeric value found in a text file holding one value
#   per line.
#
# Arguments:
#   filename - path of the file to scan
#
# Result:
#   The largest number in the file (surrounding whitespace removed).
#   Lines that are not plain numbers - blank lines, "#..." headers,
#   "name=value" summary lines - are ignored. Without this filter such lines
#   would be compared as strings, and any line starting with a letter would
#   win over every number.
#
# Errors:
#   Raises an error when the file cannot be opened or contains no numeric line.
proc get_fmaxval {filename} {
    set file_handle [open $filename r]
    set found 0
    set max_value ""
    # catch the read loop so that the handle is closed even if reading fails
    set status [catch {
        while {[gets $file_handle raw_line] >= 0} {
            set cur_value [string trim $raw_line]
            if {[string is double -strict $cur_value]} {
                if {!$found || $cur_value > $max_value} {
                    set max_value $cur_value
                    set found 1
                }
            }
        }
    } read_error]
    close $file_handle
    if {$status} {
        error $read_error
    }
    if {!$found} {
        error "get_fmaxval: no numeric value found in \"$filename\""
    }
    return $max_value
}
# _delayline_arrival --
#   Private helper of get_delayline_info: time the i_pre_delay -> o_post_delay
#   path under the case analysis currently applied and return the first
#   "data arrival time" value of the report.
#
# Arguments:
#   tmp_rpt - scratch file that receives the report_timing output
proc _delayline_arrival {tmp_rpt} {
    report_timing -from i_pre_delay -to o_post_delay > $tmp_rpt
    set arrival [string trim [exec awk "/data arrival time/{print \$4;exit}" $tmp_rpt]]
    if {![string is double -strict $arrival]} {
        error "get_delayline_info: no \"data arrival time\" value found in $tmp_rpt"
    }
    return $arrival
}

# get_delayline_info --
#   Sweep the delay_ctrl settings of the delay line with set_case_analysis,
#   read the i_pre_delay -> o_post_delay arrival time for each setting and
#   write two reports below ./rpt:
#     <rc>_<v>_<t>_delay_value.rpt - one "#delay_ctrl=..." header plus one
#                                    arrival time per setting, then the
#                                    largest arrival time ("measuring range")
#     <rc>_<v>_<t>_delay_step.rpt  - difference between consecutive settings,
#                                    then max_step / min_step
#
#   Sweep order: all bits 0, then the fine bits switched on one by one from
#   bit 0, then for every coarse bit (switched on cumulatively from the lowest
#   coarse bit) first the fine bits all 0 and then switched on one by one.
#
# Arguments:
#   corse_bit_num - number of coarse control bits (upper bits of delay_ctrl)
#   fine_bit_num  - number of fine control bits (lower bits of delay_ctrl)
#   rc_corner, v_corner, t_corner - only used to build the report file names
#
# Changes against the first version of this proc:
#   * headers carry the 'b radix - the digits printed are binary, not hex
#   * the step report is always started from scratch, also for fine_bit_num 0
#   * ./rpt is created when missing
#   * awk runs once per measurement and its result is captured directly
#   * bus bits are written with escaped brackets, so that the Tcl parser never
#     sees them as command substitution
proc get_delayline_info {corse_bit_num fine_bit_num {rc_corner CMAX} {v_corner 0.99} {t_corner -40}} {
    set total_bits [expr {$corse_bit_num + $fine_bit_num}]
    set rpt_base   ./rpt/${rc_corner}_${v_corner}_${t_corner}
    set tmp_rpt    ${rpt_base}_tmp.rpt
    set value_rpt  ${rpt_base}_delay_value.rpt
    set step_rpt   ${rpt_base}_delay_step.rpt

    file mkdir ./rpt
    # start from an empty step report; every step below is appended
    close [open $step_rpt w]

    # default setting: every control bit is 0
    set_case_analysis 0 delay_ctrl*
    set prev_delay [_delayline_arrival $tmp_rpt]
    echo "#delay_ctrl=${total_bits}'b[string repeat 0 $total_bits](default)" > $value_rpt
    echo $prev_delay >> $value_rpt

    # fine bits only, switched on cumulatively from bit 0
    for {set i 0} {$i < $fine_bit_num} {incr i} {
        set_case_analysis 1 "delay_ctrl\[$i\]"
        set cur_delay [_delayline_arrival $tmp_rpt]
        echo "#delay_ctrl=${total_bits}'b[string repeat 0 [expr {$total_bits - $i - 1}]][string repeat 1 [expr {$i + 1}]]" >> $value_rpt
        echo $cur_delay >> $value_rpt
        echo [expr {$cur_delay - $prev_delay}] >> $step_rpt
        set prev_delay $cur_delay
    }

    # coarse bits, switched on cumulatively; the fine sweep is repeated for each
    set_case_analysis 0 delay_ctrl*
    for {set i 0} {$i < $corse_bit_num} {incr i} {
        # binary digits of the coarse field for this iteration, MSB first
        set coarse_code "[string repeat 0 [expr {$corse_bit_num - $i - 1}]][string repeat 1 [expr {$i + 1}]]"

        # back to "all fine bits off" before the next coarse bit is added
        for {set j 0} {$j < $fine_bit_num} {incr j} {
            set_case_analysis 0 "delay_ctrl\[$j\]"
        }
        set_case_analysis 1 "delay_ctrl\[[expr {$i + $fine_bit_num}]\]"
        set cur_delay [_delayline_arrival $tmp_rpt]
        echo "#delay_ctrl=${total_bits}'b${coarse_code}[string repeat 0 $fine_bit_num]" >> $value_rpt
        echo $cur_delay >> $value_rpt
        echo [expr {$cur_delay - $prev_delay}] >> $step_rpt
        set prev_delay $cur_delay

        for {set j 0} {$j < $fine_bit_num} {incr j} {
            set_case_analysis 1 "delay_ctrl\[$j\]"
            set cur_delay [_delayline_arrival $tmp_rpt]
            echo "#delay_ctrl=${total_bits}'b${coarse_code}[string repeat 0 [expr {$fine_bit_num - $j - 1}]][string repeat 1 [expr {$j + 1}]]" >> $value_rpt
            echo $cur_delay >> $value_rpt
            echo [expr {$cur_delay - $prev_delay}] >> $step_rpt
            set prev_delay $cur_delay
        }
    }

    # summary lines; both extremes are read before anything is appended
    set min_step [string trim [get_fminval $step_rpt]]
    set max_step [string trim [get_fmaxval $step_rpt]]
    echo "max_step=$max_step" >> $step_rpt
    echo "min_step=$min_step" >> $step_rpt
    set measuring_range [string trim [get_fmaxval $value_rpt]]
    echo "measuring range=${measuring_range}" >> $value_rpt

    file delete $tmp_rpt
}
# Run the check for the current corner.
# rc_corner / v_corner / t_corner are not set anywhere in this script; they are
# expected to come from the surrounding flow. When one of them is missing, fall
# back to the default of the matching get_delayline_info argument instead of
# stopping with "no such variable", and say so in the log because the value
# ends up in the report file names.
foreach {corner_var corner_default} {rc_corner CMAX v_corner 0.99 t_corner -40} {
    if {![info exists $corner_var]} {
        puts "get_delayline_info: $corner_var is not set, using $corner_default"
        set $corner_var $corner_default
    }
}
get_delayline_info $corse_bit_num $fine_bit_num $rc_corner $v_corner $t_corner
脚本可以输出两个报告,如下图。左边列出了每两个相邻档位之间的step,右边列出了每档的delay以及最大量程。从报告里可以发现,上篇文章中提到的结构有一个缺陷:当最高的一个coarse档打开时,量程的线性关系(这里的线性关系指的是相邻两档的delay只相差0.02ns左右)被打破,这主要是因为最高的coarse档前面没有加补偿MUX引起的。
5.仿真
通过PT报出路径的delay,并根据实际值修改下面的红框位置的值即可。
6.通过hardcore保证P&R的一致性
我们知道,同一个RTL在每次综合和P&R之后,结果都有可能不一样。那么现在RTL写完了,要如何保证delayline的值在每次P&R之后是固定的?又如何减小每一档之间的误差呢?
我们可以考虑把delayline做成hardcore,通过调整P&R,来保证每个cell之间的delay基本一致(这种小电路,基本上工具自动做就可以了,无需手动调整)。至于什么是hardcore,怎么做hardcore,这里就不啰嗦了,有疑问的去请教后端工程师吧。
END
作者:七点班车
原文链接:IC小迷弟
推荐阅读
更多IC设计技术干货请关注IC设计技术专栏。