
最近在排查一个网络问题,ifconfig eth0 up
后,网卡link up比较慢。因此,分析了下从ifconfig up
到网络驱动的调用流程。这里顺便作个记录。
ifconfig eth0 up
调用的是 busybox 提供的命令,因此从 busybox 源码入手,逐步分析调用流程。本文引用的相关代码位于:networking/ifenslave.c
ifconfig eth0 up
和ifconfig eth0 down
分别对应busybox 的set_if_up()
和set_if_down()
.
staticintset_if_down(char*ifname,intflags)
{
intres=set_if_flags(ifname,flags~IFF_UP);
if(res)
bb_perror_msg("%s:can'tdown",ifname);
returnres;
}
staticintset_if_up(char*ifname,intflags)
{
intres=set_if_flags(ifname,flags|IFF_UP);
if(res)
bb_perror_msg("%s:can'tup",ifname);
returnres;
}
比如,当我们敲ifconfig eth0 down
时,实则就是调用:
set_if_down("eth0",master_flags.ifr_flags);
set_if_flags()
会将网卡名,up / down
标志位flags
通过ioctl命令SIOCSIFFLAGS
传递给内核网卡驱动。
staticintset_if_flags(char*ifname,intflags)
{
structifreqifr;
ifr.ifr_flags=flags;
returnset_ifrname_and_do_ioctl(SIOCSIFFLAGS,ifr,ifname);
}
接着深入到内核代码中,看下SIOCSIFFLAGS
命令在哪里实现。代码位于kernel\net\core\dev_ioctl.c
。
staticintdev_ifsioc(structnet*net,structifreq*ifr,unsignedintcmd)
{
interr;
structnet_device*dev=__dev_get_by_name(net,ifr->ifr_name);
conststructnet_device_ops*ops;
if(!dev)
return-ENODEV;
ops=dev->netdev_ops;
switch(cmd){
caseSIOCSIFFLAGS:/*Setinterfaceflags*/
returndev_change_flags(dev,ifr->ifr_flags);
caseSIOCSIFMETRIC:/*Setthemetricontheinterface
(currentlyunused)*/
return-EOPNOTSUPP;
...................
}
returnerr;
}
dev_ifsioc()
会调用__dev_get_by_name()
根据 网卡名遍历 net链表,如果匹配到则返回net_device
结构体指针。接着,SIOCSIFFLAGS会调用到dev_change_flags()
,最后调用到__dev_change_flags()
。
intdev_change_flags(structnet_device*dev,unsignedintflags)
{
intret;
unsignedintchanges,old_flags=dev->flags,old_gflags=dev->gflags;
ret=__dev_change_flags(dev,flags);
if(ret<0)
returnret;
changes=(old_flags^dev->flags)|(old_gflags^dev->gflags);
__dev_notify_flags(dev,old_flags,changes);
returnret;
}
int__dev_change_flags(structnet_device*dev,unsignedintflags)
{
unsignedintold_flags=dev->flags;
intret;
ASSERT_RTNL();
/*
*Settheflagsonourdevice.
*/
dev->flags=(flags(IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|
IFF_DYNAMIC|IFF_MULTICAST|IFF_PORTSEL|
IFF_AUTOMEDIA))|
(dev->flags(IFF_UP|IFF_VOLATILE|IFF_PROMISC|
IFF_ALLMULTI));
/*
*Loadinthecorrectmulticastlistnowtheflagshavechanged.
*/
if((old_flags^flags)IFF_MULTICAST)
dev_change_rx_flags(dev,IFF_MULTICAST);
dev_set_rx_mode(dev);
/*
*Havewedownedtheinterface.WehandleIFF_UPourselves
*accordingtouserattemptstosetit,ratherthanblindly
*settingit.
*/
ret=0;
/*两个标识有一个是IFF_UP*/
if((old_flags^flags)IFF_UP)
ret=((old_flagsIFF_UP)?__dev_close:__dev_open)(dev);//通过flags判断调用__dev_close还是__dev_open
if((flags^dev->gflags)IFF_PROMISC){
intinc=(flagsIFF_PROMISC)?1:-1;
unsignedintold_flags=dev->flags;
dev->gflags^=IFF_PROMISC;
if(__dev_set_promiscuity(dev,inc,false)>=0)
if(dev->flags!=old_flags)
dev_set_rx_mode(dev);
}
/*NOTE:orderofsynchronizationofIFF_PROMISCandIFF_ALLMULTI
isimportant.Some(broken)driverssetIFF_PROMISC,when
IFF_ALLMULTIisrequestednotaskingusandnotreporting.
*/
if((flags^dev->gflags)IFF_ALLMULTI){
intinc=(flagsIFF_ALLMULTI)?1:-1;
dev->gflags^=IFF_ALLMULTI;
__dev_set_allmulti(dev,inc,false);
}
returnret;
}
在__dev_change_flags(dev, flags)
函数中,通过比较旧标志 old_flags 与新标志 flags 的IFF_UP
位是否发生变化:若发生变化,原来为 up 则调用__dev_close()
,原来为 down 则调用__dev_open()
,以此来开关eth0。
__dev_close
中会将当前的net_device
加入到等待设备关闭列表中。
staticint__dev_close(structnet_device*dev)
{
intretval;
LIST_HEAD(single);
list_add(dev->close_list,single);
retval=__dev_close_many(single);
list_del(single);
returnretval;
}
__dev_close_many
通知设备正在关闭,等待未发送完的数据发送完,最后清除开启标记。
staticint__dev_close_many(structlist_head*head)
{
structnet_device*dev;
ASSERT_RTNL();
might_sleep();
list_for_each_entry(dev,head,close_list){
/*Temporarilydisablenetpolluntiltheinterfaceisdown*/
/*禁用netpoll*/
netpoll_poll_disable(dev);
/*通知设备正在关闭*/
call_netdevice_notifiers(NETDEV_GOING_DOWN,dev);
/*清除start标志位*/
clear_bit(__LINK_STATE_START,dev->state);
/*Synchronizetoscheduledpoll.Wecannottouchpolllist,it
*canbeevenondifferentcpu.Sojustclearnetif_running().
*
*dev->stop()willinvokenapi_disable()onallofit's
*napi_structinstancesonthisdevice.
*/
smp_mb__after_atomic();/*Commitnetif_running().*/
}
/*未发送完的数据发送完*/
dev_deactivate_many(head);
list_for_each_entry(dev,head,close_list){
conststructnet_device_ops*ops=dev->netdev_ops;
/*
*Callthedevicespecificclose.Thiscannotfail.
*OnlyifdeviceisUP
*
*WeallowittobecalledevenafteraDETACHhot-plug
*event.
*/
/*调用设备关闭操作*/
if(ops->ndo_stop)
ops->ndo_stop(dev);
/*标记设备关闭*/
dev->flags=~IFF_UP;
/*启用netpoll*/
netpoll_poll_enable(dev);
}
return0;
}
ndo_stop
为关闭网卡时,不同网卡驱动注册的不同的关闭函数,我们以海思的网卡驱动为例,分析下ndo_stop函数的实现。代码位于kernel\drivers\net\ethernet\hisilicon\hns\hns_enet.c
。
/*
 * Stop callback of the hns driver: takes the NIC down via
 * hns_nic_net_down() and always reports success (0).
 */
static int hns_nic_net_stop(struct net_device *ndev)
{
	hns_nic_net_down(ndev);

	return 0;
}
staticvoidhns_nic_net_down(structnet_device*ndev)
{
inti;
structhnae_ae_ops*ops;
structhns_nic_priv*priv=netdev_priv(ndev);
if(test_and_set_bit(NIC_STATE_DOWN,priv->state))
return;
(void)del_timer_sync(priv->service_timer);
netif_tx_stop_all_queues(ndev);
netif_carrier_off(ndev);
netif_tx_disable(ndev);
priv->link=0;
if(priv->phy)
phy_stop(priv->phy);
ops=priv->ae_handle->dev->ops;
if(ops->stop)
ops->stop(priv->ae_handle);
netif_tx_stop_all_queues(ndev);
for(i=priv->ae_handle->q_num-1;i>=0;i--){
hns_nic_ring_close(ndev,i);
hns_nic_ring_close(ndev,i+priv->ae_handle->q_num);
/*cleantxbuffers*/
hns_nic_tx_clr_all_bufs(priv->ring_data+i);
}
}
hns_nic_net_down()
中会调用netif_carrier_off()
通知内核子系统网络断开。下面我们详细分析下netif_carrier_off()的实现。
voidnetif_carrier_off(structnet_device*dev)
{
/*设置网卡为载波断开状态即nocarrier状态,上行时软中断下半部读到该状态不会进行网卡收包*/
if(!test_and_set_bit(__LINK_STATE_NOCARRIER,dev->state)){
if(dev->reg_state==NETREG_UNINITIALIZED)
return;
/*增加设备改变状态*/
atomic_inc(dev->carrier_changes);
/*加入事件处理队列进行处理*/
linkwatch_fire_event(dev);
}
}
linkwatch_fire_event()函数将设备加入到事件队列,并且进行事件调度,调度中会根据是否为紧急事件做不同处理。
voidlinkwatch_fire_event(structnet_device*dev)
{
/*判断是否是紧急处理的事件*/
boolurgent=linkwatch_urgent_event(dev);
/*判断是否是紧急处理的事件*/
if(!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING,dev->state)){
/*添加事件到事件列表*/
linkwatch_add_event(dev);
}elseif(!urgent)
/*设备以前已经设置了pending标记,不是紧急事件,直接返回*/
return;
/*事件调度*/
linkwatch_schedule_work(urgent);
}
linkwatch_urgent_event()
判断该事件是否需要紧急处理。
staticboollinkwatch_urgent_event(structnet_device*dev)
{
/*设备未运行,非紧急*/
if(!netif_running(dev))
returnfalse;
/*设备的索引号与连接索引号不等,紧急*/
if(dev->ifindex!=dev_get_iflink(dev))
returntrue;
/*设备作为teamport,紧急*/
if(dev->priv_flagsIFF_TEAM_PORT)
returntrue;
/*连接与否发送队列排队规则改变与否*/
returnnetif_carrier_ok(dev)qdisc_tx_changing(dev);
}
linkwatch_add_event()
将设备加入到事件处理链表。
staticvoidlinkwatch_add_event(structnet_device*dev)
{
unsignedlongflags;
spin_lock_irqsave(lweventlist_lock,flags);
/*若未添加,则添加设备到事件列表*/
if(list_empty(dev->link_watch_list)){
list_add_tail(dev->link_watch_list,lweventlist);
dev_hold(dev);
}
spin_unlock_irqrestore(lweventlist_lock,flags);
}
linkwatch_schedule_work()
对事件处理进行调度,紧急事件立即执行,非紧急事件延后执行。
/*
 * Schedule linkwatch_work. With @urgent set the work is (re)scheduled
 * with zero delay; otherwise it is scheduled after the time remaining
 * until linkwatch_nextevent.
 */
static void linkwatch_schedule_work(int urgent)
{
	/* Unsigned difference: wraps to a large value if nextevent passed. */
	unsigned long delay = linkwatch_nextevent - jiffies;

	/* Urgent flag already set: nothing more to schedule. */
	if (test_bit(LW_URGENT, &linkwatch_flags))
		return;

	/* Urgent scheduling requested. */
	if (urgent) {
		/* Another caller set the flag in the meantime: done. */
		if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
			return;
		/* We set the flag: run without delay. */
		delay = 0;
	}

	/* A delay above HZ (including the wrapped case) means run now. */
	if (delay > HZ)
		delay = 0;

	/* Urgent flag set: force the delayed work to zero delay. */
	if (test_bit(LW_URGENT, &linkwatch_flags))
		mod_delayed_work(system_wq, &linkwatch_work, 0);
	else
		/* Not urgent: schedule with the computed delay. */
		schedule_delayed_work(&linkwatch_work, delay);
}
__linkwatch_run_queue()
完成对事件调度队列中设备的处理。
staticvoid__linkwatch_run_queue(inturgent_only)
{
structnet_device*dev;
LIST_HEAD(wrk);
/*
*Limitthenumberoflinkwatcheventstoone
*persecondsothatarunawaydriverdoesnot
*causeastormofmessagesonthenetlink
*socket.Thislimitdoesnotapplytoupevents
*whilethedeviceqdiscisdown.
*/
/*已达到调度时间*/
if(!urgent_only)
linkwatch_nextevent=jiffies+HZ;
/*Limitwrap-aroundeffectondelay.*/
/*
未到达调度时间,并且下一次调度在当前时间的1s以后
那么设置调度时间是当前时间
*/
elseif(time_after(linkwatch_nextevent,jiffies+HZ))
linkwatch_nextevent=jiffies;
/*清除紧急标识*/
clear_bit(LW_URGENT,linkwatch_flags);
spin_lock_irq(lweventlist_lock);
list_splice_init(lweventlist,wrk);
/*遍历链表*/
while(!list_empty(wrk)){
/*获取设备*/
dev=list_first_entry(wrk,structnet_device,link_watch_list);
/*从链表移除设备*/
list_del_init(dev->link_watch_list);
/*未到达调度时间不需要紧急处理*/
if(urgent_only!linkwatch_urgent_event(dev)){
/*添加到链表尾部*/
list_add_tail(dev->link_watch_list,lweventlist);
/*继续处理*/
continue;
}
spin_unlock_irq(lweventlist_lock);
/*处理设备*/
linkwatch_do_dev(dev);
spin_lock_irq(lweventlist_lock);
}
/*链表有未处理事件,则以非紧急状态调度队列*/
if(!list_empty(lweventlist))
linkwatch_schedule_work(0);
spin_unlock_irq(lweventlist_lock);
}
linkwatch_do_dev()
完成对某个设备的状态改变处理。
staticvoidlinkwatch_do_dev(structnet_device*dev)
{
/*
*Makesuretheabovereadiscompletesinceitcanbe
*rewrittenassoonasweclearthebitbelow.
*/
smp_mb__before_atomic();
/*Weareabouttohandlethisdevice,
*soneweventscanbeaccepted
*/
/*清除pending标记*/
clear_bit(__LINK_STATE_LINKWATCH_PENDING,dev->state);
rfc2863_policy(dev);
/*如果设备启动状态*/
if(dev->flagsIFF_UP){
/*链路连接*/
if(netif_carrier_ok(dev))
/*启用排队规则*/
dev_activate(dev);
else
/*关闭排队规则*/
dev_deactivate(dev);
/*设备状态改变处理,执行netdev_chain上设备状态变更回调*/
netdev_state_change(dev);
}
dev_put(dev);
}
最后,hns_nic_net_down()
中会调用phy_stop()
将网卡link down。
voidphy_stop(structphy_device*phydev)
{
mutex_lock(phydev->lock);
if(PHY_HALTED==phydev->state)
gotoout_unlock;
if(phy_interrupt_is_valid(phydev)){
/*DisablePHYInterrupts*/
phy_config_interrupt(phydev,PHY_INTERRUPT_DISABLED);
/*Clearanypendinginterrupts*/
phy_clear_interrupt(phydev);
}
phydev->state=PHY_HALTED;
out_unlock:
mutex_unlock(phydev->lock);
/*Cannotcallflush_scheduled_work()hereasdesiredbecause
*ofrtnl_lock(),butPHY_HALTEDshallguaranteephy_change()
*willnotreenableinterrupts.
*/
}
phy_stop()
将phydev->state设置为PHY_HALTED,将网卡关闭。
__dev_open
为设备启用核心函数,该函数打开eth0,设置启用标记,并且设置接收模式,排队规则等。
staticint__dev_open(structnet_device*dev)
{
conststructnet_device_ops*ops=dev->netdev_ops;
intret;
ASSERT_RTNL();
/*设备不可用*/
if(!netif_device_present(dev))
return-ENODEV;
/*Blocknetpollfromtryingtodoanyrxpathservicing.
*Ifwedon'tdothisthereisachancendo_poll_controller
*orndo_pollmayberunningwhileweopenthedevice
*/
/*禁用netpoll*/
netpoll_poll_disable(dev);
/*设备打开前通知*/
ret=call_netdevice_notifiers(NETDEV_PRE_UP,dev);
ret=notifier_to_errno(ret);
if(ret)
returnret;
/*设置设备打开标记,设备将设置IFF_UP标志位*/
set_bit(__LINK_STATE_START,dev->state);
/*校验地址*/
if(ops->ndo_validate_addr)
ret=ops->ndo_validate_addr(dev);
/*执行打开*/
if(!retops->ndo_open)
ret=ops->ndo_open(dev);
/*启用netpoll*/
netpoll_poll_enable(dev);
/*失败,清除打开标记*/
if(ret)
clear_bit(__LINK_STATE_START,dev->state);
/*设备打开操作*/
else{
/*设置打开标记*/
dev->flags|=IFF_UP;
/*设置接收模式*/
dev_set_rx_mode(dev);
/*初始化排队规则*/
dev_activate(dev);
/*加入设备数据到熵池*/
add_device_randomness(dev->dev_addr,dev->addr_len);
}
returnret;
}
我们以海思的网卡驱动为例,分析下ndo_open()
函数的实现。代码位于kernel\drivers\net\ethernet\hisilicon\hns\hns_enet.c
。
staticinthns_nic_net_open(structnet_device*ndev)
{
structhns_nic_priv*priv=netdev_priv(ndev);
structhnae_handle*h=priv->ae_handle;
intret;
if(test_bit(NIC_STATE_TESTING,priv->state))
return-EBUSY;
priv->link=0;
netif_carrier_off(ndev);
/*设置txqueue的个数*/
ret=netif_set_real_num_tx_queues(ndev,h->q_num);
if(ret<0){
netdev_err(ndev,"netif_set_real_num_tx_queuesfail,ret=%d!\n",
ret);
returnret;
}
/*设置rxqueue的个数*/
ret=netif_set_real_num_rx_queues(ndev,h->q_num);
if(ret<0){
netdev_err(ndev,
"netif_set_real_num_rx_queuesfail,ret=%d!\n",ret);
returnret;
}
/*启动网卡*/
ret=hns_nic_net_up(ndev);
if(ret){
netdev_err(ndev,
"hnsnetupfail,ret=%d!\n",ret);
returnret;
}
return0;
}
staticinthns_nic_net_up(structnet_device*ndev)
{
structhns_nic_priv*priv=netdev_priv(ndev);
structhnae_handle*h=priv->ae_handle;
inti,j,k;
intret;
/*初始化中断,并设置中断函数为hns_irq_handle,每个rx和txqueue都对应一个中断*/
ret=hns_nic_init_irq(priv);
if(ret!=0){
netdev_err(ndev,"hnsinitirqfailed!ret=%d\n",ret);
returnret;
}
for(i=0;i<h->q_num*2;i++){
/*使能中断,使能napi*/
ret=hns_nic_ring_open(ndev,i);
if(ret)
gotoout_has_some_queues;
}
for(k=0;k<h->q_num;k++)
h->dev->ops->toggle_queue_status(h->qs[k],1);
/*设置mac地址*/
ret=h->dev->ops->set_mac_addr(h,ndev->dev_addr);
if(ret)
gotoout_set_mac_addr_err;
/*hns的start函数为null*/
ret=h->dev->ops->start?h->dev->ops->start(h):0;
if(ret)
gotoout_start_err;
if(priv->phy)
/*启动phy*/
phy_start(priv->phy);
clear_bit(NIC_STATE_DOWN,priv->state);
/*修改time每一秒到期一次*/
(void)mod_timer(priv->service_timer,jiffies+SERVICE_TIMER_HZ);
return0;
out_start_err:
netif_stop_queue(ndev);
out_set_mac_addr_err:
for(k=0;k<h->q_num;k++)
h->dev->ops->toggle_queue_status(h->qs[k],0);
out_has_some_queues:
for(j=i-1;j>=0;j--)
hns_nic_ring_close(ndev,j);
set_bit(NIC_STATE_DOWN,priv->state);
returnret;
}
最后会调用到phy_start()
启动网卡。
voidphy_start(structphy_device*phydev)
{
booldo_resume=false;
interr=0;
mutex_lock(phydev->lock);
switch(phydev->state){
casePHY_STARTING:
phydev->state=PHY_PENDING;
break;
casePHY_READY:
phydev->state=PHY_UP;
break;
casePHY_HALTED:
/*makesureinterruptsarere-enabledforthePHY*/
err=phy_enable_interrupts(phydev);
if(err<0)
break;
phydev->state=PHY_RESUMING;
do_resume=true;
break;
default:
break;
}
mutex_unlock(phydev->lock);
/*ifphywassuspended,bringthephysicallinkupagain*/
if(do_resume)
phy_resume(phydev);
}
https://blog.csdn.net/qq_29044159/article/details/118030335
https://www.likecs.com/show-308571259.html
https://blog.csdn.net/Longyu_wlz/article/details/108026902
http://bbs.chinaunix.net/thread-2020457-1-1.html
https://blog.csdn.net/tiantao2012/article/details/75283527
https://blog.csdn.net/sinat_20184565/article/details/104353185
暂无评论哦,快来评论一下吧!
