【网络驱动】ifconfig up 后内核网络驱动做了什么?
来源:嵌入式技术笔记 发布时间:2023-05-30 分享至微信

最近在排查一个网络问题,ifconfig eth0 up后,网卡link up比较慢。因此,分析了下从ifconfig up到网络驱动的调用流程。这里顺便作个记录。

ifconfig eth0 up调用的是busybox 的命令,因此从busybox 源码入手,逐步分析下调用流程。相关代码位于:networking/ifenslave.c

ifconfig eth0 up 和 ifconfig eth0 down 分别对应 busybox 的 set_if_up() 和 set_if_down()。

staticintset_if_down(char*ifname,intflags)
{
intres=set_if_flags(ifname,flags~IFF_UP);
if(res)
bb_perror_msg("%s:can'tdown",ifname);
returnres;
}
staticintset_if_up(char*ifname,intflags)
{
intres=set_if_flags(ifname,flags|IFF_UP);
if(res)
bb_perror_msg("%s:can'tup",ifname);
returnres;
}

比如,当我们敲ifconfig eth0 down时,实则就是调用:

/* Example invocation corresponding to `ifconfig eth0 down`. */
set_if_down("eth0",master_flags.ifr_flags);

set_if_flags()会将网卡名,up / down标志位flags通过ioctl命令SIOCSIFFLAGS传递给内核网卡驱动。

staticintset_if_flags(char*ifname,intflags)
{
structifreqifr;

ifr.ifr_flags=flags;
returnset_ifrname_and_do_ioctl(SIOCSIFFLAGS,ifr,ifname);
}

接着深入到内核代码中,看下SIOCSIFFLAGS命令在哪里实现。代码位于kernel\net\core\dev_ioctl.c

staticintdev_ifsioc(structnet*net,structifreq*ifr,unsignedintcmd)
{
interr;
structnet_device*dev=__dev_get_by_name(net,ifr->ifr_name);
conststructnet_device_ops*ops;

if(!dev)
return-ENODEV;

ops=dev->netdev_ops;

switch(cmd){
caseSIOCSIFFLAGS:/*Setinterfaceflags*/
returndev_change_flags(dev,ifr->ifr_flags);

caseSIOCSIFMETRIC:/*Setthemetricontheinterface
(currentlyunused)*/
return-EOPNOTSUPP;

...................

}
returnerr;
}

dev_ifsioc()会调用__dev_get_by_name()根据 网卡名遍历 net链表,如果匹配到则返回net_device结构体指针。接着,SIOCSIFFLAGS会调用到dev_change_flags(),最后调用到__dev_change_flags()

intdev_change_flags(structnet_device*dev,unsignedintflags)
{
intret;
unsignedintchanges,old_flags=dev->flags,old_gflags=dev->gflags;

ret=__dev_change_flags(dev,flags);
if(ret<0)
returnret;

changes=(old_flags^dev->flags)|(old_gflags^dev->gflags);
__dev_notify_flags(dev,old_flags,changes);
returnret;
}
int__dev_change_flags(structnet_device*dev,unsignedintflags)
{
unsignedintold_flags=dev->flags;
intret;

ASSERT_RTNL();

/*
*Settheflagsonourdevice.
*/

dev->flags=(flags(IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|
IFF_DYNAMIC|IFF_MULTICAST|IFF_PORTSEL|
IFF_AUTOMEDIA))|
(dev->flags(IFF_UP|IFF_VOLATILE|IFF_PROMISC|
IFF_ALLMULTI));

/*
*Loadinthecorrectmulticastlistnowtheflagshavechanged.
*/

if((old_flags^flags)IFF_MULTICAST)
dev_change_rx_flags(dev,IFF_MULTICAST);

dev_set_rx_mode(dev);

/*
*Havewedownedtheinterface.WehandleIFF_UPourselves
*accordingtouserattemptstosetit,ratherthanblindly
*settingit.
*/

ret=0;
/*两个标识有一个是IFF_UP*/
if((old_flags^flags)IFF_UP)
ret=((old_flagsIFF_UP)?__dev_close:__dev_open)(dev);//通过flags判断调用__dev_close还是__dev_open

if((flags^dev->gflags)IFF_PROMISC){
intinc=(flagsIFF_PROMISC)?1:-1;
unsignedintold_flags=dev->flags;

dev->gflags^=IFF_PROMISC;

if(__dev_set_promiscuity(dev,inc,false)>=0)
if(dev->flags!=old_flags)
dev_set_rx_mode(dev);
}

/*NOTE:orderofsynchronizationofIFF_PROMISCandIFF_ALLMULTI
isimportant.Some(broken)driverssetIFF_PROMISC,when
IFF_ALLMULTIisrequestednotaskingusandnotreporting.
*/
if((flags^dev->gflags)IFF_ALLMULTI){
intinc=(flagsIFF_ALLMULTI)?1:-1;

dev->gflags^=IFF_ALLMULTI;
__dev_set_allmulti(dev,inc,false);
}

returnret;
}

__dev_change_flags(dev, flags)函数中,通过判断flag的IFF_UP位上的值是否相反,来实现是调用__dev_close()还是__dev_open()来开关eth0。

__dev_close中会将当前的net_device加入到等待设备关闭列表中。

staticint__dev_close(structnet_device*dev)
{
intretval;
LIST_HEAD(single);

list_add(dev->close_list,single);
retval=__dev_close_many(single);
list_del(single);

returnretval;
}

__dev_close_many通知设备正在关闭,等待未发送完的数据发送完,最后清除开启标记。

staticint__dev_close_many(structlist_head*head)
{
structnet_device*dev;

ASSERT_RTNL();
might_sleep();

list_for_each_entry(dev,head,close_list){
/*Temporarilydisablenetpolluntiltheinterfaceisdown*/
/*禁用netpoll*/
netpoll_poll_disable(dev);
/*通知设备正在关闭*/
call_netdevice_notifiers(NETDEV_GOING_DOWN,dev);
/*清除start标志位*/
clear_bit(__LINK_STATE_START,dev->state);

/*Synchronizetoscheduledpoll.Wecannottouchpolllist,it
*canbeevenondifferentcpu.Sojustclearnetif_running().
*
*dev->stop()willinvokenapi_disable()onallofit's
*napi_structinstancesonthisdevice.
*/
smp_mb__after_atomic();/*Commitnetif_running().*/
}
/*未发送完的数据发送完*/
dev_deactivate_many(head);

list_for_each_entry(dev,head,close_list){
conststructnet_device_ops*ops=dev->netdev_ops;

/*
*Callthedevicespecificclose.Thiscannotfail.
*OnlyifdeviceisUP
*
*WeallowittobecalledevenafteraDETACHhot-plug
*event.
*/
/*调用设备关闭操作*/
if(ops->ndo_stop)
ops->ndo_stop(dev);
/*标记设备关闭*/
dev->flags=~IFF_UP;
/*启用netpoll*/
netpoll_poll_enable(dev);
}

return0;
}

ndo_stop为关闭网卡时,不同网卡驱动注册的不同的关闭函数,我们以海思的网卡驱动为例,分析下ndo_stop函数的实现。代码位于kernel\drivers\net\ethernet\hisilicon\hns\hns_enet.c

/*
 * Stop handler for the hns NIC: delegates to hns_nic_net_down() and
 * always returns 0.
 */
static int hns_nic_net_stop(struct net_device *ndev)
{
	hns_nic_net_down(ndev);

	return 0;
}

staticvoidhns_nic_net_down(structnet_device*ndev)
{
inti;
structhnae_ae_ops*ops;
structhns_nic_priv*priv=netdev_priv(ndev);

if(test_and_set_bit(NIC_STATE_DOWN,priv->state))
return;

(void)del_timer_sync(priv->service_timer);
netif_tx_stop_all_queues(ndev);
netif_carrier_off(ndev);
netif_tx_disable(ndev);
priv->link=0;

if(priv->phy)
phy_stop(priv->phy);

ops=priv->ae_handle->dev->ops;

if(ops->stop)
ops->stop(priv->ae_handle);

netif_tx_stop_all_queues(ndev);

for(i=priv->ae_handle->q_num-1;i>=0;i--){
hns_nic_ring_close(ndev,i);
hns_nic_ring_close(ndev,i+priv->ae_handle->q_num);

/*cleantxbuffers*/
hns_nic_tx_clr_all_bufs(priv->ring_data+i);
}
}

hns_nic_net_down()中会调用netif_carrier_off()通知内核子系统网络断开。下面我们详细分析下netif_carrier_off()的实现。

voidnetif_carrier_off(structnet_device*dev)
{
/*设置网卡为载波断开状态即nocarrier状态,上行时软中断下半部读到该状态不会进行网卡收包*/
if(!test_and_set_bit(__LINK_STATE_NOCARRIER,dev->state)){
if(dev->reg_state==NETREG_UNINITIALIZED)
return;
/*增加设备改变状态*/
atomic_inc(dev->carrier_changes);
/*加入事件处理队列进行处理*/
linkwatch_fire_event(dev);
}
}

linkwatch_fire_event()函数将设备加入到事件队列,并且进行事件调度,调度中会根据是否为紧急事件做不同处理。

voidlinkwatch_fire_event(structnet_device*dev)
{
/*判断是否是紧急处理的事件*/
boolurgent=linkwatch_urgent_event(dev);
/*判断是否是紧急处理的事件*/
if(!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING,dev->state)){
/*添加事件到事件列表*/
linkwatch_add_event(dev);
}elseif(!urgent)
/*设备以前已经设置了pending标记,不是紧急事件,直接返回*/
return;
/*事件调度*/
linkwatch_schedule_work(urgent);
}

linkwatch_urgent_event()判断事件是否需要紧急处理。

staticboollinkwatch_urgent_event(structnet_device*dev)
{
/*设备未运行,非紧急*/
if(!netif_running(dev))
returnfalse;
/*设备的索引号与连接索引号不等,紧急*/
if(dev->ifindex!=dev_get_iflink(dev))
returntrue;
/*设备作为teamport,紧急*/
if(dev->priv_flagsIFF_TEAM_PORT)
returntrue;
/*连接与否发送队列排队规则改变与否*/
returnnetif_carrier_ok(dev)qdisc_tx_changing(dev);
}

linkwatch_add_event()将设备加入到事件处理链表。

staticvoidlinkwatch_add_event(structnet_device*dev)
{
unsignedlongflags;

spin_lock_irqsave(lweventlist_lock,flags);
/*若未添加,则添加设备到事件列表*/
if(list_empty(dev->link_watch_list)){
list_add_tail(dev->link_watch_list,lweventlist);
dev_hold(dev);
}
spin_unlock_irqrestore(lweventlist_lock,flags);
}

linkwatch_schedule_work()对事件处理进行调度,紧急事件立即执行,非紧急事件延后执行。

/*
 * Schedule linkwatch_work. A non-zero @urgent requests immediate
 * execution; otherwise the work is delayed until linkwatch_nextevent.
 * Returns early if the LW_URGENT flag is already set.
 */
static void linkwatch_schedule_work(int urgent)
{
	unsigned long delay = linkwatch_nextevent - jiffies;

	/* Urgent flag already set: nothing to do. */
	if (test_bit(LW_URGENT, &linkwatch_flags))
		return;

	/* Urgent scheduling requested. */
	if (urgent) {
		/* Someone else set the flag meanwhile: nothing to do. */
		if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
			return;
		/* Flag newly set: run without delay. */
		delay = 0;
	}

	/* A delay greater than HZ is replaced by immediate execution. */
	if (delay > HZ)
		delay = 0;

	/* Urgent: (re)arm the work to run immediately. */
	if (test_bit(LW_URGENT, &linkwatch_flags))
		mod_delayed_work(system_wq, &linkwatch_work, 0);
	else
		/* Not urgent: schedule after the computed delay. */
		schedule_delayed_work(&linkwatch_work, delay);
}

__linkwatch_run_queue()完成对事件调度队列中设备的处理。

staticvoid__linkwatch_run_queue(inturgent_only)
{
structnet_device*dev;
LIST_HEAD(wrk);

/*
*Limitthenumberoflinkwatcheventstoone
*persecondsothatarunawaydriverdoesnot
*causeastormofmessagesonthenetlink
*socket.Thislimitdoesnotapplytoupevents
*whilethedeviceqdiscisdown.
*/
/*已达到调度时间*/
if(!urgent_only)
linkwatch_nextevent=jiffies+HZ;
/*Limitwrap-aroundeffectondelay.*/
/*
未到达调度时间,并且下一次调度在当前时间的1s以后
那么设置调度时间是当前时间
*/
elseif(time_after(linkwatch_nextevent,jiffies+HZ))
linkwatch_nextevent=jiffies;
/*清除紧急标识*/
clear_bit(LW_URGENT,linkwatch_flags);

spin_lock_irq(lweventlist_lock);
list_splice_init(lweventlist,wrk);
/*遍历链表*/
while(!list_empty(wrk)){
/*获取设备*/
dev=list_first_entry(wrk,structnet_device,link_watch_list);
/*从链表移除设备*/
list_del_init(dev->link_watch_list);
/*未到达调度时间不需要紧急处理*/
if(urgent_only!linkwatch_urgent_event(dev)){
/*添加到链表尾部*/
list_add_tail(dev->link_watch_list,lweventlist);
/*继续处理*/
continue;
}
spin_unlock_irq(lweventlist_lock);
/*处理设备*/
linkwatch_do_dev(dev);
spin_lock_irq(lweventlist_lock);
}
/*链表有未处理事件,则以非紧急状态调度队列*/
if(!list_empty(lweventlist))
linkwatch_schedule_work(0);
spin_unlock_irq(lweventlist_lock);
}

linkwatch_do_dev()完成对某个设备的状态改变处理。

staticvoidlinkwatch_do_dev(structnet_device*dev)
{
/*
*Makesuretheabovereadiscompletesinceitcanbe
*rewrittenassoonasweclearthebitbelow.
*/
smp_mb__before_atomic();

/*Weareabouttohandlethisdevice,
*soneweventscanbeaccepted
*/
/*清除pending标记*/
clear_bit(__LINK_STATE_LINKWATCH_PENDING,dev->state);

rfc2863_policy(dev);
/*如果设备启动状态*/
if(dev->flagsIFF_UP){
/*链路连接*/
if(netif_carrier_ok(dev))
/*启用排队规则*/
dev_activate(dev);
else
/*关闭排队规则*/
dev_deactivate(dev);
/*设备状态改变处理,执行netdev_chain上设备状态变更回调*/
netdev_state_change(dev);
}
dev_put(dev);
}

最后,hns_nic_net_down()中会调用phy_stop()将网卡link down。

voidphy_stop(structphy_device*phydev)
{
mutex_lock(phydev->lock);

if(PHY_HALTED==phydev->state)
gotoout_unlock;

if(phy_interrupt_is_valid(phydev)){
/*DisablePHYInterrupts*/
phy_config_interrupt(phydev,PHY_INTERRUPT_DISABLED);

/*Clearanypendinginterrupts*/
phy_clear_interrupt(phydev);
}

phydev->state=PHY_HALTED;

out_unlock:
mutex_unlock(phydev->lock);

/*Cannotcallflush_scheduled_work()hereasdesiredbecause
*ofrtnl_lock(),butPHY_HALTEDshallguaranteephy_change()
*willnotreenableinterrupts.
*/
}

phy_stop()将phydev->state设置为PHY_HALTED,将网卡关闭。

__dev_open为设备启用核心函数,该函数打开eth0,设置启用标记,并且设置接收模式,排队规则等。

staticint__dev_open(structnet_device*dev)
{
conststructnet_device_ops*ops=dev->netdev_ops;
intret;

ASSERT_RTNL();
/*设备不可用*/
if(!netif_device_present(dev))
return-ENODEV;

/*Blocknetpollfromtryingtodoanyrxpathservicing.
*Ifwedon'tdothisthereisachancendo_poll_controller
*orndo_pollmayberunningwhileweopenthedevice
*/
/*禁用netpoll*/
netpoll_poll_disable(dev);
/*设备打开前通知*/
ret=call_netdevice_notifiers(NETDEV_PRE_UP,dev);
ret=notifier_to_errno(ret);
if(ret)
returnret;
/*设置设备打开标记,设备将设置IFF_UP标志位*/
set_bit(__LINK_STATE_START,dev->state);
/*校验地址*/
if(ops->ndo_validate_addr)
ret=ops->ndo_validate_addr(dev);
/*执行打开*/
if(!retops->ndo_open)
ret=ops->ndo_open(dev);
/*启用netpoll*/
netpoll_poll_enable(dev);
/*失败,清除打开标记*/
if(ret)
clear_bit(__LINK_STATE_START,dev->state);
/*设备打开操作*/
else{
/*设置打开标记*/
dev->flags|=IFF_UP;
/*设置接收模式*/
dev_set_rx_mode(dev);
/*初始化排队规则*/
dev_activate(dev);
/*加入设备数据到熵池*/
add_device_randomness(dev->dev_addr,dev->addr_len);
}

returnret;
}

我们以海思的网卡驱动为例,分析下ndo_open()函数的实现。代码位于kernel\drivers\net\ethernet\hisilicon\hns\hns_enet.c

staticinthns_nic_net_open(structnet_device*ndev)
{
structhns_nic_priv*priv=netdev_priv(ndev);
structhnae_handle*h=priv->ae_handle;
intret;

if(test_bit(NIC_STATE_TESTING,priv->state))
return-EBUSY;

priv->link=0;
netif_carrier_off(ndev);
/*设置txqueue的个数*/
ret=netif_set_real_num_tx_queues(ndev,h->q_num);
if(ret<0){
netdev_err(ndev,"netif_set_real_num_tx_queuesfail,ret=%d!\n",
ret);
returnret;
}
/*设置rxqueue的个数*/
ret=netif_set_real_num_rx_queues(ndev,h->q_num);
if(ret<0){
netdev_err(ndev,
"netif_set_real_num_rx_queuesfail,ret=%d!\n",ret);
returnret;
}
/*启动网卡*/
ret=hns_nic_net_up(ndev);
if(ret){
netdev_err(ndev,
"hnsnetupfail,ret=%d!\n",ret);
returnret;
}

return0;
}

staticinthns_nic_net_up(structnet_device*ndev)
{
structhns_nic_priv*priv=netdev_priv(ndev);
structhnae_handle*h=priv->ae_handle;
inti,j,k;
intret;
/*初始化中断,并设置中断函数为hns_irq_handle,每个rx和txqueue都对应一个中断*/
ret=hns_nic_init_irq(priv);
if(ret!=0){
netdev_err(ndev,"hnsinitirqfailed!ret=%d\n",ret);
returnret;
}

for(i=0;i<h->q_num*2;i++){
/*使能中断,使能napi*/
ret=hns_nic_ring_open(ndev,i);
if(ret)
gotoout_has_some_queues;
}

for(k=0;k<h->q_num;k++)
h->dev->ops->toggle_queue_status(h->qs[k],1);
/*设置mac地址*/
ret=h->dev->ops->set_mac_addr(h,ndev->dev_addr);
if(ret)
gotoout_set_mac_addr_err;
/*hns的start函数为null*/
ret=h->dev->ops->start?h->dev->ops->start(h):0;
if(ret)
gotoout_start_err;

if(priv->phy)
/*启动phy*/
phy_start(priv->phy);

clear_bit(NIC_STATE_DOWN,priv->state);
/*修改time每一秒到期一次*/
(void)mod_timer(priv->service_timer,jiffies+SERVICE_TIMER_HZ);

return0;

out_start_err:
netif_stop_queue(ndev);
out_set_mac_addr_err:
for(k=0;k<h->q_num;k++)
h->dev->ops->toggle_queue_status(h->qs[k],0);
out_has_some_queues:
for(j=i-1;j>=0;j--)
hns_nic_ring_close(ndev,j);

set_bit(NIC_STATE_DOWN,priv->state);

returnret;
}

最后会调用到phy_start()启动网卡。

voidphy_start(structphy_device*phydev)
{
booldo_resume=false;
interr=0;

mutex_lock(phydev->lock);

switch(phydev->state){
casePHY_STARTING:
phydev->state=PHY_PENDING;
break;
casePHY_READY:
phydev->state=PHY_UP;
break;
casePHY_HALTED:
/*makesureinterruptsarere-enabledforthePHY*/
err=phy_enable_interrupts(phydev);
if(err<0)
break;

phydev->state=PHY_RESUMING;
do_resume=true;
break;
default:
break;
}
mutex_unlock(phydev->lock);

/*ifphywassuspended,bringthephysicallinkupagain*/
if(do_resume)
phy_resume(phydev);
}

https://blog.csdn.net/qq_29044159/article/details/118030335

https://www.likecs.com/show-308571259.html

https://blog.csdn.net/Longyu_wlz/article/details/108026902

http://bbs.chinaunix.net/thread-2020457-1-1.html

https://blog.csdn.net/tiantao2012/article/details/75283527

https://blog.csdn.net/sinat_20184565/article/details/104353185


[ 新闻来源:嵌入式技术笔记,更多精彩资讯请下载icspec App。如对本稿件有异议,请联系微信客服specltkj]
存入云盘 收藏
举报
全部评论

暂无评论哦,快来评论一下吧!