SlideShare a Scribd company logo
C/C++ Linux System Programming Session 17 User-space System Programming  – session 7
Outline Device File I/O ops Networking Concepts Socket Concepts and Ops Sockets for IPC
DEVICES Major and minor numbers int mknod(const char *pathname, mode_t mode, dev_t dev); UDEV FS Events and rules
I/O Multiplexing int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); void FD_CLR(int fd, fd_set *set); int  FD_ISSET(int fd, fd_set *set); void FD_SET(int fd, fd_set *set); void FD_ZERO(fd_set *set); int pselect(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timespec *timeout, const sigset_t *sigmask); int poll(struct pollfd *fds, nfds_t nfds, int timeout); int ppoll(struct pollfd *fds, nfds_t nfds, const struct timespec *timeout, const sigset_t *sigmask); POLLIN/POLLOUT/POLLPRI/POLLERR
Epoll  Decouple interest set registration from poll +: O(1) on the wait +: Edge trigger - : system call for adding onto the set int epoll_create(int size); //desc, need close int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event); int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout); typedef union epoll_data { void  *ptr; int  fd; uint32_t u32; uint64_t u64; } epoll_data_t; struct epoll_event { uint32_t  events;  /* Epoll events */ epoll_data_t data;  /* User data variable */ };
IOCTL Device / special file control int ioctl(int d, int request, ...); Request is specific to device being controlled, and may have a payload (ioctl_list)
Filesystem events int inotify_init(void); // desc, need close int inotify_add_watch(int fd, const char *pathname, uint32_t mask); // watch desc int inotify_rm_watch(int fd, uint32_t wd); FIONREAD ioctl fcntl: F_NOTIFY struct inotify_event { int wd;  /* watch descriptor */ uint32_t mask;  /* mask of events */ uint32_t cookie; /* unique cookie */ uint32_t len;  /* size of 'name' field */ char name[];  /* null-terminated name */ };
int inotifyd_main(int argc UNUSED_PARAM, char **argv) { unsigned mask = IN_ALL_EVENTS; // assume we want all events struct pollfd pfd; char **watched = ++argv; // watched name list const char *args[] = { *argv, NULL, NULL, NULL, NULL }; // open inotify pfd.fd = inotify_init(); if (pfd.fd < 0) bb_perror_msg_and_die("no kernel support"); // setup watched while (*++argv) { char *path = *argv; char *masks = strchr(path, ':'); int wd; // watch descriptor // if mask is specified -> if (masks) { *masks = '\0'; // split path and mask // convert mask names to mask bitset mask = 0; while (*++masks) { int i = strchr(mask_names, *masks) - mask_names;   if (i >= 0) { mask |= (1 << i); } } } // add watch wd = inotify_add_watch(pfd.fd, path, mask); if (wd < 0) { bb_perror_msg_and_die("add watch (%s) failed", path); } } static const char mask_names[] ALIGN1 = "a" // 0x00000001 File was accessed "c" // 0x00000002 File was modified "e" // 0x00000004 Metadata changed "w" // 0x00000008 Writable file was closed "0" // 0x00000010 Unwritable file closed "r" // 0x00000020 File was opened "m" // 0x00000040 File was moved from X "y" // 0x00000080 File was moved to Y "n" // 0x00000100 Subfile was created "d" // 0x00000200 Subfile was deleted "D" // 0x00000400 Self was deleted "M" // 0x00000800 Self was moved ; pfd.events = POLLIN; while (!signalled && poll(&pfd, 1, -1) > 0) { ssize_t len; void *buf; struct inotify_event *ie; // read out all pending events xioctl(pfd.fd, FIONREAD, &len); #define eventbuf bb_common_bufsiz1 ie = buf = (len <= sizeof(eventbuf)) ? eventbuf : xmalloc(len); len = full_read(pfd.fd, buf, len); // process events. N.B. 
events may vary in length while (len > 0) { int i; char events[12]; char *s = events; unsigned m = ie->mask; for (i = 0; i < 12; ++i, m >>= 1) { if (m & 1) { *s++ = mask_names[i]; } } *s = '\0'; args[1] = events; args[2] = watched[ie->wd]; args[3] = ie->len ? ie->name : NULL; xspawn((char **)args); // next event i = sizeof(struct inotify_event) + ie->len; len -= i; ie = (void*)((char*)ie + i); } if (eventbuf != buf) free(buf); } return EXIT_SUCCESS; }
Asynchronous I/O Only on O_DIRECT struct aiocb { int aio_fildes;  /* file descriptor */ int aio_lio_opcode;  /* operation to perform */ int aio_reqprio;  /* request priority offset */ volatile void *aio_buf;  /* pointer to buffer */ size_t aio_nbytes;  /* length of operation */ struct sigevent aio_sigevent; /* signal number and value */ /* internal, private members follow... */ }; int aio_read (struct aiocb *aiocbp); int aio_write (struct aiocb *aiocbp); int aio_error (const struct aiocb *aiocbp); int aio_return (struct aiocb *aiocbp); int aio_cancel (int fd, struct aiocb *aiocbp); int aio_fsync (int op, struct aiocb *aiocbp); int aio_suspend (const struct aiocb * const cblist[], int n, const struct timespec *timeout);
Network Architecture Application – telnet/ftp/http...etc Presentation --  intended for e.g. encryption Session --  e.g. iSCSI  Transport – PORTS Network – IP, ATM  Link --  Physical – Ethernet, wifi... OSI  Packets and Data Encapsulation Protocols can be stacked on top of that e.g. CIM over HTTP ------------------------------------------------------------- | Eth | IP | TCP | App | DDDDAAAATTTTAAAA | -------------------------------------------------------------
Focus Link is handled by HW and drivers Network: IP, handled by kernel, affects addressing and byte ordering Transport layer TCP – Reliable, sequenced,  Connection-oriented UDP – Unreliable, unsequenced, connectionless Handled by kernel which provides us an interface Application is what you are writing
Network Layer Concerns Byte ordering  Network byte order  vs Host byte order  Addressing IPV4: 4 octets xx.xx.xx.xx (32 bits) IPV6: 8 groups of 16-bit hex values separated by : (128 bits) IPv4 compatibility  Scopes Subnets Unicasting/Broadcasting (v4) /Multicasting (v6) /Anycasting (v6) Ports Loopback
Network Byte Order uint32_t htonl(uint32_t hostlong); uint16_t htons(uint16_t hostshort); uint32_t ntohl(uint32_t netlong); uint16_t ntohs(uint16_t netshort); What about everything else? Agreement: the higher level protocol Abstraction layers for cross-platform calls (e.g. RPC, RMI): (un)marshalling
IP Address Casting struct sockaddr { sa_family_t sa_family; char  sa_data[14]; } struct sockaddr_in { sa_family_t  sin_family; /* AF_INET */ uint16_t  sin_port;  /* port */ struct in_addr sin_addr;  }; struct in_addr { uint32_t  s_addr;  }; struct sockaddr_in6 { uint16_t  sin6_family;  /* AF_INET6 */ uint16_t  sin6_port;  /* port  */ uint32_t  sin6_flowinfo;  struct in6_addr sin6_addr;  uint32_t  sin6_scope_id;  }; struct in6_addr { unsigned char  s6_addr[16];  }; IPV4 IPV6
Name Service what hosts (sometimes, what service) DNS/BIND, NIS/YP, LDAP DNS: domain name (fully qualified) The Resolver named /etc/hosts Order: /etc/host.conf
Name / Address Info address ==> name Name ==> address(es) String ==> Address Address ==> String My host Info int getnameinfo(const struct sockaddr *sa, socklen_t salen, char *host, size_t hostlen, char *serv, size_t servlen, int flags); int getaddrinfo(const char *node, const char *service, const struct addrinfo *hints, struct addrinfo **res); void freeaddrinfo(struct addrinfo *res); const char *gai_strerror(int errcode); struct addrinfo { int  ai_flags; int  ai_family; int  ai_socktype; int  ai_protocol; size_t  ai_addrlen; struct sockaddr *ai_addr; char  *ai_canonname; struct addrinfo *ai_next; }; int inet_pton(int af, const char *src, void *dst); const char *inet_ntop(int af, const void *src, char *dst, socklen_t cnt); NI_NOFQDN NI_NUMERICHOST NI_NAMEREQD NI_NUMERICSERV NI_DGRAM int gethostname(char *name, size_t len);
Legacy Name/Address Info struct hostent *gethostbyname(const char *name); struct hostent *gethostbyaddr(const void *addr, socklen_t len, int type); void herror(const char *s); const char *hstrerror(int err); Require a deep copy GNU extensions: re-entrancy (_r), POSIX extension: gethostent(void) IPV4 only: inet_ntoa/aton and family struct hostent { char  *h_name;  char **h_aliases;  int  h_addrtype;  int  h_length;  char **h_addr_list; }
Sockets  Model Virtual hookup (like the phone) A special “descriptor” (hooks VFS to transport layer) Creation int socket(int domain, int type, int protocol); Domains: PF_{INET, INET6, UNIX, NETLINK ....} Types: SOCK_{STREAM, DGRAM, RAW, ...} Protocols and getprotoent() Address / Socket binding int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen); INADDR_ANY, in6addr_any (IN6ADDR_ANY_INIT)
Reliable Sockets Connect to server address int connect(int sockfd, const struct sockaddr *serv_addr, socklen_t addrlen); Listening to incoming connections int listen(int sockfd, int backlog); Accepting a new connection int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen); Gets a new “child” socket descriptor Stevens et al
Socket States  Stevens et al
Socket Options int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen); int setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen); Some important options: SO_KEEPALIVE SO_RCVBUF / SO_SNDBUF SO_LINGER SO_REUSEADDR
Unreliable Communication ssize_t sendto(int s, const void *buf, size_t len, int flags, const struct sockaddr *to, socklen_t tolen); ssize_t recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlen); To add reliability: Connection (You can still connect, no handshake) Sequence Replies + timeouts + retransmission
I/O  Like File I/O: read/write/readv/writev/poll/select/ fcntl-SIGIO... ssize_t send(int s, const void *buf, size_t len, int flags); ssize_t recv(int s, void *buf, size_t len, int flags); Flags only matter on connections MSG_{CONFIRM, DONTROUTE, DONTWAIT, EOR, MORE, NOSIGNAL, OOB, WAITALL, PEEK}
Message-Based Transfers ssize_t recvmsg(int s, struct msghdr *msg, int flags); ssize_t sendmsg(int s, const struct msghdr *msg, int flags); Raw sockets Ancillary data struct msghdr { void  *msg_name;  socklen_t  msg_namelen;  struct iovec *msg_iov;  size_t  msg_iovlen;  void  *msg_control;  socklen_t  msg_controllen;  int  msg_flags;  }; struct cmsghdr { socklen_t cmsg_len;  int  cmsg_level;  int  cmsg_type;  /* unsigned char cmsg_data[]; */ }; struct cmsghdr *CMSG_FIRSTHDR(struct msghdr *msgh); struct cmsghdr *CMSG_NXTHDR(struct msghdr *msgh, struct cmsghdr *cmsg); size_t CMSG_ALIGN(size_t length); size_t CMSG_SPACE(size_t length); size_t CMSG_LEN(size_t length); unsigned char *CMSG_DATA(struct cmsghdr *cmsg);
Design Decisions UDP, TCP, Raw On connection server Iterative vs Concurrent Thread vs Process Pre vs Post
Some examples TCP sshd  Raw ping UDP snmp
UNIX Domain Sockets IPC Ancillary data: SOL_SOCKET level SCM_RIGHTS int socketpair(int d, int type, int protocol, int sv[2]); udevmonitor example Ioctls: FIONREAD, TIOCOUTQ struct sockaddr_un { sa_family_t  sun_family;  char  sun_path[UNIX_PATH_MAX];  };

More Related Content

PPT
PDF
iCloud keychain
PDF
[2007 CodeEngn Conference 01] seaofglass - Linux Virus Analysis
PPTX
Cisco IOS shellcode: All-in-one
PDF
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
PDF
Php engine
PDF
PHP Internals and Virtual Machine
iCloud keychain
[2007 CodeEngn Conference 01] seaofglass - Linux Virus Analysis
Cisco IOS shellcode: All-in-one
various tricks for remote linux exploits  by Seok-Ha Lee (wh1ant)
Php engine
PHP Internals and Virtual Machine

What's hot (20)

DOCX
ODP
Отладка в GDB
PDF
Confraria SECURITY & IT - Lisbon Set 29, 2011
PDF
Phpをいじり倒す10の方法
PDF
Specializing the Data Path - Hooking into the Linux Network Stack
PPT
Cs423 raw sockets_bw
PDF
Linux Shellcode disassembling
PDF
App secforum2014 andrivet-cplusplus11-metaprogramming_applied_to_software_obf...
PDF
Php and threads ZTS
PDF
[DSC] Introduction to Binary Exploitation
PDF
A Stealthy Stealers - Spyware Toolkit and What They Do
PPTX
07 - Bypassing ASLR, or why X^W matters
PPTX
04 - I love my OS, he protects me (sometimes, in specific circumstances)
ODP
Design and implementation_of_shellcodes
PPTX
Embedded JavaScript
PDF
Devirtualizing FinSpy
PDF
Dsd lab Practical File
PDF
Easily mockingdependenciesinc++ 2
KEY
Post Exploitation Bliss: Loading Meterpreter on a Factory iPhone, Black Hat U...
Отладка в GDB
Confraria SECURITY & IT - Lisbon Set 29, 2011
Phpをいじり倒す10の方法
Specializing the Data Path - Hooking into the Linux Network Stack
Cs423 raw sockets_bw
Linux Shellcode disassembling
App secforum2014 andrivet-cplusplus11-metaprogramming_applied_to_software_obf...
Php and threads ZTS
[DSC] Introduction to Binary Exploitation
A Stealthy Stealers - Spyware Toolkit and What They Do
07 - Bypassing ASLR, or why X^W matters
04 - I love my OS, he protects me (sometimes, in specific circumstances)
Design and implementation_of_shellcodes
Embedded JavaScript
Devirtualizing FinSpy
Dsd lab Practical File
Easily mockingdependenciesinc++ 2
Post Exploitation Bliss: Loading Meterpreter on a Factory iPhone, Black Hat U...
Ad

Similar to Sysprog17 (20)

ODP
Sysprog 16
PPT
Basic socket programming
PPTX
Network sockets
ODP
Introduction to Kernel Programming
PDF
Geep networking stack-linuxkernel
PPT
03-socketprogramming for college students.ppt
PPT
03-socketprogrsamming forcoleeger students.ppt
PPTX
Socket programming in c
ODP
Sysprog 13
PPT
Introduction to sockets tcp ip protocol.ppt
PDF
netLec5.pdf
PPT
INTRODUCTION TO SOCKETS IN COMPUTER NETWORKS DEPT OF CSE.ppt
ODP
Sysprog 12
ODP
Sysprog 12
PDF
lab04.pdf
PDF
PPT
Sockets intro
PPT
sockets_intro.ppt
PDF
PPT
Epoll - from the kernel side
Sysprog 16
Basic socket programming
Network sockets
Introduction to Kernel Programming
Geep networking stack-linuxkernel
03-socketprogramming for college students.ppt
03-socketprogrsamming forcoleeger students.ppt
Socket programming in c
Sysprog 13
Introduction to sockets tcp ip protocol.ppt
netLec5.pdf
INTRODUCTION TO SOCKETS IN COMPUTER NETWORKS DEPT OF CSE.ppt
Sysprog 12
Sysprog 12
lab04.pdf
Sockets intro
sockets_intro.ppt
Epoll - from the kernel side
Ad

More from Ahmed Mekkawy (20)

PDF
Encrypted Traffic in Egypt - an attempt to understand
ODP
Securing Governmental Public Services with Free/Open Source Tools - Egyptian ...
PDF
OpenData for governments
ODP
Infrastructure as a Code
PDF
شركة سبيرولا للأنظمة والجمعية المصرية للمصادر المفتوحة
PDF
Everything is a Game
ODP
Why Cloud Computing has to go the FOSS way
ODP
FOSS Enterpreneurship
ODP
Intro to FOSS & using it in development
PDF
FOSS, history and philosophy
ODP
Virtualization Techniques & Cloud Compting
ODP
A look at computer security
ODP
Networking in Gnu/Linux
ODP
Foss Movement In Egypt
ODP
Sysprog 15
ODP
Sysprog 9
ODP
Sysprog 14
ODP
Sysprog 11
ODP
Sysprog 7
ODP
Sysprog 8
Encrypted Traffic in Egypt - an attempt to understand
Securing Governmental Public Services with Free/Open Source Tools - Egyptian ...
OpenData for governments
Infrastructure as a Code
شركة سبيرولا للأنظمة والجمعية المصرية للمصادر المفتوحة
Everything is a Game
Why Cloud Computing has to go the FOSS way
FOSS Enterpreneurship
Intro to FOSS & using it in development
FOSS, history and philosophy
Virtualization Techniques & Cloud Compting
A look at computer security
Networking in Gnu/Linux
Foss Movement In Egypt
Sysprog 15
Sysprog 9
Sysprog 14
Sysprog 11
Sysprog 7
Sysprog 8

Recently uploaded (20)

PDF
Empathic Computing: Creating Shared Understanding
PPT
“AI and Expert System Decision Support & Business Intelligence Systems”
PDF
How UI/UX Design Impacts User Retention in Mobile Apps.pdf
PPTX
Cloud computing and distributed systems.
DOCX
The AUB Centre for AI in Media Proposal.docx
PDF
Per capita expenditure prediction using model stacking based on satellite ima...
PDF
Build a system with the filesystem maintained by OSTree @ COSCUP 2025
PDF
7 ChatGPT Prompts to Help You Define Your Ideal Customer Profile.pdf
PDF
Unlocking AI with Model Context Protocol (MCP)
PDF
Review of recent advances in non-invasive hemoglobin estimation
PDF
Reach Out and Touch Someone: Haptics and Empathic Computing
PDF
Peak of Data & AI Encore- AI for Metadata and Smarter Workflows
PDF
Network Security Unit 5.pdf for BCA BBA.
PDF
Diabetes mellitus diagnosis method based random forest with bat algorithm
PPTX
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
PDF
Optimiser vos workloads AI/ML sur Amazon EC2 et AWS Graviton
PPTX
ACSFv1EN-58255 AWS Academy Cloud Security Foundations.pptx
PDF
Spectral efficient network and resource selection model in 5G networks
PPTX
Programs and apps: productivity, graphics, security and other tools
PDF
MIND Revenue Release Quarter 2 2025 Press Release
Empathic Computing: Creating Shared Understanding
“AI and Expert System Decision Support & Business Intelligence Systems”
How UI/UX Design Impacts User Retention in Mobile Apps.pdf
Cloud computing and distributed systems.
The AUB Centre for AI in Media Proposal.docx
Per capita expenditure prediction using model stacking based on satellite ima...
Build a system with the filesystem maintained by OSTree @ COSCUP 2025
7 ChatGPT Prompts to Help You Define Your Ideal Customer Profile.pdf
Unlocking AI with Model Context Protocol (MCP)
Review of recent advances in non-invasive hemoglobin estimation
Reach Out and Touch Someone: Haptics and Empathic Computing
Peak of Data & AI Encore- AI for Metadata and Smarter Workflows
Network Security Unit 5.pdf for BCA BBA.
Diabetes mellitus diagnosis method based random forest with bat algorithm
KOM of Painting work and Equipment Insulation REV00 update 25-dec.pptx
Optimiser vos workloads AI/ML sur Amazon EC2 et AWS Graviton
ACSFv1EN-58255 AWS Academy Cloud Security Foundations.pptx
Spectral efficient network and resource selection model in 5G networks
Programs and apps: productivity, graphics, security and other tools
MIND Revenue Release Quarter 2 2025 Press Release

Sysprog17

  • 1. C/C++ Linux System Programming Session 17 User-space System Programming – session 7
  • 2. Outline Device File I/O ops Networking Concepts Socket Concepts and Ops Sockets for IPC
  • 3. DEVICES Major and minor numbers int mknod(const char *pathname, mode_t mode, dev_t dev); UDEV FS Events and rules
  • 4. I/O Multiplexing int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); void FD_CLR(int fd, fd_set *set); int FD_ISSET(int fd, fd_set *set); void FD_SET(int fd, fd_set *set); void FD_ZERO(fd_set *set); int pselect(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timespec *timeout, const sigset_t *sigmask); int poll(struct pollfd *fds, nfds_t nfds, int timeout); int ppoll(struct pollfd *fds, nfds_t nfds, const struct timespec *timeout, const sigset_t *sigmask); POLLIN/POLLOUT/POLLPRI/POLLERR
  • 5. Epoll Decouple interest set registration from poll +: O(1) on the wait +: Edge trigger - : system call for adding onto the set int epoll_create(int size); //desc, need close int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event); int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout); typedef union epoll_data { void *ptr; int fd; uint32_t u32; uint64_t u64; } epoll_data_t; struct epoll_event { uint32_t events; /* Epoll events */ epoll_data_t data; /* User data variable */ };
  • 6. IOCTL Device / special file control int ioctl(int d, int request, ...); Request is specific to device being controlled, and may have a payload (ioctl_list)
  • 7. Filesystem events int inotify_init(void); // desc, need close int inotify_add_watch(int fd, const char *pathname, uint32_t mask); // watch desc int inotify_rm_watch(int fd, uint32_t wd); FIONREAD ioctl fcntl: F_NOTIFY struct inotify_event { int wd; /* watch descriptor */ uint32_t mask; /* mask of events */ uint32_t cookie; /* unique cookie */ uint32_t len; /* size of 'name' field */ char name[]; /* null-terminated name */ };
  • 8. int inotifyd_main(int argc UNUSED_PARAM, char **argv) { unsigned mask = IN_ALL_EVENTS; // assume we want all events struct pollfd pfd; char **watched = ++argv; // watched name list const char *args[] = { *argv, NULL, NULL, NULL, NULL }; // open inotify pfd.fd = inotify_init(); if (pfd.fd < 0) bb_perror_msg_and_die("no kernel support"); // setup watched while (*++argv) { char *path = *argv; char *masks = strchr(path, ':'); int wd; // watch descriptor // if mask is specified -> if (masks) { *masks = '\0'; // split path and mask // convert mask names to mask bitset mask = 0; while (*++masks) { int i = strchr(mask_names, *masks) - mask_names; if (i >= 0) { mask |= (1 << i); } } } // add watch wd = inotify_add_watch(pfd.fd, path, mask); if (wd < 0) { bb_perror_msg_and_die("add watch (%s) failed", path); } } static const char mask_names[] ALIGN1 = "a" // 0x00000001 File was accessed "c" // 0x00000002 File was modified "e" // 0x00000004 Metadata changed "w" // 0x00000008 Writable file was closed "0" // 0x00000010 Unwritable file closed "r" // 0x00000020 File was opened "m" // 0x00000040 File was moved from X "y" // 0x00000080 File was moved to Y "n" // 0x00000100 Subfile was created "d" // 0x00000200 Subfile was deleted "D" // 0x00000400 Self was deleted "M" // 0x00000800 Self was moved ; pfd.events = POLLIN; while (!signalled && poll(&pfd, 1, -1) > 0) { ssize_t len; void *buf; struct inotify_event *ie; // read out all pending events xioctl(pfd.fd, FIONREAD, &len); #define eventbuf bb_common_bufsiz1 ie = buf = (len <= sizeof(eventbuf)) ? eventbuf : xmalloc(len); len = full_read(pfd.fd, buf, len); // process events. N.B. 
events may vary in length while (len > 0) { int i; char events[12]; char *s = events; unsigned m = ie->mask; for (i = 0; i < 12; ++i, m >>= 1) { if (m & 1) { *s++ = mask_names[i]; } } *s = '\0'; args[1] = events; args[2] = watched[ie->wd]; args[3] = ie->len ? ie->name : NULL; xspawn((char **)args); // next event i = sizeof(struct inotify_event) + ie->len; len -= i; ie = (void*)((char*)ie + i); } if (eventbuf != buf) free(buf); } return EXIT_SUCCESS; }
  • 9. Asynchronous I/O Only on O_DIRECT struct aiocb { int aio_fildes; /* file descriptor */ int aio_lio_opcode; /* operation to perform */ int aio_reqprio; /* request priority offset */ volatile void *aio_buf; /* pointer to buffer */ size_t aio_nbytes; /* length of operation */ struct sigevent aio_sigevent; /* signal number and value */ /* internal, private members follow... */ }; int aio_read (struct aiocb *aiocbp); int aio_write (struct aiocb *aiocbp); int aio_error (const struct aiocb *aiocbp); int aio_return (struct aiocb *aiocbp); int aio_cancel (int fd, struct aiocb *aiocbp); int aio_fsync (int op, struct aiocb *aiocbp); int aio_suspend (const struct aiocb * const cblist[], int n, const struct timespec *timeout);
  • 10. Network Architecture Application – telnet/ftp/http...etc Presentation -- intended for e.g. encryption Session -- e.g. iSCSI Transport – PORTS Network – IP, ATM Link -- Physical – Ethernet, wifi... OSI Packets and Data Encapsulation Protocols can be stacked on top of that e.g. CIM over HTTP ------------------------------------------------------------- | Eth | IP | TCP | App | DDDDAAAATTTTAAAA | -------------------------------------------------------------
  • 11. Focus Link is handled by HW and drivers Network: IP, handled by kernel, affects addressing and byte ordering Transport layer TCP – Reliable, sequenced, Connection-oriented UDP – Unreliable, unsequenced, connectionless Handled by kernel which provides us an interface Application is what you are writing
  • 12. Network Layer Concerns Byte ordering Network byte order vs Host byte order Addressing IPV4: 4 octets xx.xx.xx.xx (32 bits) IPV6: 8 groups of 16-bit hex values separated by : (128 bits) IPv4 compatibility Scopes Subnets Unicasting/Broadcasting (v4) /Multicasting (v6) /Anycasting (v6) Ports Loopback
  • 13. Network Byte Order uint32_t htonl(uint32_t hostlong); uint16_t htons(uint16_t hostshort); uint32_t ntohl(uint32_t netlong); uint16_t ntohs(uint16_t netshort); What about everything else? Agreement: the higher level protocol Abstraction layers for cross-platform calls (e.g. RPC, RMI): (un)marshalling
  • 14. IP Address Casting struct sockaddr { sa_family_t sa_family; char sa_data[14]; } struct sockaddr_in { sa_family_t sin_family; /* AF_INET */ uint16_t sin_port; /* port */ struct in_addr sin_addr; }; struct in_addr { uint32_t s_addr; }; struct sockaddr_in6 { uint16_t sin6_family; /* AF_INET6 */ uint16_t sin6_port; /* port */ uint32_t sin6_flowinfo; struct in6_addr sin6_addr; uint32_t sin6_scope_id; }; struct in6_addr { unsigned char s6_addr[16]; }; IPV4 IPV6
  • 15. Name Service what hosts (sometimes, what service) DNS/BIND, NIS/YP, LDAP DNS: domain name (fully qualified) The Resolver named /etc/hosts Order: /etc/host.conf
  • 16. Name / Address Info address ==> name Name ==> address(es) String ==> Address Address ==> String My host Info int getnameinfo(const struct sockaddr *sa, socklen_t salen, char *host, size_t hostlen, char *serv, size_t servlen, int flags); int getaddrinfo(const char *node, const char *service, const struct addrinfo *hints, struct addrinfo **res); void freeaddrinfo(struct addrinfo *res); const char *gai_strerror(int errcode); struct addrinfo { int ai_flags; int ai_family; int ai_socktype; int ai_protocol; size_t ai_addrlen; struct sockaddr *ai_addr; char *ai_canonname; struct addrinfo *ai_next; }; int inet_pton(int af, const char *src, void *dst); const char *inet_ntop(int af, const void *src, char *dst, socklen_t cnt); NI_NOFQDN NI_NUMERICHOST NI_NAMEREQD NI_NUMERICSERV NI_DGRAM int gethostname(char *name, size_t len);
  • 17. Legacy Name/Address Info struct hostent *gethostbyname(const char *name); struct hostent *gethostbyaddr(const void *addr, socklen_t len, int type); void herror(const char *s); const char *hstrerror(int err); Require a deep copy GNU extensions: re-entrancy (_r), POSIX extension: gethostent(void) IPV4 only: inet_ntoa/aton and family struct hostent { char *h_name; char **h_aliases; int h_addrtype; int h_length; char **h_addr_list; }
  • 18. Sockets Model Virtual hookup (like the phone) A special “descriptor” (hooks VFS to transport layer) Creation int socket(int domain, int type, int protocol); Domains: PF_{INET, INET6, UNIX, NETLINK ....} Types: SOCK_{STREAM, DGRAM, RAW, ...} Protocols and getprotoent() Address / Socket binding int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen); INADDR_ANY, in6addr_any (IN6ADDR_ANY_INIT)
  • 19. Reliable Sockets Connect to server address int connect(int sockfd, const struct sockaddr *serv_addr, socklen_t addrlen); Listening to incoming connections int listen(int sockfd, int backlog); Accepting a new connection int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen); Gets a new “child” socket descriptor Stevens et al
  • 20. Socket States Stevens et al
  • 21. Socket Options int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen); int setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen); Some important options: SO_KEEPALIVE SO_RCVBUF / SO_SNDBUF SO_LINGER SO_REUSEADDR
  • 22. Unreliable Communication ssize_t sendto(int s, const void *buf, size_t len, int flags, const struct sockaddr *to, socklen_t tolen); ssize_t recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlen); To add reliability: Connection (You can still connect, no handshake) Sequence Replies + timeouts + retransmission
  • 23. I/O Like File I/O: read/write/readv/writev/poll/select/ fcntl-SIGIO... ssize_t send(int s, const void *buf, size_t len, int flags); ssize_t recv(int s, void *buf, size_t len, int flags); Flags only matter on connections MSG_{CONFIRM, DONTROUTE, DONTWAIT, EOR, MORE, NOSIGNAL, OOB, WAITALL, PEEK}
  • 24. Message-Based Transfers ssize_t recvmsg(int s, struct msghdr *msg, int flags); ssize_t sendmsg(int s, const struct msghdr *msg, int flags); Raw sockets Ancillary data struct msghdr { void *msg_name; socklen_t msg_namelen; struct iovec *msg_iov; size_t msg_iovlen; void *msg_control; socklen_t msg_controllen; int msg_flags; }; struct cmsghdr { socklen_t cmsg_len; int cmsg_level; int cmsg_type; /* unsigned char cmsg_data[]; */ }; struct cmsghdr *CMSG_FIRSTHDR(struct msghdr *msgh); struct cmsghdr *CMSG_NXTHDR(struct msghdr *msgh, struct cmsghdr *cmsg); size_t CMSG_ALIGN(size_t length); size_t CMSG_SPACE(size_t length); size_t CMSG_LEN(size_t length); unsigned char *CMSG_DATA(struct cmsghdr *cmsg);
  • 25. Design Decisions UDP, TCP, Raw On connection server Iterative vs Concurrent Thread vs Process Pre vs Post
  • 26. Some examples TCP sshd Raw ping UDP snmp
  • 27. UNIX Domain Sockets IPC Ancillary data: SOL_SOCKET level SCM_RIGHTS int socketpair(int d, int type, int protocol, int sv[2]); udevmonitor example Ioctls: FIONREAD, TIOCOUTQ struct sockaddr_un { sa_family_t sun_family; char sun_path[UNIX_PATH_MAX]; };