[PATCH] wlib scheduler

Chris Caputo ccaputo at alt.net
Thu Jan 13 22:25:42 GMT 2005


Wensong, Julian, et al,

Please consider the below patches against ipvsadm-1.24 and linux kernel
2.6.10 which add a "Weighted Least Incoming Bandwidth" scheduler to LVS.

I have also put these up at:

  http://www.caputo.com/foss/ipvsadm-1.24-wlib.patch
  http://www.caputo.com/foss/lvs_wlib-2.6.10.patch

Comments and suggestions welcome.  I'd like to work towards this being
included in future releases if you think it worthwhile.

Thank you,
Chris

--- patch against ipvsadm-1.24 ---

diff -upr ipvsadm-1.24/SCHEDULERS ipvsadm-1.24-wlib/SCHEDULERS
--- ipvsadm-1.24/SCHEDULERS     2003-05-10 03:05:26.000000000 +0000
+++ ipvsadm-1.24-wlib/SCHEDULERS        2005-01-13 22:17:23.744596405 +0000
@@ -1 +1 @@
-rr|wrr|lc|wlc|lblc|lblcr|dh|sh|sed|nq
+rr|wrr|lc|wlc|lblc|lblcr|dh|sh|sed|nq|wlib
diff -upr ipvsadm-1.24/ipvsadm.8 ipvsadm-1.24-wlib/ipvsadm.8
--- ipvsadm-1.24/ipvsadm.8      2003-07-05 05:32:38.000000000 +0000
+++ ipvsadm-1.24-wlib/ipvsadm.8 2005-01-13 22:17:23.745596281 +0000
@@ -255,6 +255,10 @@ fixed service rate (weight) of the ith s
 \fBnq\fR - Never Queue: assigns an incoming job to an idle server if
 there is, instead of waiting for a fast one; if all the servers are
 busy, it adopts the Shortest Expected Delay policy to assign the job.
+.sp
+\fBwlib\fR - Weighted Least Incoming Bandwidth: directs network
+connections to the real server with the least incoming bandwidth
+normalized by the server weight.
 .TP
 .B -p, --persistent [\fItimeout\fP]
 Specify that a virtual service is persistent. If this option is

--- patch against linux kernel 2.6.10 ---

diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/Kconfig linux-2.6.10-lvs_wlib/net/ipv4/ipvs/Kconfig
--- linux-2.6.10-stock/net/ipv4/ipvs/Kconfig	2005-01-10 03:57:58.261234635 +0000
+++ linux-2.6.10-lvs_wlib/net/ipv4/ipvs/Kconfig	2005-01-10 03:32:27.445121420 +0000
@@ -224,6 +224,17 @@ config	IP_VS_NQ
 	  If you want to compile it in kernel, say Y. To compile it as a
 	  module, choose M here. If unsure, say N.
 
+config	IP_VS_WLIB
+	tristate "weighted least incoming bandwidth scheduling"
+        depends on IP_VS
+	---help---
+	  The weighted least incoming bandwidth scheduling algorithm directs
+	  network connections to the server with the least incoming bandwidth
+	  normalized by the server weight.
+
+	  If you want to compile it in kernel, say Y. To compile it as a
+	  module, choose M here. If unsure, say N.
+
 comment 'IPVS application helper'
 	depends on IP_VS
 
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/Makefile linux-2.6.10-lvs_wlib/net/ipv4/ipvs/Makefile
--- linux-2.6.10-stock/net/ipv4/ipvs/Makefile	2005-01-10 03:58:08.623956009 +0000
+++ linux-2.6.10-lvs_wlib/net/ipv4/ipvs/Makefile	2005-01-10 02:54:06.158074854 +0000
@@ -29,6 +29,7 @@ obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
 obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
 obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
 obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+obj-$(CONFIG_IP_VS_WLIB) += ip_vs_wlib.o
 
 # IPVS application helpers
 obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff -upr -X dontdiff linux-2.6.10-stock/net/ipv4/ipvs/ip_vs_wlib.c linux-2.6.10-lvs_wlib/net/ipv4/ipvs/ip_vs_wlib.c
--- linux-2.6.10-stock/net/ipv4/ipvs/ip_vs_wlib.c	2005-01-10 03:59:54.000000000 +0000
+++ linux-2.6.10-lvs_wlib/net/ipv4/ipvs/ip_vs_wlib.c	2005-01-12 20:23:19.150551541 +0000
@@ -0,0 +1,157 @@
+/*
+ * IPVS:        Weighted Least Incoming Bandwidth Scheduling module
+ *
+ * Version:     ip_vs_wlib.c 1.00 2005/01/12 ccaputo
+ *
+ * Authors:     Chris Caputo <ccaputo at alt.net> based on code by:
+ *
+ *                  Wensong Zhang <wensong at linuxvirtualserver.org>
+ *                  Peter Kese <peter.kese at ijs.si>
+ *                  Julian Anastasov
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *     Chris Caputo: Based code on ip_vs_wlc.c and ip_vs_rr.c.
+ *
+ */
+
+/*
+ * The WLIB algorithm uses the results of the estimator's inbps calculations
+ * to determine which real server has the lowest incoming byterate.  But to
+ * add a weighting element to the calculation an integer divide of inbps by
+ * the server weight is done.
+ * 
+ * An example way to use this is if you have one server that can handle
+ * 100 Mbps of input and another that can handle 1 Gbps you would set the
+ * weights to be 1 and 10 respectively.  (The 100:1000 ratio in lowest terms.)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* .init_service hook: point the scheduling cursor at the dest list head. */
+static int
+ip_vs_wlib_init_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;	/* read by ip_vs_wlib_schedule() */
+	return 0;
+}
+
+/* .done_service hook: no-op; ip_vs_wlib_init_svc() allocates nothing. */
+static int
+ip_vs_wlib_done_svc(struct ip_vs_service *svc)
+{
+	return 0;
+}
+
+/* .update_service hook: reset the scheduling cursor to the dest list head. */
+static int
+ip_vs_wlib_update_svc(struct ip_vs_service *svc)
+{
+	svc->sched_data = &svc->destinations;
+	return 0;
+}
+
+
+/*
+ *	Weighted Least Incoming Bandwidth scheduling; NULL if no dest is usable
+ */
+static struct ip_vs_dest *
+ip_vs_wlib_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+	struct list_head *p, *q;	/* p: scan start, q: scan position */
+	struct ip_vs_dest *dest, *least = NULL;
+	__u32 dwib, lwib = -1;  /* type is from ip_vs_stats struct */
+	int dweight, lweight = 0;	/* d*: current dest, l*: best so far */
+
+	IP_VS_DBG(6, "ip_vs_wlib_schedule(): Scheduling...\n");
+
+	/*
+	 * Find the dest with the least weighted incoming bandwidth (wib).
+	 *
+	 * The server with weight=0 is quiesced and will not receive any
+	 * new connections.
+	 *
+	 * When weight is above 0 we calculate the wib of each dest server as
+	 * follows:
+	 *	(dest inbps) / dest->weight    (integer divide)
+	 *
+	 * In case of ties, highest weight is winner.  And if that still makes
+	 * for a tie, round robin is used (which is why we remember our last
+	 * starting location in the linked list).
+	 */
+
+	write_lock(&svc->sched_lock);	/* held while sched_data is read/written */
+	p = (struct list_head *)svc->sched_data;
+	p = p->next;			/* begin just past the stored cursor */
+	q = p;
+	do {
+		/* skip list head */
+		if (q == &svc->destinations) {
+			q = q->next;
+			continue;	/* jumps to the q != p test below */
+		}
+
+		dest = list_entry(q, struct ip_vs_dest, n_list);
+		dweight = atomic_read(&dest->weight);
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && dweight > 0) {
+			spin_lock(&dest->stats.lock);
+			dwib = dest->stats.inbps / dweight;
+			spin_unlock(&dest->stats.lock);
+
+			if (least == NULL || dwib < lwib ||
+				(dwib == lwib && dweight > lweight)) {
+				least = dest;
+				lwib = dwib;
+				lweight = dweight;
+				svc->sched_data = q;	/* cursor for next call */
+			}
+		}
+		q = q->next;
+	} while (q != p);
+	write_unlock(&svc->sched_lock);
+
+	if (least != NULL)
+		IP_VS_DBG(6, "WLIB: server %u.%u.%u.%u:%u "
+			  "activeconns %d refcnt %d weight %d\n",
+			  NIPQUAD(least->addr), ntohs(least->port),
+			  atomic_read(&least->activeconns),
+			  atomic_read(&least->refcnt),
+			  atomic_read(&least->weight));
+
+	return least;
+}
+
+/* Scheduler descriptor registered by ip_vs_wlib_init() under the name "wlib". */
+static struct ip_vs_scheduler ip_vs_wlib_scheduler =
+{
+	.name =			"wlib",
+	.refcnt =		ATOMIC_INIT(0),
+	.module =		THIS_MODULE,
+	.init_service =		ip_vs_wlib_init_svc,
+	.done_service =		ip_vs_wlib_done_svc,
+	.update_service =	ip_vs_wlib_update_svc,
+	.schedule =		ip_vs_wlib_schedule,
+};
+
+/* Module entry points: register the scheduler on load, unregister on unload. */
+static int __init ip_vs_wlib_init(void)
+{
+	INIT_LIST_HEAD(&ip_vs_wlib_scheduler.n_list);
+	return register_ip_vs_scheduler(&ip_vs_wlib_scheduler);
+}
+
+static void __exit ip_vs_wlib_cleanup(void)
+{
+	unregister_ip_vs_scheduler(&ip_vs_wlib_scheduler);
+}
+
+module_init(ip_vs_wlib_init);
+module_exit(ip_vs_wlib_cleanup);
+MODULE_LICENSE("GPL");



More information about the lvs-users mailing list