Mailinglist Archive: opensuse-buildservice (248 mails)

< Previous Next >
[opensuse-buildservice] support for http range requests
  • From: Marcus Hüwe <suse-tux@xxxxxx>
  • Date: Wed, 14 Oct 2009 16:21:11 +0200
  • Message-id: <20091014142111.GA3345@xxxxxxxxxxxxxxxxxxxxxx>
Hi,

what about adding HTTP range request support [1] to the backend? After I
committed r8169 ("get only the last N bytes of a logfile") last week, darix
suggested using the HTTP "Range" header instead of the "start" and "end" query
parameters. The advantage of using the Range header is that it is already part
of the protocol, and it isn't limited to the "getlogfile()"
function (we can use it whenever a file is sent as a reply).
The attached patch adds initial Range header support to the backend (it also
supports requests for multiple ranges, answered as multipart/byteranges [2]).

Comments, objections, etc. are welcome :)

If there are no objections, I'm going to commit it, and the next step will be
to implement range support in BSServerEvents::reply_file (or rather in the
corresponding "stream" functions).


Marcus

[1] http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.2
[2] http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html#sec19.2
Index: BSHTTP.pm
===================================================================
--- BSHTTP.pm (Revision 8268)
+++ BSHTTP.pm (Arbeitskopie)
@@ -25,7 +25,14 @@
use Digest::MD5 ();

use strict;
+use Fcntl qw(:DEFAULT);
+use Data::Dumper;

+our $MULTIPART_BOUNDARY = "--THIS_STRING_SEPARATES";
+our $CONTENT_RANGE = "Content-Range: bytes %d-%d/%s";
+our $MULTIPART_SEP = "\r\n$MULTIPART_BOUNDARY\r\n$CONTENT_RANGE\r\n";
+our $MULTIPART_ENDSEP = "\r\n$MULTIPART_BOUNDARY--\r\n";
+
sub gethead {
my ($h, $t) = @_;

@@ -345,6 +352,59 @@
return '';
}

+sub range {
+ my ($r, $file, $contenttype, $stream) = @_;
+ return undef unless $r && $r =~ /^bytes=([\d,-]+)$/;
+ my @r = split(',', $1);
+ my @ranges;
+ for (@r) {
+ return undef unless /^(\d*)-(\d*)$/;
+ my $first = $1 ne '' ? $1 + 0 : undef;
+ my $last = $2 ne '' ? $2 + 0 : undef;
+ return undef if !defined($first) && !defined($last);
+ # check if byte range is syntactically invalid
+ print "syntactically invalid byte range\n" if defined($first) &&
defined($last) && $first > $last;
+ return undef if defined($first) && defined($last) && $first > $last;
+ push @ranges, [$first, $last];
+ }
+ my @s = stat($file);
+ # check if ranges are satisfiable; if the ranges aren't satisfiable return
the compelete file
+ # (rfc suggests to return status 416)
+ my @sat_ranges = grep {defined($_->[0]) && $_->[0] < $s[7] ||
!defined($_->[0]) && defined($_->[1]) && $_->[1] > 0} @ranges;
+ my $len = 0;
+ # fixup ranges
+ for my $range (@sat_ranges) {
+ my ($first, $last) = @{$range};
+ # last MUST be < $s[7]
+ if (defined($first) && defined($last)) {
+ $len += $last - $first;
+ $last = $s[7] - 1 if $last >= $s[7];
+ $range->[1] = $s[7] if $range->[1] > $s[7];
+ } elsif (defined($first)) {
+ $len += $s[7] - $first;
+ $last = $s[7] - 1;
+ $range->[0] = $first;
+ $range->[1] = $s[7];
+ } else {
+ $len += $last <= $s[7] ? $last : $s[7];
+ $range->[0] = $first = $last <= $s[7] ? $s[7] - $last : 0;
+ $range->[1] = $s[7];
+ $last = $s[7] - 1;
+ }
+ if (@sat_ranges > 1) {
+ # will be used in the entity body (to separate the range content)
+ my $multi = sprintf($MULTIPART_SEP, $first, $last, $stream ? "*" :
$s[7]) . $contenttype . "\r\n";
+ $len += length($multi);
+ push @{$range}, $multi;
+ } else {
+ # will be used in the http header
+ push @{$range}, sprintf($CONTENT_RANGE, $first, $last, $stream ? "*" :
$s[7]);
+ }
+ }
+ $len += length($MULTIPART_ENDSEP) if @sat_ranges > 1;
+ return {'length' => $len, 'range' => \@sat_ranges};
+}
+
sub file_sender {
my ($param, $sock) = @_;
local *F;
@@ -355,11 +415,25 @@
} else {
open(F, '<', $param->{'filename'}) || die("$param->{'filename'}: $!\n")
}
+ my @ranges = @{$param->{'range'} || []};
+ my @s = stat(F);
while(1) {
+ my ($first, $last, $multi);
+ ($first, $last, $multi) = splice(@{shift(@ranges)}, 0, 3) if @ranges;
+ sysseek(F, $first, Fcntl::SEEK_SET) || die("sysseek: $!\n") if
defined($first);
my $r = sysread(F, $data, 8192);
- last unless $r;
+ last unless $r || @ranges;
+ if (defined($first) && defined($last)) {
+ if ($last - $first > length($data)) {
+ unshift @ranges, [$first + length($data), $last];
+ } else {
+ $data = substr($data, 0, $last - $first);
+ }
+ }
+ $data = $multi . $data if defined($multi) && @{$param->{'range'} || []} >
1;
$data = sprintf("%X\r\n", length($data)).$data."\r\n" if
$param->{'chunked'};
swrite($sock, $data);
+ last if !@ranges && (defined($first) || defined($last));
}
close F unless ref $param->{'filename'};
return '';
Index: BSWatcher.pm
===================================================================
--- BSWatcher.pm (Revision 8268)
+++ BSWatcher.pm (Arbeitskopie)
@@ -58,6 +58,7 @@
sub reply_file {
my $jev = $BSServerEvents::gev;
return BSServer::reply_file(@_) unless $jev;
+ # TODO: implement range support in BSServerEvents::reply_file
deljob($jev);
return BSServerEvents::reply_file(@_);
}
@@ -669,7 +670,7 @@
$ans =~ /^(.*?)\n\r?\n(.*)$/s;
my $headers = $1;
$ans = $2;
- if ($status !~ /^200[^\d]/) {
+ if ($status !~ /^(200|206)[^\d]/) {
rpc_error($ev, "remote error: $status");
return;
}
Index: bs_repserver
===================================================================
--- bs_repserver (Revision 8268)
+++ bs_repserver (Arbeitskopie)
@@ -88,6 +88,22 @@
return grep(delete($h{$_}), @_);
}

+sub header {
+ my (@headers) = @_;
+ my @res;
+ my $headers;
+ if ($BSStdServer::isajax) {
+ $headers = BSServerEvents::header();
+ } else {
+ $headers = BSServer::header();
+ }
+ for (@headers) {
+ #$res->{$_} = $headers->{$_} if exists $headers->{$_};
+ push @res, "$_: $headers->{$_}" if exists $headers->{$_};
+ }
+ return \@res;
+}
+
# XXX read jobs instead?

sub jobname {
@@ -493,13 +509,14 @@
push @args, "end=$cgi->{'end'}" if defined $cgi->{'end'};
if (!$BSStdServer::isajax) {
my $url = "/build/$projid/$repoid/$arch/$packid/_log";
- BSHandoff::handoff($ajaxsocket, $url, undef, @args);
+ BSHandoff::handoff($ajaxsocket, {'uri' => $url, 'headers' =>
header("range")}, undef, @args);
exit(0);
}
my $param = {
'uri' => "$jobstatus->{'uri'}/logfile",
'joinable' => 1,
'receiver:application/octet-stream' => \&BSServer::reply_receiver,
+ 'headers' => header("range"),
};
eval {
BSWatcher::rpc($param, undef, @args);
Index: BSServer.pm
===================================================================
--- BSServer.pm (Revision 8268)
+++ BSServer.pm (Arbeitskopie)
@@ -314,6 +314,7 @@
}

my $post_hdrs;
+my $hdrs;

sub done {
close CLNT;
@@ -429,6 +430,7 @@
$qu = ''; # and assume that there are no more request data
}
$forwardedfor = $headers{'x-forwarded-for'};
+ $hdrs = \%headers;
my $query_string = '';
if ($path =~ /^(.*?)\?(.*)$/) {
$path = $1;
@@ -479,11 +481,15 @@
return $post_hdrs->{'content-type'};
}

-sub header {
+sub post_header {
die("header: invalid request\n") unless $post_hdrs;
return $post_hdrs->{$_[0]};
}

+sub header {
+ return $hdrs;
+}
+
###########################################################################

sub read_file {
@@ -528,10 +534,24 @@
$chunked = 1 unless grep {/^content-length:/i} @args;
push @args, 'Transfer-Encoding: chunked' if $chunked;
unshift @args, 'Content-Type: application/octet-stream' unless grep
{/^content-type:/i} @args;
+ my @contenttype = grep {/^content-type:/i} @args;
+ my $ranges = BSHTTP::range($hdrs->{'range'}, $file, $contenttype[0]);
+ push @args, $ranges->{'range'}->[0]->[2] if @{$ranges->{'range'} || []} == 1;
+ if (@{$ranges->{'range'} || []}) {
+ for (@args) {
+ s/^(content-length:\s*)[\d]+(.*)/$1$ranges->{'length'}$2/i;
+ s/^$contenttype[0]/Content-type: multipart\/byteranges;
boundary=$BSHTTP::MULTIPART_BOUNDARY/ if @{$ranges->{'range'} || []} > 1;
+ }
+ unshift @args, 'status: 206 Partial content';
+ }
reply(undef, @args);
my $param = {'filename' => $file};
$param->{'chunked'} = 1 if $chunked;
+ $param->{'range'} = $ranges->{'range'};
BSHTTP::file_sender($param, \*CLNT);
+ my $data = $BSHTTP::MULTIPART_ENDSEP;
+ $data = sprintf("%X\r\n", length($data)).$data."\r\n" if $param->{'chunked'};
+ BSHTTP::swrite(\*CLNT, $data) if @{$ranges->{'range'} || []} > 1;
BSHTTP::swrite(\*CLNT, "0\r\n\r\n") if $chunked;
}

Index: BSRPC.pm
===================================================================
--- BSRPC.pm (Revision 8268)
+++ BSRPC.pm (Arbeitskopie)
@@ -226,7 +226,7 @@
}
my %headers;
BSHTTP::gethead(\%headers, $headers);
- if ($status =~ /^200[^\d]/) {
+ if ($status =~ /^(200|206)[^\d]/) {
undef $status;
} elsif ($status =~ /^302[^\d]/) {
# XXX: should we do the redirect if $param->{'ignorestatus'} is defined?
Index: BSServerEvents.pm
===================================================================
--- BSServerEvents.pm (Revision 8268)
+++ BSServerEvents.pm (Arbeitskopie)
@@ -32,6 +32,7 @@
use strict;

our $gev; # our event
+my $hdrs;

sub gethead {
# parses http header and fills hash
@@ -326,6 +327,7 @@
}
my %headers;
gethead(\%headers, "Request: $1");
+ $hdrs = \%headers;
$ev->{'headers'} = \%headers;
} elsif ($act ne 'get') {
die("501 Bad method, must be GET\n") if $act ne 'GET';
@@ -551,4 +553,8 @@
return $sockev;
}

+sub header {
+ return $hdrs;
+}
+
1;
Index: bs_worker
===================================================================
--- bs_worker (Revision 8268)
+++ bs_worker (Arbeitskopie)
@@ -341,38 +341,41 @@

sub stream_logfile {
my ($nostream, $start, $end) = @_;
+ local *F;
open(F, "<$buildroot/.build.log") || die("$buildroot/.build.log: $!\n");
- my @s = stat(F);
- $start ||= 0;
- if (defined($end)) {
- $end -= $start;
- die("end is smaller than start\n") if $end < 0;
+ my @args = ('Content-Type: application/octet-stream', 'Transfer-Encoding:
chunked');
+ my @contenttype = grep {/^content-type:/i} @args;
+ my $header = BSServer::header();
+ my $ranges = BSHTTP::range($header->{'range'}, \*F, $contenttype[0],
!$nostream);
+ unshift @args, 'status: 206 Partial content' if @{$ranges->{'range'} || []};
+ push @args, $ranges->{'range'}->[0]->[2] if @{$ranges->{'range'} || []} == 1;
+ for (@args) {
+ s/^$contenttype[0]/Content-type: multipart\/byteranges;
boundary=$BSHTTP::MULTIPART_BOUNDARY/ if @{$ranges->{'range'} || []} > 1;
}
- die("Logfile is not that big\n") if $s[7] < abs($start);
- defined(sysseek(F, $start, $start < 0 ? Fcntl::SEEK_END : Fcntl::SEEK_SET))
|| die("sysseek: $!\n");
-
- BSServer::reply(undef, 'Content-Type: application/octet-stream',
'Transfer-Encoding: chunked');
- my $pos = sysseek(F, 0, Fcntl::SEEK_CUR) || die("sysseek: $!\n");
- while(!defined($end) || $end) {
- @s = stat(F);
- if ($s[7] <= $pos) {
+ BSServer::reply(undef, @args);
+ # XXX: if streaming is enabled the old code also never quits the while loop
(except end was specified)
+ my $rlen = 0;
+ my $param = {'filename' => \*F, 'chunked' => 1, 'range' =>
$ranges->{'range'}};
+ while(1) {
+ my @s = stat(F);
+ if ($rlen >= $s[7]) {
+ # why is that? even if we remove the file the number of hardlinks is
still the same (F is still open)
last if !$s[3];
select(undef, undef, undef, .5);
next;
}
- my $data = '';
- my $l = $s[7] - $pos;
- $l = 4096 if $l > 4096;
- sysread(F, $data, $l);
- next unless length($data);
- $data = substr($data, 0, $end) if defined($end) && length($data) > $end;
- $pos += length($data);
- $end -= length($data) if defined $end;
- $data = sprintf("%X\r\n", length($data)).$data."\r\n";
- BSServer::swrite($data);
- last if $nostream && $pos >= $s[7];
+ $rlen = $s[7];
+ # we cannot use BSServer::reply_file because it would close the connection
+ BSHTTP::file_sender($param, BSServer::getsocket());
+ undef $param->{'range'};
+ last if $nostream;
}
close F;
+ if (@{$ranges->{'range'} || []} > 1) {
+ my $data = $BSHTTP::MULTIPART_ENDSEP;
+ $data = sprintf("%X\r\n", length($data)).$data."\r\n" if
$param->{'chunked'};
+ BSHTTP::swrite(BSServer::getsocket(), $data);
+ }
BSServer::swrite("0\r\n\r\n");
}

Index: BSHandoff.pm
===================================================================
--- BSHandoff.pm (Revision 8268)
+++ BSHandoff.pm (Arbeitskopie)
@@ -42,12 +43,12 @@
local *SOCK;
socket(SOCK, PF_UNIX, SOCK_STREAM, 0) || die("socket: $!\n");
connect(SOCK, sockaddr_un($sockname)) || die("connect: $!\n");
- my $param = {
- 'uri' => $path,
- 'socket' => *SOCK,
- 'sender' => \&handoffsender,
- };
+ my $param = {'uri' => $path};
+ $param = $path if ref($path) eq 'HASH';
+ $param->{'socket'} = *SOCK;
+ $param->{'sender'} = \&handoffsender;
my @headers;
+ @headers = @{$param->{'headers'}} if exists $param->{'headers'};
if ($BSServer::forwardedfor) {
push @headers, "X-Peer: $BSServer::forwardedfor";
} elsif ($BSServer::peer) {
< Previous Next >