Mailinglist Archive: radeonhd (307 mails)

< Previous Next >
Re: [radeonhd] [PATCH] r6xx/r7xx overlapping EXA copy optimization
  • From: Yang Zhao <yang@xxxxxxxxxx>
  • Date: Sat, 7 Feb 2009 10:26:33 -0800
  • Message-id: <40a7b1aa0902071026m7e7ca4a8t9c5d71e66d286106@xxxxxxxxxxxxxx>
Take two.

The copy is now done in chunks, and the diagonal-overlap case is now
actually handled. This applies to the current HEAD of the
r6xx-r7xx-support branch (cb78b480).


--
Yang Zhao
http://yangman.ca
From 38b6833ec134fe1c759ba34a18e2afc0b5c66847 Mon Sep 17 00:00:00 2001
From: Yang Zhao <yang@xxxxxxxxxx>
Date: Sat, 7 Feb 2009 10:21:31 -0800
Subject: [PATCH] R6xx/R7xx EXA: Optimize overlapping copy

Overlapping copies are now done in chunks whose size equals the offset
between source and destination (the non-overlapping strip), instead of
one pixel row or column at a time.

Diagonal overlaps are also handled properly.
---
src/r600_exa.c | 85 ++++++++++++++++++++++++++++++++++++++-----------------
1 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/src/r600_exa.c b/src/r600_exa.c
index d1d344b..08db605 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -677,7 +677,7 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst,
accel_state->rop = rop;
accel_state->planemask = planemask;

- return FALSE;
+ //return FALSE;

#ifdef SHOW_VERTEXES
ErrorF("same surface!\n");
@@ -721,57 +721,90 @@ R600OverlapCopy(PixmapPtr pDst,
struct r6xx_accel_state *accel_state = rhdPtr->TwoDPrivate;
uint32_t dst_pitch = exaGetPixmapPitch(pDst) /
(pDst->drawable.bitsPerPixel / 8);
uint32_t dst_offset = exaGetPixmapOffset(pDst) + rhdPtr->FbIntAddress +
rhdPtr->FbScanoutStart;
- int i;
+ int i, chunk;

if (is_overlap(srcX, srcX + w, srcY, srcY + h,
dstX, dstX + w, dstY, dstY + h)) {
- if (srcY == dstY) { // left/right
+ /* Diagonally offset overlap is reduced to a horizontal-only offset by
first
+ * copying the vertically non-overlapping portion, then adjusting
coordinates
+ */
+ if (srcX != dstX) { // left/right or diagonal
+ if (srcY > dstY ) { // diagonal up
+ chunk = srcY - dstY;
+ R600DoPrepareCopy(pScrn,
+ dst_pitch, pDst->drawable.width,
pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ dst_pitch, pDst->drawable.height,
dst_offset, pDst->drawable.bitsPerPixel,
+ accel_state->rop, accel_state->planemask);
+ R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, chunk);
+ R600DoCopy(pScrn);
+
+ h = h - chunk;
+ srcY = srcY + chunk;
+ dstY = dstY + chunk;
+ } else if (srcY < dstY) { // diagonal down
+ chunk = dstY - srcY;
+ R600DoPrepareCopy(pScrn,
+ dst_pitch, pDst->drawable.width,
pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ dst_pitch, pDst->drawable.height,
dst_offset, pDst->drawable.bitsPerPixel,
+ accel_state->rop, accel_state->planemask);
+ R600AppendCopyVertex(pScrn, srcX, srcY + h - chunk, dstX, dstY
+ h - chunk, w, chunk);
+ R600DoCopy(pScrn);
+
+ h = h - chunk;
+ }
+
if (srcX < dstX) { // right
// copy right to left
- for (i = w; i > 0; i--) {
+ chunk = dstX - srcX;
+ for (i = w; i > 0; i -= chunk) {
R600DoPrepareCopy(pScrn,
dst_pitch, pDst->drawable.width,
pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
dst_pitch, pDst->drawable.height,
dst_offset, pDst->drawable.bitsPerPixel,
accel_state->rop, accel_state->planemask);
- R600AppendCopyVertex(pScrn, srcX + i - 1, srcY, dstX + i -
1, dstY, 1, h);
+ R600AppendCopyVertex(pScrn, srcX + i - chunk, srcY, dstX +
i - chunk, dstY, chunk, h);
R600DoCopy(pScrn);
}
} else { //left
// copy left to right
- for (i = 0; i < w; i++) {
+ chunk = srcX - dstX;
+ for (i = 0; i < w; i += chunk) {
R600DoPrepareCopy(pScrn,
dst_pitch, pDst->drawable.width,
pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
dst_pitch, pDst->drawable.height,
dst_offset, pDst->drawable.bitsPerPixel,
accel_state->rop, accel_state->planemask);

- R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY,
1, h);
+ R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY,
chunk, h);
R600DoCopy(pScrn);
}
}
} else { //up/down
if (srcY > dstY) { // up
// copy top to bottom
- for (i = 0; i < h; i++) {
- R600DoPrepareCopy(pScrn,
- dst_pitch, pDst->drawable.width,
pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
- dst_pitch, pDst->drawable.height,
dst_offset, pDst->drawable.bitsPerPixel,
- accel_state->rop, accel_state->planemask);
-
- R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i,
w, 1);
- R600DoCopy(pScrn);
- }
+ for (i = 0; i < h; i += chunk) {
+ chunk = srcY - dstY;
+ R600DoPrepareCopy(pScrn,
+ dst_pitch, pDst->drawable.width,
pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ dst_pitch, pDst->drawable.height,
dst_offset, pDst->drawable.bitsPerPixel,
+ accel_state->rop,
accel_state->planemask);
+
+ if (chunk > h - i) chunk = h - i;
+ R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY +
i, w, chunk);
+ R600DoCopy(pScrn);
+ }
} else { // down
// copy bottom to top
- for (i = h; i > 0; i--) {
- R600DoPrepareCopy(pScrn,
- dst_pitch, pDst->drawable.width,
pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
- dst_pitch, pDst->drawable.height,
dst_offset, pDst->drawable.bitsPerPixel,
- accel_state->rop, accel_state->planemask);
-
- R600AppendCopyVertex(pScrn, srcX, srcY + i - 1, dstX, dstY
+ i - 1, w, 1);
- R600DoCopy(pScrn);
- }
- }
+ chunk = dstY - srcY;
+ for (i = h; i > 0; i -= chunk) {
+ R600DoPrepareCopy(pScrn,
+ dst_pitch, pDst->drawable.width,
pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
+ dst_pitch, pDst->drawable.height,
dst_offset, pDst->drawable.bitsPerPixel,
+ accel_state->rop,
accel_state->planemask);
+
+ if (chunk > i) chunk = i;
+ R600AppendCopyVertex(pScrn, srcX, srcY + i - chunk, dstX,
dstY + i - chunk, w, chunk);
+ R600DoCopy(pScrn);
+ }
+ }
}
} else {
R600DoPrepareCopy(pScrn,
--
1.6.0.6

< Previous Next >
References