Mailinglist Archive: radeonhd (307 mails)

< Previous Next >
[radeonhd] [PATCH] Correct colour scaling for r6xx-r7xx Xv
  • From: Yang Zhao <yang@xxxxxxxxxx>
  • Date: Wed, 11 Feb 2009 11:28:56 -0800
  • Message-id: <40a7b1aa0902111128nf5b6529v9209926e2a256ff5@xxxxxxxxxxxxxx>
This should fix the "video looks washed out" issue that some people
have noticed. Applies to planar only at the moment.

See commit comment for details.

Also at 'better-xv' branch of http://yangman.ca/git/xf86-video-radeonhd

--
Yang Zhao
http://yangman.ca
From fad616a6242debcd7a2ce122aa1877bdfea389a0 Mon Sep 17 00:00:00 2001
From: Yang Zhao <yang@xxxxxxxxxx>
Date: Mon, 9 Feb 2009 22:28:03 -0800
Subject: [PATCH] R6xx/R7xx Xv: Planar - Properly scale Y'CbCr values before
converting to RGB

According to MPEG-2 spec, Y' and Cb/Cr values are scaled to [16, 235]
and [16, 240], respectively, when packed into bytes. Properly take care
of the reverse scaling before translating to RGB.

Conversion matrix has been simplified by zeroing its last (constant offset)
column, as the fitting of Cb/Cr to [-0.5, 0.5] is now done by the scaling step.

Redundant MOV instructions were also removed, and now only 3 GPRs are
required.
---
src/r600_exa.c | 204 +++++++++++++++++++---------------------
src/r600_textured_videofuncs.c | 13 ++-
2 files changed, 108 insertions(+), 109 deletions(-)

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 70e55db..b75e031 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -3412,7 +3412,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
// 2
ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(3),
+ RW_GPR(2),
RW_REL(ABSOLUTE),
INDEX_GPR(0),
ELEM_SIZE(3));
@@ -3427,96 +3427,88 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
+ /* Undo scaling of Y'CbCr values
+ * Y' is scaled from 16:235
+ * Cb/Cr are scaled from 16:240
+ */
// 3 - alu 0
- // DP4 gpr[2].x gpr[1].x c[0].x
+ // MULADD gpr[1].x gpr[1].x c[3].x c[3].y
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[i++] = ALU_DWORD1_OP2(rhdPtr->ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(2),
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Y),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(1));
// 4 - alu 1
- // DP4 gpr[2].y gpr[1].y c[0].y
+ // MULADD gpr[1].y gpr[1].y c[3].z c[3].w
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[i++] = ALU_DWORD1_OP2(rhdPtr->ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(2),
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(1));
+ CLAMP(0));
// 5 - alu 2
- // DP4 gpr[2].z gpr[1].z c[0].z
+ // MULADD gpr[1].z gpr[1].z c[3].z c[3].w
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(259),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
LAST(0));
- ps[i++] = ALU_DWORD1_OP2(rhdPtr->ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(0),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_102),
- DST_GPR(2),
+ ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(1));
+ CLAMP(0));
// 6 - alu 3
- // DP4 gpr[2].w gpr[1].w c[0].w
- ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+ // MOV gpr[1].w 0.0
+ ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
+ SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(256),
+ SRC1_SEL(SQ_ALU_SRC_0),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
+ SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3526,22 +3518,22 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
- BANK_SWIZZLE(SQ_ALU_VEC_021),
- DST_GPR(2),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(1),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(1));
+ CLAMP(0));
// 7 - alu 4
- // DP4 gpr[2].x gpr[1].x c[1].x
+ // DP4 gpr[2].x gpr[1].x c[0].x
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3553,7 +3545,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
@@ -3563,12 +3555,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_X),
CLAMP(1));
// 8 - alu 5
- // DP4 gpr[2].y gpr[1].y c[1].y
+ // DP4 gpr[2].y gpr[1].y c[0].y
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -3580,7 +3572,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
@@ -3590,12 +3582,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_Y),
CLAMP(1));
// 9 - alu 6
- // DP4 gpr[2].z gpr[1].z c[1].z
+ // DP4 gpr[2].z gpr[1].z c[0].z
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -3617,12 +3609,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_Z),
CLAMP(1));
// 10 - alu 7
- // DP4 gpr[2].w gpr[1].w c[1].w
+ // DP4 gpr[2].w gpr[1].w c[0].w
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(257),
+ SRC1_SEL(256),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -3644,12 +3636,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_W),
CLAMP(1));
// 11 - alu 8
- // DP4 gpr[2].x gpr[1].x c[2].x
+ // DP4 gpr[2].x gpr[1].x c[1].x
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3671,12 +3663,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_X),
CLAMP(1));
// 12 - alu 9
- // DP4 gpr[2].y gpr[1].y c[2].y
+ // DP4 gpr[2].y gpr[1].y c[1].y
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
@@ -3688,7 +3680,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(0),
+ WRITE_MASK(1),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
@@ -3698,12 +3690,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_Y),
CLAMP(1));
// 13 - alu 10
- // DP4 gpr[2].z gpr[1].z c[2].z
+ // DP4 gpr[2].z gpr[1].z c[1].z
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
@@ -3715,7 +3707,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
ALU_INST(SQ_OP2_INST_DOT4),
@@ -3725,12 +3717,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_Z),
CLAMP(1));
// 14 - alu 11
- // DP4 gpr[2].w gpr[1].w c[2].w
+ // DP4 gpr[2].w gpr[1].w c[1].w
ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(258),
+ SRC1_SEL(257),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
@@ -3752,12 +3744,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
DST_ELEM(ELEM_W),
CLAMP(1));
// 15 - alu 12
- // MOV gpr[3].x gpr[2].x
- ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ // DP4 gpr[2].x gpr[1].x c[2].x
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
SRC1_ELEM(ELEM_X),
SRC1_NEG(0),
@@ -3769,24 +3761,24 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(3),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
- CLAMP(0));
+ CLAMP(1));
// 16 - alu 13
- // MOV gpr[3].y gpr[2].y
- ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ // DP4 gpr[2].y gpr[1].y c[2].y
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Y),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3796,24 +3788,24 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(3),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
- CLAMP(0));
+ CLAMP(1));
// 17 - alu 14
- // MOV gpr[3].z gpr[2].z
- ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ // DP4 gpr[2].z gpr[1].z c[2].z
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Z),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_Z),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3826,21 +3818,21 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
WRITE_MASK(1),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
- BANK_SWIZZLE(SQ_ALU_VEC_210),
- DST_GPR(3),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_102),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
- CLAMP(0));
+ CLAMP(1));
// 18 - alu 15
- // MOV gpr[3].w gpr[2].w
- ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+ // DP4 gpr[2].w gpr[1].w c[2].w
+ ps[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_W),
SRC0_NEG(0),
- SRC1_SEL(0),
+ SRC1_SEL(258),
SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
+ SRC1_ELEM(ELEM_W),
SRC1_NEG(0),
INDEX_MODE(SQ_INDEX_LOOP),
PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3850,15 +3842,15 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
SRC1_ABS(0),
UPDATE_EXECUTE_MASK(0),
UPDATE_PRED(0),
- WRITE_MASK(1),
+ WRITE_MASK(0),
FOG_MERGE(0),
OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MOV),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(3),
+ ALU_INST(SQ_OP2_INST_DOT4),
+ BANK_SWIZZLE(SQ_ALU_VEC_021),
+ DST_GPR(2),
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_W),
- CLAMP(0));
+ CLAMP(1));
// 19 - alignment
ps[i++] = 0x00000000;
ps[i++] = 0x00000000;
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 46389b8..24a7a7d 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -70,9 +70,16 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, struct
RHDPortPriv *pPriv)
int uv_offset;

static float ps_alu_consts[] = {
- 1.0, 0.0, 1.13983, -1.13983/2, // r - c[0]
- 1.0, -0.39465, -0.5806, (0.39465+0.5806)/2, // g - c[1]
- 1.0, 2.03211, 0.0, -2.03211/2, // b - c[2]
+ 1.0, 0.0, 1.4020, 0, // r - c[0]
+ 1.0, -0.34414, -0.71414, 0, // g - c[1]
+ 1.0, 1.7720, 0.0, 0, // b - c[2]
+ /* Constants for undoing Y'CbCr scaling
+ * - Y' is scaled from 16:235
+ * - Cb/Cr are scaled from 16:240
+ * Unscaled value N' = N * N_mul + N_shift (Y' in range [0, 1], Cb/Cr in range [-0.5, 0.5])
+ * Vector is [Y_mul, Y_shift, C_mul, C_shift]
+ */
+ 256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0,
};

CLEAR (cb_conf);
--
1.6.0.6

< Previous Next >