Mailinglist Archive: radeonhd (427 mails)

< Previous Next >
Re: [radeonhd] RS690 full-HD performance
  • From: Christiaan van Dijk <dvbmail@xxxxxxxxx>
  • Date: Wed, 13 May 2009 20:59:47 +0200
  • Message-id: <4A0B18A3.7040509@xxxxxxxxx>
Wow,

what a discussion, very nice to see all the comments. Let me also comment on some points.

First of all, using digital audio is a must with modern TVs that do heavy video processing. The processing causes delays, which are compensated for in the audio if the digital audio input is used. Audio pass-through has to go via the TV for best results.

Full-HD decoding can be done in software. The solution with CoreAVC and "CoreAVC for linux" is not free but affordable, and it performs very well on multi-core processors. This is no CoreAVC promotion, just my findings :-)

I still haven't done very detailed testing of the patch. It has been running on my system for some time now without any major problems. There are still some dropouts in the sound, but these do not seem to relate to any of the PLL programming. I suspect the amounts of data being processed and transferred around during full-HD playback are actually causing the audio dropouts. I did some more experimental optimizing of the video part and this greatly reduced the audio dropouts.

I have included the patch in its latest state. Experimental is the right word for the patch. Very very very experimental would fit even better :-). I am still using double buffering with overlay video, and if no scaling is required I'm not using the 3D engine but simply copy/sort out the data and let the overlay part handle the color space conversion. This only works for YV12 right now and is horribly messy at the moment. The whole thing breaks a lot of stuff for other cards and should only be used on RS690 cards... You have been warned :-).

It would be useful to have a way to see what the load on the memory controller/GPU is during full-HD playback. Right now it's just guessing at what's going wrong.

Best regards,
Christiaan van Dijk.
diff -u -r xf86-video-radeonhd/src/r5xx_3dregs.h xf86-video-radeonhd-work/src/r5xx_3dregs.h
--- xf86-video-radeonhd/src/r5xx_3dregs.h 2009-04-24 19:32:07.000000000 +0200
+++ xf86-video-radeonhd-work/src/r5xx_3dregs.h 2009-05-03 16:15:22.000000000 +0200
@@ -545,9 +545,10 @@
#define R300_SC_SCREENDOOR 0x43e8

#define R300_TX_FILTER0_0 0x4400
-# define R300_TX_CLAMP_S(x) (x << 0)
-# define R300_TX_CLAMP_T(x) (x << 3)
-# define R300_TX_CLAMP_R(x) (x << 6)
+#define R300_TX_FILTER0_1 0x4404
+# define R300_TX_CLAMP_S(x) ((x) << 0)
+# define R300_TX_CLAMP_T(x) ((x) << 3)
+# define R300_TX_CLAMP_R(x) ((x) << 6)
# define R300_TX_CLAMP_WRAP 0
# define R300_TX_CLAMP_MIRROR 1
# define R300_TX_CLAMP_CLAMP_LAST 2
@@ -561,18 +562,18 @@
# define R300_TX_MAG_FILTER_LINEAR (2 << 9)
# define R300_TX_MIN_FILTER_LINEAR (2 << 11)
# define R300_TX_ID_SHIFT 28
-
#define R300_TX_FILTER1_0 0x4440
-
+#define R300_TX_FILTER1_1 0x4444
#define R300_TX_FORMAT0_0 0x4480
+#define R300_TX_FORMAT0_1 0x4484
# define R300_TXWIDTH_SHIFT 0
# define R300_TXHEIGHT_SHIFT 11
# define R300_NUM_LEVELS_SHIFT 26
# define R300_NUM_LEVELS_MASK 0x
# define R300_TXPROJECTED (1 << 30)
# define R300_TXPITCH_EN (1 << 31)
-
#define R300_TX_FORMAT1_0 0x44c0
+#define R300_TX_FORMAT1_1 0x44c4
# define R300_TX_FORMAT_X8 0x0
# define R300_TX_FORMAT_X16 0x1
# define R300_TX_FORMAT_Y4X4 0x2
@@ -645,67 +646,29 @@
# define R300_TX_FORMAT_YUV_TO_RGB_NO_CLAMP (2 << 22)
# define R300_TX_FORMAT_SWAP_YUV (1 << 24)

+#define R300_TX_FORMAT2_0 0x4500
+#define R300_TX_FORMAT2_1 0x4504
+# define R500_TXWIDTH_11 (1 << 15)
+# define R500_TXHEIGHT_11 (1 << 16)
+
#define R300_TX_OFFSET_0 0x4540
+#define R300_TX_OFFSET_1 0x4544
# define R300_ENDIAN_SWAP_16_BIT (1 << 0)
# define R300_ENDIAN_SWAP_32_BIT (2 << 0)
# define R300_ENDIAN_SWAP_HALF_DWORD (3 << 0)
# define R300_MACRO_TILE (1 << 2)

-#define R300_US_CONFIG 0x4600
-# define R300_NLEVEL_SHIFT 0
-# define R300_FIRST_TEX (1 << 3)
-# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
-#define R300_US_PIXSIZE 0x4604
-#define R300_US_CODE_OFFSET 0x4608
-# define R300_ALU_CODE_OFFSET(x) (x << 0)
-# define R300_ALU_CODE_SIZE(x) (x << 6)
-# define R300_TEX_CODE_OFFSET(x) (x << 13)
-# define R300_TEX_CODE_SIZE(x) (x << 18)
-#define R300_US_CODE_ADDR_0 0x4610
-# define R300_ALU_START(x) (x << 0)
-# define R300_ALU_SIZE(x) (x << 6)
-# define R300_TEX_START(x) (x << 12)
-# define R300_TEX_SIZE(x) (x << 17)
-# define R300_RGBA_OUT (1 << 22)
-# define R300_W_OUT (1 << 23)
-#define R300_US_CODE_ADDR_1 0x4614
-#define R300_US_CODE_ADDR_2 0x4618
-#define R300_US_CODE_ADDR_3 0x461c
-#define R300_US_TEX_INST_0 0x4620
-#define R300_US_TEX_INST_1 0x4624
-#define R300_US_TEX_INST_2 0x4628
-# define R300_TEX_SRC_ADDR(x) (x << 0)
-# define R300_TEX_DST_ADDR(x) (x << 6)
-# define R300_TEX_ID(x) (x << 11)
-# define R300_TEX_INST(x) (x << 15)
-# define R300_TEX_INST_NOP 0
-# define R300_TEX_INST_LD 1
-# define R300_TEX_INST_TEXKILL 2
-# define R300_TEX_INST_PROJ 3
-# define R300_TEX_INST_LODBIAS 4
-
-#define R300_TX_FORMAT2_0 0x4500
-# define R500_TXWIDTH_11 (1 << 15)
-# define R500_TXHEIGHT_11 (1 << 16)
-
#define R300_TX_BORDER_COLOR_0 0x45c0

-#define R500_US_FC_CTRL 0x4624
-
-/* note that these are 8 bit lengths, despite the offsets, at least for R500 */
-#define R500_US_CODE_ADDR 0x4630
-# define R500_US_CODE_START_ADDR(x) (x << 0)
-# define R500_US_CODE_END_ADDR(x) (x << 16)
-#define R500_US_CODE_RANGE 0x4634
-# define R500_US_CODE_RANGE_ADDR(x) (x << 0)
-# define R500_US_CODE_RANGE_SIZE(x) (x << 16)
-#define R500_US_CODE_OFFSET 0x4638
-# define R500_US_CODE_OFFSET_ADDR(x) (x << 0)
+#define R300_TX_ENABLE 0x4104
+# define R300_TEX_0_ENABLE (1 << 0)
+# define R300_TEX_1_ENABLE (1 << 1)

-#define R300_US_OUT_FMT_0 0x46a4
+#define R300_US_W_FMT 0x46b4
#define R300_US_OUT_FMT_1 0x46a8
#define R300_US_OUT_FMT_2 0x46ac
#define R300_US_OUT_FMT_3 0x46b0
+#define R300_US_OUT_FMT_0 0x46a4
# define R300_OUT_FMT_C4_8 (0 << 0)
# define R300_OUT_FMT_C4_10 (1 << 0)
# define R300_OUT_FMT_C4_10_GAMMA (2 << 0)
@@ -743,55 +706,70 @@
# define R300_OUT_FMT_C3_SEL_RED (1 << 14)
# define R300_OUT_FMT_C3_SEL_GREEN (2 << 14)
# define R300_OUT_FMT_C3_SEL_BLUE (3 << 14)
-#define R300_US_W_FMT 0x46b4
+#define R300_US_CONFIG 0x4600
+# define R300_NLEVEL_SHIFT 0
+# define R300_FIRST_TEX (1 << 3)
+# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
+#define R300_US_PIXSIZE 0x4604
+#define R300_US_CODE_OFFSET 0x4608
+# define R300_ALU_CODE_OFFSET(x) (x << 0)
+# define R300_ALU_CODE_SIZE(x) (x << 6)
+# define R300_TEX_CODE_OFFSET(x) (x << 13)
+# define R300_TEX_CODE_SIZE(x) (x << 18)
+#define R300_US_CODE_ADDR_0 0x4610
+# define R300_ALU_START(x) (x << 0)
+# define R300_ALU_SIZE(x) (x << 6)
+# define R300_TEX_START(x) (x << 12)
+# define R300_TEX_SIZE(x) (x << 17)
+# define R300_RGBA_OUT (1 << 22)
+# define R300_W_OUT (1 << 23)
+#define R300_US_CODE_ADDR_1 0x4614
+#define R300_US_CODE_ADDR_2 0x4618
+#define R300_US_CODE_ADDR_3 0x461c

+
+#define R300_US_TEX_INST_0 0x4620
+#define R300_US_TEX_INST_1 0x4624
+#define R300_US_TEX_INST_2 0x4628
+#define R300_US_TEX_INST(x) (R300_US_TEX_INST_0 + (x)*4)
+# define R300_TEX_SRC_ADDR(x) ((x) << 0)
+# define R300_TEX_DST_ADDR(x) ((x) << 6)
+# define R300_TEX_ID(x) ((x) << 11)
+# define R300_TEX_INST(x) ((x) << 15)
+# define R300_TEX_INST_NOP 0
+# define R300_TEX_INST_LD 1
+# define R300_TEX_INST_TEXKILL 2
+# define R300_TEX_INST_PROJ 3
+# define R300_TEX_INST_LODBIAS 4
#define R300_US_ALU_RGB_ADDR_0 0x46c0
#define R300_US_ALU_RGB_ADDR_1 0x46c4
#define R300_US_ALU_RGB_ADDR_2 0x46c8
+#define R300_US_ALU_RGB_ADDR(x) (R300_US_ALU_RGB_ADDR_0 + (x)*4)
/* for ADDR0-2, values 0-31 specify a location in the pixel stack,
values 32-63 specify a constant */
-# define R300_ALU_RGB_ADDR0(x) (x << 0)
-# define R300_ALU_RGB_ADDR1(x) (x << 6)
-# define R300_ALU_RGB_ADDR2(x) (x << 12)
+# define R300_ALU_RGB_ADDR0(x) ((x) << 0)
+# define R300_ALU_RGB_ADDR1(x) ((x) << 6)
+# define R300_ALU_RGB_ADDR2(x) ((x) << 12)
+# define R300_ALU_RGB_CONST(x) ((x) | (1 << 5))
/* ADDRD - where on the pixel stack the result of this instruction
will be written */
-# define R300_ALU_RGB_ADDRD(x) (x << 18)
-# define R300_ALU_RGB_WMASK(x) (x << 23)
-# define R300_ALU_RGB_OMASK(x) (x << 26)
+# define R300_ALU_RGB_ADDRD(x) ((x) << 18)
+# define R300_ALU_RGB_WMASK(x) ((x) << 23)
+# define R300_ALU_RGB_OMASK(x) ((x) << 26)
# define R300_ALU_RGB_MASK_NONE 0
# define R300_ALU_RGB_MASK_R 1
# define R300_ALU_RGB_MASK_G 2
# define R300_ALU_RGB_MASK_B 4
+# define R300_ALU_RGB_MASK_RGB 7
# define R300_ALU_RGB_TARGET_A (0 << 29)
# define R300_ALU_RGB_TARGET_B (1 << 29)
# define R300_ALU_RGB_TARGET_C (2 << 29)
# define R300_ALU_RGB_TARGET_D (3 << 29)
-
-#define R300_US_ALU_ALPHA_ADDR_0 0x47c0
-#define R300_US_ALU_ALPHA_ADDR_1 0x47c4
-#define R300_US_ALU_ALPHA_ADDR_2 0x47c8
-/* for ADDR0-2, values 0-31 specify a location in the pixel stack,
- values 32-63 specify a constant */
-# define R300_ALU_ALPHA_ADDR0(x) (x << 0)
-# define R300_ALU_ALPHA_ADDR1(x) (x << 6)
-# define R300_ALU_ALPHA_ADDR2(x) (x << 12)
-/* ADDRD - where on the pixel stack the result of this instruction
- will be written */
-# define R300_ALU_ALPHA_ADDRD(x) (x << 18)
-# define R300_ALU_ALPHA_WMASK(x) (x << 23)
-# define R300_ALU_ALPHA_OMASK(x) (x << 24)
-# define R300_ALU_ALPHA_OMASK_W(x) (x << 27)
-# define R300_ALU_ALPHA_MASK_NONE 0
-# define R300_ALU_ALPHA_MASK_A 1
-# define R300_ALU_ALPHA_TARGET_A (0 << 25)
-# define R300_ALU_ALPHA_TARGET_B (1 << 25)
-# define R300_ALU_ALPHA_TARGET_C (2 << 25)
-# define R300_ALU_ALPHA_TARGET_D (3 << 25)
-
#define R300_US_ALU_RGB_INST_0 0x48c0
#define R300_US_ALU_RGB_INST_1 0x48c4
#define R300_US_ALU_RGB_INST_2 0x48c8
-# define R300_ALU_RGB_SEL_A(x) (x << 0)
+#define R300_US_ALU_RGB_INST(x) (R300_US_ALU_RGB_INST_0 + (x)*4)
+# define R300_ALU_RGB_SEL_A(x) ((x) << 0)
# define R300_ALU_RGB_SRC0_RGB 0
# define R300_ALU_RGB_SRC0_RRR 1
# define R300_ALU_RGB_SRC0_GGG 2
@@ -824,21 +802,21 @@
# define R300_ALU_RGB_SRC0_ABG 29
# define R300_ALU_RGB_SRC1_ABG 30
# define R300_ALU_RGB_SRC2_ABG 31
-# define R300_ALU_RGB_MOD_A(x) (x << 5)
+# define R300_ALU_RGB_MOD_A(x) ((x) << 5)
# define R300_ALU_RGB_MOD_NOP 0
# define R300_ALU_RGB_MOD_NEG 1
# define R300_ALU_RGB_MOD_ABS 2
# define R300_ALU_RGB_MOD_NAB 3
-# define R300_ALU_RGB_SEL_B(x) (x << 7)
-# define R300_ALU_RGB_MOD_B(x) (x << 12)
-# define R300_ALU_RGB_SEL_C(x) (x << 14)
-# define R300_ALU_RGB_MOD_C(x) (x << 19)
-# define R300_ALU_RGB_SRCP_OP(x) (x << 21)
+# define R300_ALU_RGB_SEL_B(x) ((x) << 7)
+# define R300_ALU_RGB_MOD_B(x) ((x) << 12)
+# define R300_ALU_RGB_SEL_C(x) ((x) << 14)
+# define R300_ALU_RGB_MOD_C(x) ((x) << 19)
+# define R300_ALU_RGB_SRCP_OP(x) ((x) << 21)
# define R300_ALU_RGB_SRCP_OP_1_MINUS_2RGB0 0
# define R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0 1
# define R300_ALU_RGB_SRCP_OP_RGB1_PLUS_RGB0 2
# define R300_ALU_RGB_SRCP_OP_1_MINUS_RGB0 3
-# define R300_ALU_RGB_OP(x) (x << 23)
+# define R300_ALU_RGB_OP(x) ((x) << 23)
# define R300_ALU_RGB_OP_MAD 0
# define R300_ALU_RGB_OP_DP3 1
# define R300_ALU_RGB_OP_DP4 2
@@ -849,7 +827,7 @@
# define R300_ALU_RGB_OP_CMP 8
# define R300_ALU_RGB_OP_FRC 9
# define R300_ALU_RGB_OP_SOP 10
-# define R300_ALU_RGB_OMOD(x) (x << 27)
+# define R300_ALU_RGB_OMOD(x) ((x) << 27)
# define R300_ALU_RGB_OMOD_NONE 0
# define R300_ALU_RGB_OMOD_MUL_2 1
# define R300_ALU_RGB_OMOD_MUL_4 2
@@ -859,11 +837,33 @@
# define R300_ALU_RGB_OMOD_DIV_8 6
# define R300_ALU_RGB_CLAMP (1 << 30)
# define R300_ALU_RGB_INSERT_NOP (1 << 31)
-
+#define R300_US_ALU_ALPHA_ADDR_0 0x47c0
+#define R300_US_ALU_ALPHA_ADDR_1 0x47c4
+#define R300_US_ALU_ALPHA_ADDR_2 0x47c8
+#define R300_US_ALU_ALPHA_ADDR(x) (R300_US_ALU_ALPHA_ADDR_0 + (x)*4)
+/* for ADDR0-2, values 0-31 specify a location in the pixel stack,
+ values 32-63 specify a constant */
+# define R300_ALU_ALPHA_ADDR0(x) ((x) << 0)
+# define R300_ALU_ALPHA_ADDR1(x) ((x) << 6)
+# define R300_ALU_ALPHA_ADDR2(x) ((x) << 12)
+# define R300_ALU_ALPHA_CONST(x) ((x) | (1 << 5))
+/* ADDRD - where on the pixel stack the result of this instruction
+ will be written */
+# define R300_ALU_ALPHA_ADDRD(x) ((x) << 18)
+# define R300_ALU_ALPHA_WMASK(x) ((x) << 23)
+# define R300_ALU_ALPHA_OMASK(x) ((x) << 24)
+# define R300_ALU_ALPHA_OMASK_W(x) ((x) << 27)
+# define R300_ALU_ALPHA_MASK_NONE 0
+# define R300_ALU_ALPHA_MASK_A 1
+# define R300_ALU_ALPHA_TARGET_A (0 << 25)
+# define R300_ALU_ALPHA_TARGET_B (1 << 25)
+# define R300_ALU_ALPHA_TARGET_C (2 << 25)
+# define R300_ALU_ALPHA_TARGET_D (3 << 25)
#define R300_US_ALU_ALPHA_INST_0 0x49c0
#define R300_US_ALU_ALPHA_INST_1 0x49c4
#define R300_US_ALU_ALPHA_INST_2 0x49c8
-# define R300_ALU_ALPHA_SEL_A(x) (x << 0)
+#define R300_US_ALU_ALPHA_INST(x) (R300_US_ALU_ALPHA_INST_0 + (x)*4)
+# define R300_ALU_ALPHA_SEL_A(x) ((x) << 0)
# define R300_ALU_ALPHA_SRC0_R 0
# define R300_ALU_ALPHA_SRC0_G 1
# define R300_ALU_ALPHA_SRC0_B 2
@@ -883,21 +883,21 @@
# define R300_ALU_ALPHA_0_0 16
# define R300_ALU_ALPHA_1_0 17
# define R300_ALU_ALPHA_0_5 18
-# define R300_ALU_ALPHA_MOD_A(x) (x << 5)
+# define R300_ALU_ALPHA_MOD_A(x) ((x) << 5)
# define R300_ALU_ALPHA_MOD_NOP 0
# define R300_ALU_ALPHA_MOD_NEG 1
# define R300_ALU_ALPHA_MOD_ABS 2
# define R300_ALU_ALPHA_MOD_NAB 3
-# define R300_ALU_ALPHA_SEL_B(x) (x << 7)
-# define R300_ALU_ALPHA_MOD_B(x) (x << 12)
-# define R300_ALU_ALPHA_SEL_C(x) (x << 14)
-# define R300_ALU_ALPHA_MOD_C(x) (x << 19)
-# define R300_ALU_ALPHA_SRCP_OP(x) (x << 21)
+# define R300_ALU_ALPHA_SEL_B(x) ((x) << 7)
+# define R300_ALU_ALPHA_MOD_B(x) ((x) << 12)
+# define R300_ALU_ALPHA_SEL_C(x) ((x) << 14)
+# define R300_ALU_ALPHA_MOD_C(x) ((x) << 19)
+# define R300_ALU_ALPHA_SRCP_OP(x) ((x) << 21)
# define R300_ALU_ALPHA_SRCP_OP_1_MINUS_2RGB0 0
# define R300_ALU_ALPHA_SRCP_OP_RGB1_MINUS_RGB0 1
# define R300_ALU_ALPHA_SRCP_OP_RGB1_PLUS_RGB0 2
# define R300_ALU_ALPHA_SRCP_OP_1_MINUS_RGB0 3
-# define R300_ALU_ALPHA_OP(x) (x << 23)
+# define R300_ALU_ALPHA_OP(x) ((x) << 23)
# define R300_ALU_ALPHA_OP_MAD 0
# define R300_ALU_ALPHA_OP_DP 1
# define R300_ALU_ALPHA_OP_MIN 2
@@ -909,7 +909,7 @@
# define R300_ALU_ALPHA_OP_LN2 9
# define R300_ALU_ALPHA_OP_RCP 10
# define R300_ALU_ALPHA_OP_RSQ 11
-# define R300_ALU_ALPHA_OMOD(x) (x << 27)
+# define R300_ALU_ALPHA_OMOD(x) ((x) << 27)
# define R300_ALU_ALPHA_OMOD_NONE 0
# define R300_ALU_ALPHA_OMOD_MUL_2 1
# define R300_ALU_ALPHA_OMOD_MUL_4 2
@@ -919,9 +919,66 @@
# define R300_ALU_ALPHA_OMOD_DIV_8 6
# define R300_ALU_ALPHA_CLAMP (1 << 30)

+#define R300_US_ALU_CONST_R_0 0x4c00
+#define R300_US_ALU_CONST_R(x) (R300_US_ALU_CONST_R_0 + (x)*16)
+#define R300_US_ALU_CONST_G_0 0x4c04
+#define R300_US_ALU_CONST_G(x) (R300_US_ALU_CONST_G_0 + (x)*16)
+#define R300_US_ALU_CONST_B_0 0x4c08
+#define R300_US_ALU_CONST_B(x) (R300_US_ALU_CONST_B_0 + (x)*16)
+#define R300_US_ALU_CONST_A_0 0x4c0c
+#define R300_US_ALU_CONST_A(x) (R300_US_ALU_CONST_A_0 + (x)*16)
+#define R300_FG_DEPTH_SRC 0x4bd8
#define R300_FG_FOG_BLEND 0x4bc0
#define R300_FG_ALPHA_FUNC 0x4bd4
-#define R300_FG_DEPTH_SRC 0x4bd8
+
+#define R300_DST_PIPE_CONFIG 0x170c
+# define R300_PIPE_AUTO_CONFIG (1 << 31)
+#define R300_RB2D_DSTCACHE_MODE 0x3428
+#define R300_RB2D_DSTCACHE_MODE 0x3428
+# define R300_DC_AUTOFLUSH_ENABLE (1 << 8)
+# define R300_DC_DC_DISABLE_IGNORE_PE (1 << 17)
+#define R300_RB2D_DSTCACHE_CTLSTAT 0x342c /* use DSTCACHE_CTLSTAT instead */
+#define R300_DSTCACHE_CTLSTAT 0x1714
+# define R300_DC_FLUSH_2D (1 << 0)
+# define R300_DC_FREE_2D (1 << 2)
+# define R300_RB2D_DC_FLUSH_ALL (R300_DC_FLUSH_2D | R300_DC_FREE_2D)
+# define R300_RB2D_DC_BUSY (1 << 31)
+#define R300_RB3D_ZCACHE_CTLSTAT 0x4f18
+# define R300_ZC_FLUSH (1 << 0)
+# define R300_ZC_FREE (1 << 1)
+# define R300_ZC_FLUSH_ALL 0x3
+#define R300_RB3D_ZSTENCILCNTL 0x4f04
+#define R300_RB3D_ZCACHE_CTLSTAT 0x4f18
+#define R300_RB3D_BW_CNTL 0x4f1c
+#define R300_RB3D_ZCNTL 0x4f00
+#define R300_RB3D_ZTOP 0x4f14
+#define R300_RB3D_ROPCNTL 0x4e18
+#define R300_RB3D_BLENDCNTL 0x4e04
+# define R300_ALPHA_BLEND_ENABLE (1 << 0)
+# define R300_SEPARATE_ALPHA_ENABLE (1 << 1)
+# define R300_READ_ENABLE (1 << 2)
+#define R300_RB3D_ABLENDCNTL 0x4e08
+#define R300_RB3D_COLOROFFSET0 0x4e28
+#define R300_RB3D_COLORPITCH0 0x4e38
+# define R300_COLORTILE (1 << 16)
+# define R300_COLORENDIAN_WORD (1 << 19)
+# define R300_COLORENDIAN_DWORD (2 << 19)
+# define R300_COLORENDIAN_HALF_DWORD (3 << 19)
+# define R300_COLORFORMAT_ARGB1555 (3 << 21)
+# define R300_COLORFORMAT_RGB565 (4 << 21)
+# define R300_COLORFORMAT_ARGB8888 (6 << 21)
+# define R300_COLORFORMAT_ARGB32323232 (7 << 21)
+# define R300_COLORFORMAT_I8 (9 << 21)
+# define R300_COLORFORMAT_ARGB16161616 (10 << 21)
+# define R300_COLORFORMAT_VYUY (11 << 21)
+# define R300_COLORFORMAT_YVYU (12 << 21)
+# define R300_COLORFORMAT_UV88 (13 << 21)
+# define R300_COLORFORMAT_ARGB4444 (15 << 21)
+
+
+
+
+

#define R300_RB3D_CCTL 0x4e00
#define R300_RB3D_BLENDCNTL 0x4e04
@@ -975,87 +1032,161 @@
# define R300_ZC_FLUSH_ALL 0x3
#define R300_RB3D_BW_CNTL 0x4f1c

-#define R500_US_ALU_RGB_ADDR_0 0x9000
-# define R500_RGB_ADDR0(x) (x << 0)
-# define R500_RGB_ADDR0_CONST (1 << 8)
-# define R500_RGB_ADDR0_REL (1 << 9)
-# define R500_RGB_ADDR1(x) (x << 10)
-# define R500_RGB_ADDR1_CONST (1 << 18)
-# define R500_RGB_ADDR1_REL (1 << 19)
-# define R500_RGB_ADDR2(x) (x << 20)
-# define R500_RGB_ADDR2_CONST (1 << 28)
-# define R500_RGB_ADDR2_REL (1 << 29)
-# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30)
-# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30)
-# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30)
-# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30)
-
-#define R500_US_TEX_INST_0 0x9000
-# define R500_TEX_ID(x) (x << 16)
-# define R500_TEX_INST_NOP (0 << 22)
-# define R500_TEX_INST_LD (1 << 22)
-# define R500_TEX_INST_TEXKILL (2 << 22)
-# define R500_TEX_INST_PROJ (3 << 22)
-# define R500_TEX_INST_LODBIAS (4 << 22)
-# define R500_TEX_INST_LOD (5 << 22)
-# define R500_TEX_INST_DXDY (6 << 22)
-# define R500_TEX_SEM_ACQUIRE (1 << 25)
-# define R500_TEX_IGNORE_UNCOVERED (1 << 26)
-# define R500_TEX_UNSCALED (1 << 27)
+/* R500 US has to be loaded through an index/data pair */
+#define R500_GA_US_VECTOR_INDEX 0x4250
+# define R500_US_VECTOR_TYPE_INST (0 << 16)
+# define R500_US_VECTOR_TYPE_CONST (1 << 16)
+# define R500_US_VECTOR_CLAMP (1 << 17)
+# define R500_US_VECTOR_INST_INDEX(x) ((x) | R500_US_VECTOR_TYPE_INST)
+# define R500_US_VECTOR_CONST_INDEX(x) ((x) | R500_US_VECTOR_TYPE_CONST)
+#define R500_GA_US_VECTOR_DATA 0x4254

+/*
+ * The R500 unified shader (US) registers come in banks of 512 each, one
+ * for each instruction slot in the shader. You can't touch them directly.
+ * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive
+ * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the
+ * instruction is fully specified.
+ */
+#define R500_US_ALU_ALPHA_INST_0 0xa800
+# define R500_ALPHA_OP_MAD 0
+# define R500_ALPHA_OP_DP 1
+# define R500_ALPHA_OP_MIN 2
+# define R500_ALPHA_OP_MAX 3
+/* #define R500_ALPHA_OP_RESERVED 4 */
+# define R500_ALPHA_OP_CND 5
+# define R500_ALPHA_OP_CMP 6
+# define R500_ALPHA_OP_FRC 7
+# define R500_ALPHA_OP_EX2 8
+# define R500_ALPHA_OP_LN2 9
+# define R500_ALPHA_OP_RCP 10
+# define R500_ALPHA_OP_RSQ 11
+# define R500_ALPHA_OP_SIN 12
+# define R500_ALPHA_OP_COS 13
+# define R500_ALPHA_OP_MDH 14
+# define R500_ALPHA_OP_MDV 15
+# define R500_ALPHA_ADDRD(x) ((x) << 4)
+# define R500_ALPHA_ADDRD_REL (1 << 11)
+# define R500_ALPHA_SEL_A_SRC0 (0 << 12)
+# define R500_ALPHA_SEL_A_SRC1 (1 << 12)
+# define R500_ALPHA_SEL_A_SRC2 (2 << 12)
+# define R500_ALPHA_SEL_A_SRCP (3 << 12)
+# define R500_ALPHA_SWIZ_A_R (0 << 14)
+# define R500_ALPHA_SWIZ_A_G (1 << 14)
+# define R500_ALPHA_SWIZ_A_B (2 << 14)
+# define R500_ALPHA_SWIZ_A_A (3 << 14)
+# define R500_ALPHA_SWIZ_A_0 (4 << 14)
+# define R500_ALPHA_SWIZ_A_HALF (5 << 14)
+# define R500_ALPHA_SWIZ_A_1 (6 << 14)
+/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */
+# define R500_ALPHA_MOD_A_NOP (0 << 17)
+# define R500_ALPHA_MOD_A_NEG (1 << 17)
+# define R500_ALPHA_MOD_A_ABS (2 << 17)
+# define R500_ALPHA_MOD_A_NAB (3 << 17)
+# define R500_ALPHA_SEL_B_SRC0 (0 << 19)
+# define R500_ALPHA_SEL_B_SRC1 (1 << 19)
+# define R500_ALPHA_SEL_B_SRC2 (2 << 19)
+# define R500_ALPHA_SEL_B_SRCP (3 << 19)
+# define R500_ALPHA_SWIZ_B_R (0 << 21)
+# define R500_ALPHA_SWIZ_B_G (1 << 21)
+# define R500_ALPHA_SWIZ_B_B (2 << 21)
+# define R500_ALPHA_SWIZ_B_A (3 << 21)
+# define R500_ALPHA_SWIZ_B_0 (4 << 21)
+# define R500_ALPHA_SWIZ_B_HALF (5 << 21)
+# define R500_ALPHA_SWIZ_B_1 (6 << 21)
+/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */
+# define R500_ALPHA_MOD_B_NOP (0 << 24)
+# define R500_ALPHA_MOD_B_NEG (1 << 24)
+# define R500_ALPHA_MOD_B_ABS (2 << 24)
+# define R500_ALPHA_MOD_B_NAB (3 << 24)
+# define R500_ALPHA_OMOD_IDENTITY (0 << 26)
+# define R500_ALPHA_OMOD_MUL_2 (1 << 26)
+# define R500_ALPHA_OMOD_MUL_4 (2 << 26)
+# define R500_ALPHA_OMOD_MUL_8 (3 << 26)
+# define R500_ALPHA_OMOD_DIV_2 (4 << 26)
+# define R500_ALPHA_OMOD_DIV_4 (5 << 26)
+# define R500_ALPHA_OMOD_DIV_8 (6 << 26)
+# define R500_ALPHA_OMOD_DISABLE (7 << 26)
+# define R500_ALPHA_TARGET(x) ((x) << 29)
+# define R500_ALPHA_W_OMASK (1 << 31)
#define R500_US_ALU_ALPHA_ADDR_0 0x9800
-# define R500_ALPHA_ADDR0(x) (x << 0)
+# define R500_ALPHA_ADDR0(x) ((x) << 0)
# define R500_ALPHA_ADDR0_CONST (1 << 8)
# define R500_ALPHA_ADDR0_REL (1 << 9)
-# define R500_ALPHA_ADDR1(x) (x << 10)
+# define R500_ALPHA_ADDR1(x) ((x) << 10)
# define R500_ALPHA_ADDR1_CONST (1 << 18)
# define R500_ALPHA_ADDR1_REL (1 << 19)
-# define R500_ALPHA_ADDR2(x) (x << 20)
+# define R500_ALPHA_ADDR2(x) ((x) << 20)
# define R500_ALPHA_ADDR2_CONST (1 << 28)
# define R500_ALPHA_ADDR2_REL (1 << 29)
# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30)
# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30)
# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30)
# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30)
-
-#define R500_US_TEX_ADDR_0 0x9800
-# define R500_TEX_SRC_ADDR(x) (x << 0)
-# define R500_TEX_SRC_ADDR_REL (1 << 7)
-# define R500_TEX_SRC_S_SWIZ_R (0 << 8)
-# define R500_TEX_SRC_S_SWIZ_G (1 << 8)
-# define R500_TEX_SRC_S_SWIZ_B (2 << 8)
-# define R500_TEX_SRC_S_SWIZ_A (3 << 8)
-# define R500_TEX_SRC_T_SWIZ_R (0 << 10)
-# define R500_TEX_SRC_T_SWIZ_G (1 << 10)
-# define R500_TEX_SRC_T_SWIZ_B (2 << 10)
-# define R500_TEX_SRC_T_SWIZ_A (3 << 10)
-# define R500_TEX_SRC_R_SWIZ_R (0 << 12)
-# define R500_TEX_SRC_R_SWIZ_G (1 << 12)
-# define R500_TEX_SRC_R_SWIZ_B (2 << 12)
-# define R500_TEX_SRC_R_SWIZ_A (3 << 12)
-# define R500_TEX_SRC_Q_SWIZ_R (0 << 14)
-# define R500_TEX_SRC_Q_SWIZ_G (1 << 14)
-# define R500_TEX_SRC_Q_SWIZ_B (2 << 14)
-# define R500_TEX_SRC_Q_SWIZ_A (3 << 14)
-# define R500_TEX_DST_ADDR(x) (x << 16)
-# define R500_TEX_DST_ADDR_REL (1 << 23)
-# define R500_TEX_DST_R_SWIZ_R (0 << 24)
-# define R500_TEX_DST_R_SWIZ_G (1 << 24)
-# define R500_TEX_DST_R_SWIZ_B (2 << 24)
-# define R500_TEX_DST_R_SWIZ_A (3 << 24)
-# define R500_TEX_DST_G_SWIZ_R (0 << 26)
-# define R500_TEX_DST_G_SWIZ_G (1 << 26)
-# define R500_TEX_DST_G_SWIZ_B (2 << 26)
-# define R500_TEX_DST_G_SWIZ_A (3 << 26)
-# define R500_TEX_DST_B_SWIZ_R (0 << 28)
-# define R500_TEX_DST_B_SWIZ_G (1 << 28)
-# define R500_TEX_DST_B_SWIZ_B (2 << 28)
-# define R500_TEX_DST_B_SWIZ_A (3 << 28)
-# define R500_TEX_DST_A_SWIZ_R (0 << 30)
-# define R500_TEX_DST_A_SWIZ_G (1 << 30)
-# define R500_TEX_DST_A_SWIZ_B (2 << 30)
-# define R500_TEX_DST_A_SWIZ_A (3 << 30)
-
+#define R500_US_ALU_RGBA_INST_0 0xb000
+# define R500_ALU_RGBA_OP_MAD (0 << 0)
+# define R500_ALU_RGBA_OP_DP3 (1 << 0)
+# define R500_ALU_RGBA_OP_DP4 (2 << 0)
+# define R500_ALU_RGBA_OP_D2A (3 << 0)
+# define R500_ALU_RGBA_OP_MIN (4 << 0)
+# define R500_ALU_RGBA_OP_MAX (5 << 0)
+/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */
+# define R500_ALU_RGBA_OP_CND (7 << 0)
+# define R500_ALU_RGBA_OP_CMP (8 << 0)
+# define R500_ALU_RGBA_OP_FRC (9 << 0)
+# define R500_ALU_RGBA_OP_SOP (10 << 0)
+# define R500_ALU_RGBA_OP_MDH (11 << 0)
+# define R500_ALU_RGBA_OP_MDV (12 << 0)
+# define R500_ALU_RGBA_ADDRD(x) ((x) << 4)
+# define R500_ALU_RGBA_ADDRD_REL (1 << 11)
+# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12)
+# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12)
+# define R500_ALU_RGBA_R_SWIZ_R (0 << 14)
+# define R500_ALU_RGBA_R_SWIZ_G (1 << 14)
+# define R500_ALU_RGBA_R_SWIZ_B (2 << 14)
+# define R500_ALU_RGBA_R_SWIZ_A (3 << 14)
+# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14)
+# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14)
+# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14)
+/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */
+# define R500_ALU_RGBA_G_SWIZ_R (0 << 17)
+# define R500_ALU_RGBA_G_SWIZ_G (1 << 17)
+# define R500_ALU_RGBA_G_SWIZ_B (2 << 17)
+# define R500_ALU_RGBA_G_SWIZ_A (3 << 17)
+# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17)
+# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17)
+# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17)
+/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */
+# define R500_ALU_RGBA_B_SWIZ_R (0 << 20)
+# define R500_ALU_RGBA_B_SWIZ_G (1 << 20)
+# define R500_ALU_RGBA_B_SWIZ_B (2 << 20)
+# define R500_ALU_RGBA_B_SWIZ_A (3 << 20)
+# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20)
+# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20)
+# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20)
+/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */
+# define R500_ALU_RGBA_MOD_C_NOP (0 << 23)
+# define R500_ALU_RGBA_MOD_C_NEG (1 << 23)
+# define R500_ALU_RGBA_MOD_C_ABS (2 << 23)
+# define R500_ALU_RGBA_MOD_C_NAB (3 << 23)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25)
+# define R500_ALU_RGBA_A_SWIZ_R (0 << 27)
+# define R500_ALU_RGBA_A_SWIZ_G (1 << 27)
+# define R500_ALU_RGBA_A_SWIZ_B (2 << 27)
+# define R500_ALU_RGBA_A_SWIZ_A (3 << 27)
+# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27)
+# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27)
+# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27)
+/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */
+# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30)
#define R500_US_ALU_RGB_INST_0 0xa000
# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0)
# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0)
@@ -1129,181 +1260,22 @@
# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26)
# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26)
# define R500_ALU_RGB_OMOD_DISABLE (7 << 26)
-# define R500_ALU_RGB_TARGET(x) (x << 29)
+# define R500_ALU_RGB_TARGET(x) ((x) << 29)
# define R500_ALU_RGB_WMASK (1 << 31)
-
-#define R500_US_TEX_ADDR_DXDY_0 0xa000
-# define R500_DX_ADDR(x) (x << 0)
-# define R500_DX_ADDR_REL (1 << 7)
-# define R500_DX_S_SWIZ_R (0 << 8)
-# define R500_DX_S_SWIZ_G (1 << 8)
-# define R500_DX_S_SWIZ_B (2 << 8)
-# define R500_DX_S_SWIZ_A (3 << 8)
-# define R500_DX_T_SWIZ_R (0 << 10)
-# define R500_DX_T_SWIZ_G (1 << 10)
-# define R500_DX_T_SWIZ_B (2 << 10)
-# define R500_DX_T_SWIZ_A (3 << 10)
-# define R500_DX_R_SWIZ_R (0 << 12)
-# define R500_DX_R_SWIZ_G (1 << 12)
-# define R500_DX_R_SWIZ_B (2 << 12)
-# define R500_DX_R_SWIZ_A (3 << 12)
-# define R500_DX_Q_SWIZ_R (0 << 14)
-# define R500_DX_Q_SWIZ_G (1 << 14)
-# define R500_DX_Q_SWIZ_B (2 << 14)
-# define R500_DX_Q_SWIZ_A (3 << 14)
-# define R500_DY_ADDR(x) (x << 16)
-# define R500_DY_ADDR_REL (1 << 17)
-# define R500_DY_S_SWIZ_R (0 << 24)
-# define R500_DY_S_SWIZ_G (1 << 24)
-# define R500_DY_S_SWIZ_B (2 << 24)
-# define R500_DY_S_SWIZ_A (3 << 24)
-# define R500_DY_T_SWIZ_R (0 << 26)
-# define R500_DY_T_SWIZ_G (1 << 26)
-# define R500_DY_T_SWIZ_B (2 << 26)
-# define R500_DY_T_SWIZ_A (3 << 26)
-# define R500_DY_R_SWIZ_R (0 << 28)
-# define R500_DY_R_SWIZ_G (1 << 28)
-# define R500_DY_R_SWIZ_B (2 << 28)
-# define R500_DY_R_SWIZ_A (3 << 28)
-# define R500_DY_Q_SWIZ_R (0 << 30)
-# define R500_DY_Q_SWIZ_G (1 << 30)
-# define R500_DY_Q_SWIZ_B (2 << 30)
-# define R500_DY_Q_SWIZ_A (3 << 30)
-
-/*
- * The R500 unified shader (US) registers come in banks of 512 each, one
- * for each instruction slot in the shader. You can't touch them directly.
- * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive
- * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the
- * instruction is fully specified.
- */
-#define R500_US_ALU_ALPHA_INST_0 0xa800
-# define R500_ALPHA_OP_MAD 0
-# define R500_ALPHA_OP_DP 1
-# define R500_ALPHA_OP_MIN 2
-# define R500_ALPHA_OP_MAX 3
-/* #define R500_ALPHA_OP_RESERVED 4 */
-# define R500_ALPHA_OP_CND 5
-# define R500_ALPHA_OP_CMP 6
-# define R500_ALPHA_OP_FRC 7
-# define R500_ALPHA_OP_EX2 8
-# define R500_ALPHA_OP_LN2 9
-# define R500_ALPHA_OP_RCP 10
-# define R500_ALPHA_OP_RSQ 11
-# define R500_ALPHA_OP_SIN 12
-# define R500_ALPHA_OP_COS 13
-# define R500_ALPHA_OP_MDH 14
-# define R500_ALPHA_OP_MDV 15
-# define R500_ALPHA_ADDRD(x) (x << 4)
-# define R500_ALPHA_ADDRD_REL (1 << 11)
-# define R500_ALPHA_SEL_A_SRC0 (0 << 12)
-# define R500_ALPHA_SEL_A_SRC1 (1 << 12)
-# define R500_ALPHA_SEL_A_SRC2 (2 << 12)
-# define R500_ALPHA_SEL_A_SRCP (3 << 12)
-# define R500_ALPHA_SWIZ_A_R (0 << 14)
-# define R500_ALPHA_SWIZ_A_G (1 << 14)
-# define R500_ALPHA_SWIZ_A_B (2 << 14)
-# define R500_ALPHA_SWIZ_A_A (3 << 14)
-# define R500_ALPHA_SWIZ_A_0 (4 << 14)
-# define R500_ALPHA_SWIZ_A_HALF (5 << 14)
-# define R500_ALPHA_SWIZ_A_1 (6 << 14)
-/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */
-# define R500_ALPHA_MOD_A_NOP (0 << 17)
-# define R500_ALPHA_MOD_A_NEG (1 << 17)
-# define R500_ALPHA_MOD_A_ABS (2 << 17)
-# define R500_ALPHA_MOD_A_NAB (3 << 17)
-# define R500_ALPHA_SEL_B_SRC0 (0 << 19)
-# define R500_ALPHA_SEL_B_SRC1 (1 << 19)
-# define R500_ALPHA_SEL_B_SRC2 (2 << 19)
-# define R500_ALPHA_SEL_B_SRCP (3 << 19)
-# define R500_ALPHA_SWIZ_B_R (0 << 21)
-# define R500_ALPHA_SWIZ_B_G (1 << 21)
-# define R500_ALPHA_SWIZ_B_B (2 << 21)
-# define R500_ALPHA_SWIZ_B_A (3 << 21)
-# define R500_ALPHA_SWIZ_B_0 (4 << 21)
-# define R500_ALPHA_SWIZ_B_HALF (5 << 21)
-# define R500_ALPHA_SWIZ_B_1 (6 << 21)
-/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */
-# define R500_ALPHA_MOD_B_NOP (0 << 24)
-# define R500_ALPHA_MOD_B_NEG (1 << 24)
-# define R500_ALPHA_MOD_B_ABS (2 << 24)
-# define R500_ALPHA_MOD_B_NAB (3 << 24)
-# define R500_ALPHA_OMOD_IDENTITY (0 << 26)
-# define R500_ALPHA_OMOD_MUL_2 (1 << 26)
-# define R500_ALPHA_OMOD_MUL_4 (2 << 26)
-# define R500_ALPHA_OMOD_MUL_8 (3 << 26)
-# define R500_ALPHA_OMOD_DIV_2 (4 << 26)
-# define R500_ALPHA_OMOD_DIV_4 (5 << 26)
-# define R500_ALPHA_OMOD_DIV_8 (6 << 26)
-# define R500_ALPHA_OMOD_DISABLE (7 << 26)
-# define R500_ALPHA_TARGET(x) (x << 29)
-# define R500_ALPHA_W_OMASK (1 << 31)
-
-#define R500_US_ALU_RGBA_INST_0 0xb000
-# define R500_ALU_RGBA_OP_MAD (0 << 0)
-# define R500_ALU_RGBA_OP_DP3 (1 << 0)
-# define R500_ALU_RGBA_OP_DP4 (2 << 0)
-# define R500_ALU_RGBA_OP_D2A (3 << 0)
-# define R500_ALU_RGBA_OP_MIN (4 << 0)
-# define R500_ALU_RGBA_OP_MAX (5 << 0)
-/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */
-# define R500_ALU_RGBA_OP_CND (7 << 0)
-# define R500_ALU_RGBA_OP_CMP (8 << 0)
-# define R500_ALU_RGBA_OP_FRC (9 << 0)
-# define R500_ALU_RGBA_OP_SOP (10 << 0)
-# define R500_ALU_RGBA_OP_MDH (11 << 0)
-# define R500_ALU_RGBA_OP_MDV (12 << 0)
-# define R500_ALU_RGBA_ADDRD(x) (x << 4)
-# define R500_ALU_RGBA_ADDRD_REL (1 << 11)
-# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12)
-# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12)
-# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12)
-# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12)
-# define R500_ALU_RGBA_R_SWIZ_R (0 << 14)
-# define R500_ALU_RGBA_R_SWIZ_G (1 << 14)
-# define R500_ALU_RGBA_R_SWIZ_B (2 << 14)
-# define R500_ALU_RGBA_R_SWIZ_A (3 << 14)
-# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14)
-# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14)
-# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14)
-/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */
-# define R500_ALU_RGBA_G_SWIZ_R (0 << 17)
-# define R500_ALU_RGBA_G_SWIZ_G (1 << 17)
-# define R500_ALU_RGBA_G_SWIZ_B (2 << 17)
-# define R500_ALU_RGBA_G_SWIZ_A (3 << 17)
-# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17)
-# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17)
-# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17)
-/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */
-# define R500_ALU_RGBA_B_SWIZ_R (0 << 20)
-# define R500_ALU_RGBA_B_SWIZ_G (1 << 20)
-# define R500_ALU_RGBA_B_SWIZ_B (2 << 20)
-# define R500_ALU_RGBA_B_SWIZ_A (3 << 20)
-# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20)
-# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20)
-# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20)
-/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */
-# define R500_ALU_RGBA_MOD_C_NOP (0 << 23)
-# define R500_ALU_RGBA_MOD_C_NEG (1 << 23)
-# define R500_ALU_RGBA_MOD_C_ABS (2 << 23)
-# define R500_ALU_RGBA_MOD_C_NAB (3 << 23)
-# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25)
-# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25)
-# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25)
-# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25)
-# define R500_ALU_RGBA_A_SWIZ_R (0 << 27)
-# define R500_ALU_RGBA_A_SWIZ_G (1 << 27)
-# define R500_ALU_RGBA_A_SWIZ_B (2 << 27)
-# define R500_ALU_RGBA_A_SWIZ_A (3 << 27)
-# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27)
-# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27)
-# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27)
-/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */
-# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30)
-# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30)
-# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30)
-# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30)
-
+#define R500_US_ALU_RGB_ADDR_0 0x9000
+# define R500_RGB_ADDR0(x) ((x) << 0)
+# define R500_RGB_ADDR0_CONST (1 << 8)
+# define R500_RGB_ADDR0_REL (1 << 9)
+# define R500_RGB_ADDR1(x) ((x) << 10)
+# define R500_RGB_ADDR1_CONST (1 << 18)
+# define R500_RGB_ADDR1_REL (1 << 19)
+# define R500_RGB_ADDR2(x) ((x) << 20)
+# define R500_RGB_ADDR2_CONST (1 << 28)
+# define R500_RGB_ADDR2_REL (1 << 29)
+# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30)
+# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30)
+# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30)
+# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30)
#define R500_US_CMN_INST_0 0xb800
# define R500_INST_TYPE_ALU (0 << 0)
# define R500_INST_TYPE_OUT (1 << 0)
@@ -1348,5 +1320,263 @@
# define R500_INST_STAT_WE_G (1 << 29)
# define R500_INST_STAT_WE_B (1 << 30)
# define R500_INST_STAT_WE_A (1 << 31)
+/* note that these are 8 bit lengths, despite the offsets, at least for R500 */
+#define R500_US_CODE_ADDR 0x4630
+# define R500_US_CODE_START_ADDR(x) ((x) << 0)
+# define R500_US_CODE_END_ADDR(x) ((x) << 16)
+#define R500_US_CODE_OFFSET 0x4638
+# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0)
+#define R500_US_CODE_RANGE 0x4634
+# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0)
+# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16)
+#define R500_US_CONFIG 0x4600
+# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
+#define R500_US_FC_ADDR_0 0xa000
+# define R500_FC_BOOL_ADDR(x) ((x) << 0)
+# define R500_FC_INT_ADDR(x) ((x) << 8)
+# define R500_FC_JUMP_ADDR(x) ((x) << 16)
+# define R500_FC_JUMP_GLOBAL (1 << 31)
+#define R500_US_FC_BOOL_CONST 0x4620
+# define R500_FC_KBOOL(x) (x)
+#define R500_US_FC_CTRL 0x4624
+# define R500_FC_TEST_EN (1 << 30)
+# define R500_FC_FULL_FC_EN (1 << 31)
+#define R500_US_FC_INST_0 0x9800
+# define R500_FC_OP_JUMP (0 << 0)
+# define R500_FC_OP_LOOP (1 << 0)
+# define R500_FC_OP_ENDLOOP (2 << 0)
+# define R500_FC_OP_REP (3 << 0)
+# define R500_FC_OP_ENDREP (4 << 0)
+# define R500_FC_OP_BREAKLOOP (5 << 0)
+# define R500_FC_OP_BREAKREP (6 << 0)
+# define R500_FC_OP_CONTINUE (7 << 0)
+# define R500_FC_B_ELSE (1 << 4)
+# define R500_FC_JUMP_ANY (1 << 5)
+# define R500_FC_A_OP_NONE (0 << 6)
+# define R500_FC_A_OP_POP (1 << 6)
+# define R500_FC_A_OP_PUSH (2 << 6)
+# define R500_FC_JUMP_FUNC(x) ((x) << 8)
+# define R500_FC_B_POP_CNT(x) ((x) << 16)
+# define R500_FC_B_OP0_NONE (0 << 24)
+# define R500_FC_B_OP0_DECR (1 << 24)
+# define R500_FC_B_OP0_INCR (2 << 24)
+# define R500_FC_B_OP1_NONE (0 << 26) /* same NONE/DECR/INCR encoding as B_OP0; NONE and DECR were swapped - TODO confirm against the register spec */
+# define R500_FC_B_OP1_DECR (1 << 26)
+# define R500_FC_B_OP1_INCR (2 << 26)
+# define R500_FC_IGNORE_UNCOVERED (1 << 28)
+#define R500_US_FC_INT_CONST_0 0x4c00
+# define R500_FC_INT_CONST_KR(x) ((x) << 0)
+# define R500_FC_INT_CONST_KG(x) ((x) << 8)
+# define R500_FC_INT_CONST_KB(x) ((x) << 16)
+/* _0 through _15 */
+#define R500_US_FORMAT0_0 0x4640
+# define R500_FORMAT_TXWIDTH(x) ((x) << 0)
+# define R500_FORMAT_TXHEIGHT(x) ((x) << 11)
+# define R500_FORMAT_TXDEPTH(x) ((x) << 22)
+/* _0 through _3 */
+#define R500_US_OUT_FMT_0 0x46a4
+# define R500_OUT_FMT_C4_8 (0 << 0)
+# define R500_OUT_FMT_C4_10 (1 << 0)
+# define R500_OUT_FMT_C4_10_GAMMA (2 << 0)
+# define R500_OUT_FMT_C_16 (3 << 0)
+# define R500_OUT_FMT_C2_16 (4 << 0)
+# define R500_OUT_FMT_C4_16 (5 << 0)
+# define R500_OUT_FMT_C_16_MPEG (6 << 0)
+# define R500_OUT_FMT_C2_16_MPEG (7 << 0)
+# define R500_OUT_FMT_C2_4 (8 << 0)
+# define R500_OUT_FMT_C_3_3_2 (9 << 0)
+# define R500_OUT_FMT_C_6_5_6 (10 << 0)
+# define R500_OUT_FMT_C_11_11_10 (11 << 0)
+# define R500_OUT_FMT_C_10_11_11 (12 << 0)
+# define R500_OUT_FMT_C_2_10_10_10 (13 << 0)
+/* #define R500_OUT_FMT_RESERVED (14 << 0) */
+# define R500_OUT_FMT_UNUSED (15 << 0)
+# define R500_OUT_FMT_C_16_FP (16 << 0)
+# define R500_OUT_FMT_C2_16_FP (17 << 0)
+# define R500_OUT_FMT_C4_16_FP (18 << 0)
+# define R500_OUT_FMT_C_32_FP (19 << 0)
+# define R500_OUT_FMT_C2_32_FP (20 << 0)
+# define R500_OUT_FMT_C4_32_FP (21 << 0)
+# define R500_C0_SEL_A (0 << 8)
+# define R500_C0_SEL_R (1 << 8)
+# define R500_C0_SEL_G (2 << 8)
+# define R500_C0_SEL_B (3 << 8)
+# define R500_C1_SEL_A (0 << 10)
+# define R500_C1_SEL_R (1 << 10)
+# define R500_C1_SEL_G (2 << 10)
+# define R500_C1_SEL_B (3 << 10)
+# define R500_C2_SEL_A (0 << 12)
+# define R500_C2_SEL_R (1 << 12)
+# define R500_C2_SEL_G (2 << 12)
+# define R500_C2_SEL_B (3 << 12)
+# define R500_C3_SEL_A (0 << 14)
+# define R500_C3_SEL_R (1 << 14)
+# define R500_C3_SEL_G (2 << 14)
+# define R500_C3_SEL_B (3 << 14)
+# define R500_OUT_SIGN(x) ((x) << 16)
+# define R500_ROUND_ADJ (1 << 20)
+#define R500_US_PIXSIZE 0x4604
+# define R500_PIX_SIZE(x) (x)
+#define R500_US_TEX_ADDR_0 0x9800
+# define R500_TEX_SRC_ADDR(x) ((x) << 0)
+# define R500_TEX_SRC_ADDR_REL (1 << 7)
+# define R500_TEX_SRC_S_SWIZ_R (0 << 8)
+# define R500_TEX_SRC_S_SWIZ_G (1 << 8)
+# define R500_TEX_SRC_S_SWIZ_B (2 << 8)
+# define R500_TEX_SRC_S_SWIZ_A (3 << 8)
+# define R500_TEX_SRC_T_SWIZ_R (0 << 10)
+# define R500_TEX_SRC_T_SWIZ_G (1 << 10)
+# define R500_TEX_SRC_T_SWIZ_B (2 << 10)
+# define R500_TEX_SRC_T_SWIZ_A (3 << 10)
+# define R500_TEX_SRC_R_SWIZ_R (0 << 12)
+# define R500_TEX_SRC_R_SWIZ_G (1 << 12)
+# define R500_TEX_SRC_R_SWIZ_B (2 << 12)
+# define R500_TEX_SRC_R_SWIZ_A (3 << 12)
+# define R500_TEX_SRC_Q_SWIZ_R (0 << 14)
+# define R500_TEX_SRC_Q_SWIZ_G (1 << 14)
+# define R500_TEX_SRC_Q_SWIZ_B (2 << 14)
+# define R500_TEX_SRC_Q_SWIZ_A (3 << 14)
+# define R500_TEX_DST_ADDR(x) ((x) << 16)
+# define R500_TEX_DST_ADDR_REL (1 << 23)
+# define R500_TEX_DST_R_SWIZ_R (0 << 24)
+# define R500_TEX_DST_R_SWIZ_G (1 << 24)
+# define R500_TEX_DST_R_SWIZ_B (2 << 24)
+# define R500_TEX_DST_R_SWIZ_A (3 << 24)
+# define R500_TEX_DST_G_SWIZ_R (0 << 26)
+# define R500_TEX_DST_G_SWIZ_G (1 << 26)
+# define R500_TEX_DST_G_SWIZ_B (2 << 26)
+# define R500_TEX_DST_G_SWIZ_A (3 << 26)
+# define R500_TEX_DST_B_SWIZ_R (0 << 28)
+# define R500_TEX_DST_B_SWIZ_G (1 << 28)
+# define R500_TEX_DST_B_SWIZ_B (2 << 28)
+# define R500_TEX_DST_B_SWIZ_A (3 << 28)
+# define R500_TEX_DST_A_SWIZ_R (0 << 30)
+# define R500_TEX_DST_A_SWIZ_G (1 << 30)
+# define R500_TEX_DST_A_SWIZ_B (2 << 30)
+# define R500_TEX_DST_A_SWIZ_A (3 << 30)
+#define R500_US_TEX_ADDR_DXDY_0 0xa000
+# define R500_DX_ADDR(x) ((x) << 0)
+# define R500_DX_ADDR_REL (1 << 7)
+# define R500_DX_S_SWIZ_R (0 << 8)
+# define R500_DX_S_SWIZ_G (1 << 8)
+# define R500_DX_S_SWIZ_B (2 << 8)
+# define R500_DX_S_SWIZ_A (3 << 8)
+# define R500_DX_T_SWIZ_R (0 << 10)
+# define R500_DX_T_SWIZ_G (1 << 10)
+# define R500_DX_T_SWIZ_B (2 << 10)
+# define R500_DX_T_SWIZ_A (3 << 10)
+# define R500_DX_R_SWIZ_R (0 << 12)
+# define R500_DX_R_SWIZ_G (1 << 12)
+# define R500_DX_R_SWIZ_B (2 << 12)
+# define R500_DX_R_SWIZ_A (3 << 12)
+# define R500_DX_Q_SWIZ_R (0 << 14)
+# define R500_DX_Q_SWIZ_G (1 << 14)
+# define R500_DX_Q_SWIZ_B (2 << 14)
+# define R500_DX_Q_SWIZ_A (3 << 14)
+# define R500_DY_ADDR(x) ((x) << 16)
+# define R500_DY_ADDR_REL (1 << 23) /* was (1 << 17), which lies inside DY_ADDR; REL sits above the address field, as DX_ADDR_REL does at bit 7 */
+# define R500_DY_S_SWIZ_R (0 << 24)
+# define R500_DY_S_SWIZ_G (1 << 24)
+# define R500_DY_S_SWIZ_B (2 << 24)
+# define R500_DY_S_SWIZ_A (3 << 24)
+# define R500_DY_T_SWIZ_R (0 << 26)
+# define R500_DY_T_SWIZ_G (1 << 26)
+# define R500_DY_T_SWIZ_B (2 << 26)
+# define R500_DY_T_SWIZ_A (3 << 26)
+# define R500_DY_R_SWIZ_R (0 << 28)
+# define R500_DY_R_SWIZ_G (1 << 28)
+# define R500_DY_R_SWIZ_B (2 << 28)
+# define R500_DY_R_SWIZ_A (3 << 28)
+# define R500_DY_Q_SWIZ_R (0 << 30)
+# define R500_DY_Q_SWIZ_G (1 << 30)
+# define R500_DY_Q_SWIZ_B (2 << 30)
+# define R500_DY_Q_SWIZ_A (3 << 30)
+#define R500_US_TEX_INST_0 0x9000
+# define R500_TEX_ID(x) ((x) << 16)
+# define R500_TEX_INST_NOP (0 << 22)
+# define R500_TEX_INST_LD (1 << 22)
+# define R500_TEX_INST_TEXKILL (2 << 22)
+# define R500_TEX_INST_PROJ (3 << 22)
+# define R500_TEX_INST_LODBIAS (4 << 22)
+# define R500_TEX_INST_LOD (5 << 22)
+# define R500_TEX_INST_DXDY (6 << 22)
+# define R500_TEX_SEM_ACQUIRE (1 << 25)
+# define R500_TEX_IGNORE_UNCOVERED (1 << 26)
+# define R500_TEX_UNSCALED (1 << 27)
+#define R500_US_W_FMT 0x46b4
+# define R500_W_FMT_W0 (0 << 0)
+# define R500_W_FMT_W24 (1 << 0)
+# define R500_W_FMT_W24FP (2 << 0)
+# define R500_W_SRC_US (0 << 2)
+# define R500_W_SRC_RAS (1 << 2)
+
+#define R500_GA_US_VECTOR_INDEX 0x4250
+#define R500_GA_US_VECTOR_DATA 0x4254
+
+#define R500_RS_INST_0 0x4320
+#define R500_RS_INST_1 0x4324
+# define R500_RS_INST_TEX_ID_SHIFT 0
+# define R500_RS_INST_TEX_CN_WRITE (1 << 4)
+# define R500_RS_INST_TEX_ADDR_SHIFT 5
+# define R500_RS_INST_COL_ID_SHIFT 12
+# define R500_RS_INST_COL_CN_NO_WRITE (0 << 16)
+# define R500_RS_INST_COL_CN_WRITE (1 << 16)
+# define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16)
+# define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16)
+# define R500_RS_INST_COL_COL_ADDR_SHIFT 18
+# define R500_RS_INST_TEX_ADJ (1 << 25)
+# define R500_RS_INST_W_CN (1 << 26)
+
+#define R500_US_FC_CTRL 0x4624
+#define R500_US_CODE_ADDR 0x4630
+#define R500_US_CODE_RANGE 0x4634
+#define R500_US_CODE_OFFSET 0x4638
+
+#define R500_RS_IP_0 0x4074
+#define R500_RS_IP_1 0x4078
+# define R500_RS_IP_PTR_K0 62
+# define R500_RS_IP_PTR_K1 63
+# define R500_RS_IP_TEX_PTR_S_SHIFT 0
+# define R500_RS_IP_TEX_PTR_T_SHIFT 6
+# define R500_RS_IP_TEX_PTR_R_SHIFT 12
+# define R500_RS_IP_TEX_PTR_Q_SHIFT 18
+# define R500_RS_IP_COL_PTR_SHIFT 24
+# define R500_RS_IP_COL_FMT_SHIFT 27
+# define R500_RS_IP_COL_FMT_RGBA (0 << 27)
+# define R500_RS_IP_OFFSET_EN (1 << 31)
+
+#define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */
+
+
+/* Overlay control registers */
+#define AVIVO_D1OVL_COLOR_MATRIX_TRANS_CNTL 0x6140
+#define AVIVO_D1OVL_ENABLE 0x6180
+#define AVIVO_D1OVL_CONTROL1 0x6184
+#define AVIVO_D1OVL_CONTROL2 0x6188
+#define AVIVO_D1OVL_SWAPCNTL 0x618c
+#define AVIVO_D1OVL_SURFACE_ADDRESS 0x6190
+#define AVIVO_D1OVL_PITCH 0x6198
+#define AVIVO_D1OVL_SURFACE_OFFSET_X 0x619c
+#define AVIVO_D1OVL_SURFACE_OFFSET_Y 0x61a0
+#define AVIVO_D1OVL_START 0x61a4
+#define AVIVO_D1OVL_END 0x61a8
+#define AVIVO_D1OVL_UPDATE 0x61ac
+#define AVIVO_D1OVL_ADDRESS_INUSE 0x61b0
+#define AVIVO_D1OVL_DFQ_STATUS 0x61b8
+#define AVIVO_D1OVL_MATRIX_TRANSFORM_EN 0x6200
+
+#define AVIVO_D1OVL_RT_SKEWCOMMAND 0x6500
+#define AVIVO_D1OVL_RT_BAND_POSITION 0x6508
+#define AVIVO_D1OVL_RT_PROCEED_COND 0x650c
+#define AVIVO_D1OVL_RT_STAT 0x6510
+
+#define AVIVO_D1OVL_COLOR_MATRIX_TRANSFORMATION_CNTL 0x6140
+#define AVIVO_D1OVL_MATRIX_TRANSFORM_EN 0x6200
+#define AVIVO_D1OVL_PWL_TRANSFORM_EN 0x6280
+#define AVIVO_D1OVL_KEY_CONTROL 0x6300
+#define AVIVO_D1OVL_ALPHA_CONTROL 0x630c
+
+#define AVIVO_D1CRTC_SNAPSHOT_STATUS 0x60c8
+

#endif /* HAVE_R5XX_3DREGS_H */
diff -u -r xf86-video-radeonhd/src/r600_textured_videofuncs.c
xf86-video-radeonhd-work/src/r600_textured_videofuncs.c
--- xf86-video-radeonhd/src/r600_textured_videofuncs.c 2009-04-24
19:32:07.000000000 +0200
+++ xf86-video-radeonhd-work/src/r600_textured_videofuncs.c 2009-04-24
19:34:03.000000000 +0200
@@ -146,7 +146,7 @@
CLEAR (ps_conf);

accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) /
(pPixmap->drawable.bitsPerPixel / 8);
- accel_state->src_pitch[0] = pPriv->BufferPitch;
+ accel_state->src_pitch[0] = pPriv->SrcBufferPitch;

// bad pitch
if (accel_state->src_pitch[0] & 7)
@@ -226,7 +226,7 @@
switch(pPriv->id) {
case FOURCC_YV12:
case FOURCC_I420:
- accel_state->src_mc_addr[0] = pPriv->BufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
+ accel_state->src_mc_addr[0] = pPriv->SrcBufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;

/* flush texture cache */
@@ -336,7 +336,7 @@
case FOURCC_UYVY:
case FOURCC_YUY2:
default:
- accel_state->src_mc_addr[0] = pPriv->BufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
+ accel_state->src_mc_addr[0] = pPriv->SrcBufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart;
accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;

/* flush texture cache */
diff -u -r xf86-video-radeonhd/src/radeon_textured_videofuncs.c
xf86-video-radeonhd-work/src/radeon_textured_videofuncs.c
--- xf86-video-radeonhd/src/radeon_textured_videofuncs.c 2009-04-24
19:32:07.000000000 +0200
+++ xf86-video-radeonhd-work/src/radeon_textured_videofuncs.c 2009-05-03
20:15:39.000000000 +0200
@@ -24,64 +24,10 @@
* Based on radeon_exa_render.c and kdrive ati_video.c by Eric Anholt, et al.
*
*/
+
#if defined(IS_RADEON_DRIVER) || defined(IS_QUICK_AND_DIRTY)
-#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
-#error Cannot define both MMIO and CP acceleration!
-#endif
-
-#if !defined(UNIXCPP) || defined(ANSICPP)
-#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
-#else
-#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
-#endif
-
-#ifdef ACCEL_MMIO
-#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
-#else
-#ifdef ACCEL_CP
-#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
-#else
-#error No accel type defined!
-#endif
-#endif
-
-#define VTX_DWORD_COUNT 4
-
-#ifdef ACCEL_CP
-
-#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \
-do { \
- OUT_VIDEO_RING_F(_dstX); \
- OUT_VIDEO_RING_F(_dstY); \
- OUT_VIDEO_RING_F(_srcX); \
- OUT_VIDEO_RING_F(_srcY); \
-} while (0)
-
-#else /* ACCEL_CP */
-
-#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \
-do { \
- OUT_VIDEO_REG_F(RADEON_SE_PORT_DATA0, _dstX); \
- OUT_VIDEO_REG_F(RADEON_SE_PORT_DATA0, _dstY); \
- OUT_VIDEO_REG_F(RADEON_SE_PORT_DATA0, _srcX); \
- OUT_VIDEO_REG_F(RADEON_SE_PORT_DATA0, _srcY); \
-} while (0)

-# define VAR_PSCRN_PREAMBLE(pScrn) RHDPtr info = RHDPTR(pScrn)
-# define THREEDSTATE_PREAMBLE() struct rhdAccel *accel_state =
info->accel_state
-
-# define BUFFER_PITCH pPriv->src_pitch
-# define FB_BUFFER_OFFSET pPriv->src_offset
-# define FB_PIXMAP_OFFSET(x) (((char *)(x) - (char *)rhdPtr->FbBase) +
rhdPtr->FbIntAddress)
-
-# ifdef USE_EXA
-# define EXA_ENABLED (info->AccelMethod == RHD_ACCEL_EXA)
-# define EXA_FB_OFFSET (info->FbIntAddress + info->FbScanoutStart)
-# endif
-
-# define HAS_TCL IS_R500_3D
-
-#endif /* !ACCEL_CP */
+#error "Bad define in textured_videofuncs"

#else /* IS_RADEON_DRIVER */

@@ -101,6 +47,8 @@
# include "r5xx_3dregs.h"
# include "rhd_video.h"

+#include <unistd.h>
+
# ifdef USE_EXA
# include "exa.h"
# endif
@@ -114,26 +62,37 @@
/*
* Map the macros.
*/
-# define VIDEO_PREAMBLE() struct RhdCS *CS = rhdPtr->CS
-
-# define BEGIN_VIDEO(Count) RHDCSGrab(CS, 2 * (Count))
-# define OUT_VIDEO_REG(Reg, Value) RHDCSRegWrite(CS, (Reg), (Value))
-# define FINISH_VIDEO()
+#define VIDEO_PREAMBLE() struct RhdCS *CS = rhdPtr->CS

-# define BEGIN_RING(Count) RHDCSGrab(CS, (Count))
-# define OUT_RING(Value) RHDCSWrite(CS, (Value))
-# define ADVANCE_RING() RHDCSAdvance(CS)
+#define BEGIN_VIDEO(Count) RHDCSGrab(CS, 2 * (Count))
+#define OUT_VIDEO_REG(Reg, Value) RHDCSRegWrite(CS, (Reg), (Value))
+#define OUT_VIDEO_REG_F(Reg, Value) RHDCSRegWrite(CS, (Reg), (F_TO_DW(Value)))
+#define FINISH_VIDEO()
+
+#define BEGIN_RING(Count) RHDCSGrab(CS, (Count))
+#define OUT_RING(Value) RHDCSWrite(CS, (Value))
+#define ADVANCE_RING() RHDCSAdvance(CS)

-# define OUT_VIDEO_RING_F(x) OUT_RING(F_TO_DW(x))
+#define OUT_VIDEO_RING_F(x) OUT_RING(F_TO_DW(x))

#define VTX_DWORD_COUNT 4

-#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \
-do { \
+#define VTX_OUT_FILTER(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \
+do { \
OUT_VIDEO_RING_F(_dstX); \
OUT_VIDEO_RING_F(_dstY); \
OUT_VIDEO_RING_F(_srcX); \
OUT_VIDEO_RING_F(_srcY); \
+ OUT_VIDEO_RING_F(_maskX); \
+ OUT_VIDEO_RING_F(_maskY); \
+} while (0)
+
+#define VTX_OUT(_dstX, _dstY, _srcX, _srcY) \
+do { \
+ OUT_VIDEO_RING_F(_dstX); \
+ OUT_VIDEO_RING_F(_dstY); \
+ OUT_VIDEO_RING_F(_srcX); \
+ OUT_VIDEO_RING_F(_srcY); \
} while (0)

# define IS_R300_3D \
@@ -146,7 +105,6 @@
# define HAS_TCL IS_R500_3D

# define ONLY_ONCE 1 /* we're always only once in the radeonhd driver */
-# define ACCEL_CP 1

# if !defined(UNIXCPP) || defined(ANSICPP)
# define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
@@ -170,16 +128,52 @@
return tmp.l;
}

-/*
- *
- */
-static __inline__ Bool
-RADEONTilingEnabled(ScrnInfoPtr pScrn, PixmapPtr pPix)
+/* Borrowed from Mesa: packs a float into a 24-bit value (sign at bit 23, biased exponent from bit 16, 16 mantissa bits below); 0.0 maps to 0 */
+static __inline__ uint32_t F_TO_24(float val)
{
- return FALSE; /* for now */
+ float mantissa;
+ int exponent;
+ uint32_t float24 = 0;
+
+ if (val == 0.0)
+ return 0;
+
+ mantissa = frexpf(val, &exponent);
+
+ /* Handle -ve: record the sign in bit 23 and carry on with the magnitude */
+ if (mantissa < 0) {
+ float24 |= (1 << 23);
+ mantissa = mantissa * -1.0;
+ }
+ /* Handle exponent, bias of 63 (frexpf normalises to [0.5, 1), so its exponent is one higher; hence +62) */
+ exponent += 62;
+ float24 |= (exponent << 16);
+ /* Kill 7 LSB of mantissa: keep the top 16 of the 23 float32 fraction bits */
+ float24 |= (F_TO_DW(mantissa) & 0x7FFFFF) >> 7;
+
+ return float24;
+}
+
+void
+radeon_box_intersect(BoxPtr dest, BoxPtr a, BoxPtr b)
+{
+ dest->x1 = a->x1 > b->x1 ? a->x1 : b->x1;
+ dest->x2 = a->x2 < b->x2 ? a->x2 : b->x2;
+ dest->y1 = a->y1 > b->y1 ? a->y1 : b->y1;
+ dest->y2 = a->y2 < b->y2 ? a->y2 : b->y2;
+
+ if (dest->x1 >= dest->x2 || dest->y1 >= dest->y2)
+ dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0;
}

-# define xFixedToFloat(f) (((float) (f)) / 65536)
+int
+radeon_box_area(BoxPtr box)
+{
+ return (int) (box->x2 - box->x1) * (int) (box->y2 - box->y1);
+}
+/*
+ *
+ */

# define VAR_PSCRN_PREAMBLE(pScrn) RHDPtr rhdPtr = RHDPTR(pScrn)
# define THREEDSTATE_PREAMBLE() struct R5xx3D *accel_state = (struct R5xx3D
*)rhdPtr->ThreeDPrivate
@@ -199,61 +193,44 @@

#endif /* IS_RADEON_DRIVER */

-#ifdef IS_RADEON_DRIVER
-static
-#endif
void
FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr
pPriv)
{
VAR_PSCRN_PREAMBLE(pScrn);
THREEDSTATE_PREAMBLE();
+
PixmapPtr pPixmap = pPriv->pPixmap;
-#if 0
- uint32_t txformat;
-#endif
- uint32_t txfilter, txformat0, txformat1, txpitch;
- uint32_t dst_offset, dst_pitch, dst_format;
- uint32_t txenable, colorpitch;
- uint32_t blendcntl;
- int dstxoff, dstyoff, pixel_shift;
+ CARD32 txfilter, txformat0, txformat1, txoffset, txpitch;
+ CARD32 dst_offset, dst_pitch, dst_format;
+ CARD32 txenable, colorpitch;
+ CARD32 blendcntl;
+ Bool isplanar = FALSE;
+ int pixel_shift, vtx_count;
BoxPtr pBox = REGION_RECTS(&pPriv->clip);
int nBox = REGION_NUM_RECTS(&pPriv->clip);
+ BoxRec srcBox, dstBox;
+ CARD32 output_fmt;
+
VIDEO_PREAMBLE();

- pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
+ /* Overlay tear proof video only for R300 !!!!! */

-#ifdef USE_EXA
- if (EXA_ENABLED) {
- dst_offset = exaGetPixmapOffset(pPixmap) + EXA_FB_OFFSET;
- dst_pitch = exaGetPixmapPitch(pPixmap);
- } else
-#endif
- {
- dst_offset = FB_PIXMAP_OFFSET(pPixmap->devPrivate.ptr);
- dst_pitch = pPixmap->devKind;
- }
+ pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;

-#ifdef COMPOSITE
- dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
- dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
-#else
- dstxoff = 0;
- dstyoff = 0;
-#endif
+ /* Select buffer for off-screen overlay buffer destination. Toggle between
1 and 2 */
+ if (pPriv->ovlCurrent != 0) {
+ dst_offset = pPriv->Ovl1BufferOffset + rhdPtr->FbIntAddress;
+ pPriv->ovlCurrent = 0;
+ } else {
+ dst_offset = pPriv->Ovl2BufferOffset + rhdPtr->FbIntAddress;
+ pPriv->ovlCurrent = 1;
+ }
+ dst_pitch = pPriv->OvlBufferPitch;

- if (!accel_state->XHas3DEngineState)
- RADEONInit3DEngine(pScrn);
+ /* Radeon: if EXA: RADEON_SWITCH_TO_3D() */

- /* we can probably improve this */
BEGIN_VIDEO(2);
-#ifdef IS_RADEON_DRIVER
- if (IS_R300_3D || IS_R500_3D)
-#endif
- OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
-#ifdef IS_RADEON_DRIVER
- else
- OUT_VIDEO_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
-#endif
+ OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
/* We must wait for 3d to idle, in case source was just written as a dest.
*/
OUT_VIDEO_REG(RADEON_WAIT_UNTIL,
RADEON_WAIT_HOST_IDLECLEAN |
@@ -262,94 +239,149 @@
RADEON_WAIT_DMA_GUI_IDLE);
FINISH_VIDEO();

- if (IS_R300_3D || IS_R500_3D) {
- uint32_t output_fmt;
-
- switch (pPixmap->drawable.bitsPerPixel) {
- case 16:
- if (pPixmap->drawable.depth == 15)
- dst_format = R300_COLORFORMAT_ARGB1555;
- else
- dst_format = R300_COLORFORMAT_RGB565;
- break;
- case 32:
- dst_format = R300_COLORFORMAT_ARGB8888;
- break;
- default:
- return;
- }
-
- output_fmt = (R300_OUT_FMT_C4_8 |
- R300_OUT_FMT_C0_SEL_BLUE |
- R300_OUT_FMT_C1_SEL_GREEN |
- R300_OUT_FMT_C2_SEL_RED |
- R300_OUT_FMT_C3_SEL_ALPHA);
-
- colorpitch = dst_pitch >> pixel_shift;
- colorpitch |= dst_format;
+ if (!accel_state->XHas3DEngineState)
+ RADEONInit3DEngine(pScrn);

- if (RADEONTilingEnabled(pScrn, pPixmap))
- colorpitch |= R300_COLORTILE;
+ if (pPriv->bicubic_enabled)
+ vtx_count = 6;
+ else
+ vtx_count = 4;

- if (pPriv->id == FOURCC_UYVY)
- txformat1 = R300_TX_FORMAT_YVYU422;
+ switch (pPixmap->drawable.bitsPerPixel) {
+ case 16:
+ if (pPixmap->drawable.depth == 15)
+ dst_format = R300_COLORFORMAT_ARGB1555;
else
- txformat1 = R300_TX_FORMAT_VYUY422;
+ dst_format = R300_COLORFORMAT_RGB565;
+ break;
+ case 32:
+ dst_format = R300_COLORFORMAT_ARGB8888;
+ break;
+ default:
+ return;
+ }

- txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+ output_fmt = (R300_OUT_FMT_C4_8 |
+ R300_OUT_FMT_C0_SEL_BLUE |
+ R300_OUT_FMT_C1_SEL_GREEN |
+ R300_OUT_FMT_C2_SEL_RED |
+ R300_OUT_FMT_C3_SEL_ALPHA);

- txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
- (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT));
+ colorpitch = dst_pitch >> pixel_shift;
+ colorpitch |= dst_format;

- txformat0 |= R300_TXPITCH_EN;
+ /* Always use tiling for the overlay */
+ colorpitch |= R300_COLORTILE;

- accel_state->texW[0] = pPriv->w;
- accel_state->texH[0] = pPriv->h;
+ if (pPriv->id == FOURCC_UYVY)
+ txformat1 = R300_TX_FORMAT_YVYU422;
+ else
+ txformat1 = R300_TX_FORMAT_VYUY422;

- txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
- R300_TX_MAG_FILTER_LINEAR | R300_TX_MIN_FILTER_LINEAR);
+ txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;

- /* pitch is in pixels */
- txpitch = BUFFER_PITCH / 2;
- txpitch -= 1;
+ /* pitch is in pixels */
+ txpitch = pPriv->SrcBufferPitch / 2;
+ txpitch -= 1;
+
+ txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+ (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+
+ accel_state->texW[0] = pPriv->w;
+ accel_state->texH[0] = pPriv->h;
+
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+ R300_TX_MAG_FILTER_LINEAR |
+ R300_TX_MIN_FILTER_LINEAR |
+ (0 << R300_TX_ID_SHIFT));
+
+ txoffset = pPriv->SrcBufferOffset + rhdPtr->FbIntAddress;
+
+ BEGIN_VIDEO(6);
+ OUT_VIDEO_REG(R300_TX_FILTER0_0, txfilter);
+ OUT_VIDEO_REG(R300_TX_FILTER1_0, 0);
+ OUT_VIDEO_REG(R300_TX_FORMAT0_0, txformat0);
+ OUT_VIDEO_REG(R300_TX_FORMAT1_0, txformat1);
+ OUT_VIDEO_REG(R300_TX_FORMAT2_0, txpitch);
+ OUT_VIDEO_REG(R300_TX_OFFSET_0, txoffset);
+ FINISH_VIDEO();

- if (IS_R500_3D && ((pPriv->w - 1) & 0x800))
- txpitch |= R500_TXWIDTH_11;
+ txenable = R300_TEX_0_ENABLE;

- if (IS_R500_3D && ((pPriv->h - 1) & 0x800))
- txpitch |= R500_TXHEIGHT_11;
+ if (pPriv->bicubic_enabled) {
+ /* Size is 128x1 */
+ txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
+ (0x0 << R300_TXHEIGHT_SHIFT) |
+ R300_TXPITCH_EN);
+ /* Format is 32-bit floats, 4bpp */
+ txformat1 = R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
+ /* Pitch is 127 (128-1) */
+ txpitch = 0x7f;
+ /* Tex filter */
+ txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_WRAP) |
+ R300_TX_CLAMP_T(R300_TX_CLAMP_WRAP) |
+ R300_TX_MIN_FILTER_NEAREST |
+ R300_TX_MAG_FILTER_NEAREST |
+ (1 << R300_TX_ID_SHIFT));

BEGIN_VIDEO(6);
- OUT_VIDEO_REG(R300_TX_FILTER0_0, txfilter);
- OUT_VIDEO_REG(R300_TX_FILTER1_0, 0);
- OUT_VIDEO_REG(R300_TX_FORMAT0_0, txformat0);
- OUT_VIDEO_REG(R300_TX_FORMAT1_0, txformat1);
- OUT_VIDEO_REG(R300_TX_FORMAT2_0, txpitch);
- OUT_VIDEO_REG(R300_TX_OFFSET_0, FB_BUFFER_OFFSET);
+ OUT_VIDEO_REG(R300_TX_FILTER0_1, txfilter);
+ OUT_VIDEO_REG(R300_TX_FILTER1_1, 0);
+ OUT_VIDEO_REG(R300_TX_FORMAT0_1, txformat0);
+ OUT_VIDEO_REG(R300_TX_FORMAT1_1, txformat1);
+ OUT_VIDEO_REG(R300_TX_FORMAT2_1, txpitch);
+ OUT_VIDEO_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset);
FINISH_VIDEO();

- txenable = R300_TEX_0_ENABLE;
+ /* Enable tex 1 */
+ txenable |= R300_TEX_1_ENABLE;
+ }

- /* setup the VAP */
- if (HAS_TCL)
+ /* setup the VAP */
+ if (HAS_TCL) {
+ if (pPriv->bicubic_enabled)
+ BEGIN_VIDEO(7);
+ else
BEGIN_VIDEO(6);
+ } else {
+ if (pPriv->bicubic_enabled)
+ BEGIN_VIDEO(5);
else
BEGIN_VIDEO(4);
+ }

- /* These registers define the number, type, and location of data
submitted
- * to the PVS unit of GA input (when PVS is disabled)
- * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL
is
- * enabled. This memory provides the imputs to the vertex shader
program
- * and ordering is not important. When PVS/TCL is disabled, this field
maps
- * directly to the GA input memory and the order is signifigant. In
- * PVS_BYPASS mode the order is as follows:
- * Position
- * Point Size
- * Color 0-3
- * Textures 0-7
- * Fog
- */
+ /* These registers define the number, type, and location of data submitted
+ * to the PVS unit of GA input (when PVS is disabled)
+ * DST_VEC_LOC is the slot in the PVS input vector memory when PVS/TCL is
+ * enabled. This memory provides the inputs to the vertex shader program
+ * and ordering is not important. When PVS/TCL is disabled, this field
maps
+ * directly to the GA input memory and the order is significant. In
+ * PVS_BYPASS mode the order is as follows:
+ * Position
+ * Point Size
+ * Color 0-3
+ * Textures 0-7
+ * Fog
+ */
+ if (pPriv->bicubic_enabled) {
+ OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+ (0 << R300_SKIP_DWORDS_0_SHIFT) |
+ (0 << R300_DST_VEC_LOC_0_SHIFT) |
+ R300_SIGNED_0 |
+ (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+ (0 << R300_SKIP_DWORDS_1_SHIFT) |
+ (6 << R300_DST_VEC_LOC_1_SHIFT) |
+ R300_SIGNED_1));
+ OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_1,
+ ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+ (0 << R300_SKIP_DWORDS_2_SHIFT) |
+ (7 << R300_DST_VEC_LOC_2_SHIFT) |
+ R300_LAST_VEC_2 |
+ R300_SIGNED_2));
+ } else {
OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
(0 << R300_SKIP_DWORDS_0_SHIFT) |
@@ -360,15 +392,24 @@
(6 << R300_DST_VEC_LOC_1_SHIFT) |
R300_LAST_VEC_1 |
R300_SIGNED_1));
+ }

- /* load the vertex shader
- * We pre-load vertex programs in RADEONInit3DEngine():
- * - exa no mask
- * - exa mask
- * - Xv
- * Here we select the offset of the vertex program we want to use
- */
- if (HAS_TCL) {
+ /* load the vertex shader
+ * We pre-load vertex programs in RADEONInit3DEngine():
+ * - exa mask/Xv bicubic
+ * - exa no mask
+ * - Xv
+ * Here we select the offset of the vertex program we want to use
+ */
+ if (HAS_TCL) {
+ if (pPriv->bicubic_enabled) {
+ OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_0,
+ ((0 << R300_PVS_FIRST_INST_SHIFT) |
+ (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (2 << R300_PVS_LAST_INST_SHIFT)));
+ OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_1,
+ (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ } else {
OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_0,
((5 << R300_PVS_FIRST_INST_SHIFT) |
(6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
@@ -376,435 +417,748 @@
OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_1,
(6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
}
+ }

- /* Position and one set of 2 texture coordinates */
- OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
+ /* Position and one set of 2 texture coordinates */
+ OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
+ if (pPriv->bicubic_enabled)
+ OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_1, ((2 << R300_TEX_0_COMP_CNT_SHIFT)
|
+ (2 <<
R300_TEX_1_COMP_CNT_SHIFT)));
+ else
OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
- OUT_VIDEO_REG(R300_US_OUT_FMT_0, output_fmt);
- FINISH_VIDEO();

- /* setup pixel shader */
- if (IS_R300_3D) {
- BEGIN_VIDEO(8);
- /* 2 components: 2 for tex0 */
- OUT_VIDEO_REG(R300_RS_COUNT,
+ OUT_VIDEO_REG(R300_US_OUT_FMT_0, output_fmt);
+ FINISH_VIDEO();
+
+ /* setup pixel shader */
+ if (pPriv->bicubic_enabled) {
+ BEGIN_VIDEO(79);
+
+ /* 4 components: 2 for tex0 and 2 for tex1 */
+ OUT_VIDEO_REG(R300_RS_COUNT, ((4 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+ R300_RS_COUNT_HIRES_EN));
+
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1));
+
+ /* Pixel stack frame size. */
+ OUT_VIDEO_REG(R300_US_PIXSIZE, 5);
+
+ /* Indirection levels */
+ OUT_VIDEO_REG(R300_US_CONFIG, ((2 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ /* Set nodes. */
+ OUT_VIDEO_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(14) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(6)));
+
+ /* Nodes are allocated highest first, but executed lowest first */
+ OUT_VIDEO_REG(R300_US_CODE_ADDR_0, 0);
+ OUT_VIDEO_REG(R300_US_CODE_ADDR_1, (R300_ALU_START(0) |
+ R300_ALU_SIZE(0) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(0)));
+ OUT_VIDEO_REG(R300_US_CODE_ADDR_2, (R300_ALU_START(1) |
+ R300_ALU_SIZE(9) |
+ R300_TEX_START(1) |
+ R300_TEX_SIZE(0)));
+ OUT_VIDEO_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(11) |
+ R300_ALU_SIZE(2) |
+ R300_TEX_START(2) |
+ R300_TEX_SIZE(3) |
+ R300_RGBA_OUT));
+
+ /* ** BICUBIC FP ** */
+
+ /* texcoord0 => temp0
+ * texcoord1 => temp1 */
+
+ /* first node */
+ /* TEX temp2, temp1.rrr0, tex1, 1D */
+ OUT_VIDEO_REG(R300_US_TEX_INST(0), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(1) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(2)));
+
+ /* MOV temp1.r, temp1.ggg0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(0),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
+ R300_ALU_RGB_ADDRD(1) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(0),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDRD(1) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+
+ /* second node */
+ /* TEX temp1, temp1, tex1, 1D */
+ OUT_VIDEO_REG(R300_US_TEX_INST(1), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(1) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(1)));
+
+ /* MUL temp3.rg, temp2.ggg0, const0.rgb0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(1),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
+
R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDRD(3) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(1),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(3) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+
+ /* MUL temp2.rg, temp2.rrr0, const0.rgb */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(2),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(2) |
+
R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(0)) |
+ R300_ALU_RGB_ADDRD(2) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(2),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(2) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp4.rg, temp1.ggg0, const1.rgb, temp3.rgb0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(3),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(1) |
+
R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(4) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(3),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(4) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp5.rg, temp1.ggg0, const1.rgb, temp2.rgb0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(4),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_GGG) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(1) |
+
R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(5) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(4),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(5) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp3.rg, temp1.rrr0, const1.rgb, temp3.rgb0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(5),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(5), (R300_ALU_RGB_ADDR0(1) |
+
R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(3) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(5),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(5), (R300_ALU_ALPHA_ADDRD(3) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* MAD temp1.rg, temp1.rrr0, const1.rgb, temp2.rgb0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(6),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RRR) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(6), (R300_ALU_RGB_ADDR0(1) |
+
R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+ R300_ALU_RGB_ADDR2(2) |
+ R300_ALU_RGB_ADDRD(1) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(6),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(6), (R300_ALU_ALPHA_ADDRD(1) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp1.rg, temp0.rgb0, temp1.rgb0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(7),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(7), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(1) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(7),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(7), (R300_ALU_ALPHA_ADDRD(1) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp2.rg, temp0.rgb0, temp3.rgb0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(8),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(8), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(3) |
+ R300_ALU_RGB_ADDRD(2) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(8),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(8), (R300_ALU_ALPHA_ADDRD(2) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp3.rg, temp0.rgb0, temp5.rgb0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(9),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(9), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(5) |
+ R300_ALU_RGB_ADDRD(3) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(9),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(9), (R300_ALU_ALPHA_ADDRD(3) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+ /* ADD temp0.rg, temp0.rgb0, temp4.rgb0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(10),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC2_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(10), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR2(4) |
+ R300_ALU_RGB_ADDRD(0) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_R |
+ R300_ALU_RGB_MASK_G)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(10),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(10), (R300_ALU_ALPHA_ADDRD(0) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+
+
+ /* third node */
+ /* TEX temp4, temp1.rg--, tex0, 1D */
+ OUT_VIDEO_REG(R300_US_TEX_INST(2), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(1) |
+ R300_TEX_DST_ADDR(4)));
+
+ /* TEX temp3, temp3.rg--, tex0, 1D */
+ OUT_VIDEO_REG(R300_US_TEX_INST(3), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(3) |
+ R300_TEX_DST_ADDR(3)));
+
+ /* TEX temp5, temp2.rg--, tex0, 1D */
+ OUT_VIDEO_REG(R300_US_TEX_INST(4), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(2) |
+ R300_TEX_DST_ADDR(5)));
+
+ /* TEX temp0, temp0.rg--, tex0, 1D */
+ OUT_VIDEO_REG(R300_US_TEX_INST(5), (R300_TEX_INST(R300_TEX_INST_LD) |
+ R300_TEX_ID(0) |
+ R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0)));
+
+ /* LRP temp3, temp1.bbbb, temp4, temp3 ->
+ * - PRESUB temps, temp4 - temp3
+ * - MAD temp3, temp1.bbbb, temps, temp3 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(11),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+
R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(11), (R300_ALU_RGB_ADDR0(3) |
+ R300_ALU_RGB_ADDR1(4) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(3) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(11),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(11), (R300_ALU_ALPHA_ADDR0(3) |
+ R300_ALU_ALPHA_ADDR1(4) |
+ R300_ALU_ALPHA_ADDR2(1) |
+ R300_ALU_ALPHA_ADDRD(3) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
+
+ /* LRP temp0, temp1.bbbb, temp5, temp0 ->
+ * - PRESUB temps, temp5 - temp0
+ * - MAD temp0, temp1.bbbb, temps, temp0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(12),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+
R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0) |
+ R300_ALU_RGB_INSERT_NOP));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(12), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(5) |
+ R300_ALU_RGB_ADDR2(1) |
+ R300_ALU_RGB_ADDRD(0) |
+
R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(12),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(12), (R300_ALU_ALPHA_ADDR0(0) |
+ R300_ALU_ALPHA_ADDR1(5) |
+ R300_ALU_ALPHA_ADDR2(1) |
+ R300_ALU_ALPHA_ADDRD(0) |
+
R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
+
+ /* LRP output, temp2.bbbb, temp3, temp0 ->
+ * - PRESUB temps, temp3 - temp0
+ * - MAD output, temp2.bbbb, temps, temp0 */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST(13),
(R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC2_BBB) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRCP_RGB) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_SRC0_RGB) |
+
R300_ALU_RGB_SRCP_OP(R300_ALU_RGB_SRCP_OP_RGB1_MINUS_RGB0)));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR(13), (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(3) |
+ R300_ALU_RGB_ADDR2(2) |
+
R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_RGB)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST(13),
(R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC2_B) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_SRCP_A) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC0_A)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR(13), (R300_ALU_ALPHA_ADDR0(0) |
+ R300_ALU_ALPHA_ADDR1(3) |
+ R300_ALU_ALPHA_ADDR2(2) |
+
R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A)));
+
+ /* Shader constants. */
+ OUT_VIDEO_REG(R300_US_ALU_CONST_R(0), F_TO_24(1.0/(float)pPriv->w));
+ OUT_VIDEO_REG(R300_US_ALU_CONST_G(0), 0);
+ OUT_VIDEO_REG(R300_US_ALU_CONST_B(0), 0);
+ OUT_VIDEO_REG(R300_US_ALU_CONST_A(0), 0);
+
+ OUT_VIDEO_REG(R300_US_ALU_CONST_R(1), 0);
+ OUT_VIDEO_REG(R300_US_ALU_CONST_G(1), F_TO_24(1.0/(float)pPriv->h));
+ OUT_VIDEO_REG(R300_US_ALU_CONST_B(1), 0);
+ OUT_VIDEO_REG(R300_US_ALU_CONST_A(1), 0);
+
+ FINISH_VIDEO();
+ } else {
+ BEGIN_VIDEO(11);
+ /* 2 components: 2 for tex0 */
+ OUT_VIDEO_REG(R300_RS_COUNT,
((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
R300_RS_COUNT_HIRES_EN));
- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) |
R300_TX_OFFSET_RS(6));
+ /* R300_INST_COUNT_RS - highest RS instruction used */
+ OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0));

- OUT_VIDEO_REG(R300_US_CODE_OFFSET,
- (R300_ALU_CODE_OFFSET(0) |
- R300_ALU_CODE_SIZE(1) |
- R300_TEX_CODE_OFFSET(0) |
- R300_TEX_CODE_SIZE(1)));
-
- OUT_VIDEO_REG(R300_US_CODE_ADDR_3,
- (R300_ALU_START(0) |
- R300_ALU_SIZE(0) |
- R300_TEX_START(0) |
- R300_TEX_SIZE(0) |
- R300_RGBA_OUT));
-
- /* tex inst is preloaded in RADEONInit3DEngine() */
-
- /* ALU inst */
- /* RGB */
- OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR_0,
- (R300_ALU_RGB_ADDR0(0) |
- R300_ALU_RGB_ADDR1(0) |
- R300_ALU_RGB_ADDR2(0) |
- R300_ALU_RGB_ADDRD(0) |
- R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
+ OUT_VIDEO_REG(R300_US_PIXSIZE, 0); /* highest temp used */
+
+ /* Indirection levels */
+ OUT_VIDEO_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+ R300_FIRST_TEX));
+
+ OUT_VIDEO_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+ R300_ALU_CODE_SIZE(1) |
+ R300_TEX_CODE_OFFSET(0) |
+ R300_TEX_CODE_SIZE(1)));
+
+ OUT_VIDEO_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+ R300_ALU_SIZE(0) |
+ R300_TEX_START(0) |
+ R300_TEX_SIZE(0) |
+ R300_RGBA_OUT));
+
+ /* tex inst */
+ OUT_VIDEO_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+ R300_TEX_DST_ADDR(0) |
+ R300_TEX_ID(0) |
+ R300_TEX_INST(R300_TEX_INST_LD)));
+
+ /* ALU inst */
+ /* RGB */
+ OUT_VIDEO_REG(R300_US_ALU_RGB_ADDR_0, (R300_ALU_RGB_ADDR0(0) |
+ R300_ALU_RGB_ADDR1(0) |
+ R300_ALU_RGB_ADDR2(0) |
+ R300_ALU_RGB_ADDRD(0) |
+
R300_ALU_RGB_OMASK((R300_ALU_RGB_MASK_R |
R300_ALU_RGB_MASK_G |
R300_ALU_RGB_MASK_B)) |
- R300_ALU_RGB_TARGET_A));
- OUT_VIDEO_REG(R300_US_ALU_RGB_INST_0,
- (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
- R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
- R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
- R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
- R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
- R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
- R300_ALU_RGB_CLAMP));
- /* Alpha */
- OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR_0,
- (R300_ALU_ALPHA_ADDR0(0) |
- R300_ALU_ALPHA_ADDR1(0) |
- R300_ALU_ALPHA_ADDR2(0) |
- R300_ALU_ALPHA_ADDRD(0) |
- R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
- R300_ALU_ALPHA_TARGET_A |
- R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
- OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST_0,
- (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
- R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
- R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
- R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
- R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
- R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
- R300_ALU_ALPHA_CLAMP));
- FINISH_VIDEO();
- } else {
- BEGIN_VIDEO(18);
- /* 2 components: 2 for tex0 */
- OUT_VIDEO_REG(R300_RS_COUNT,
- ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
- R300_RS_COUNT_HIRES_EN));
+ R300_ALU_RGB_TARGET_A));
+ OUT_VIDEO_REG(R300_US_ALU_RGB_INST_0,
(R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+
R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+
R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+
R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+
R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_0) |
+
R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+
R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+
R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+ R300_ALU_RGB_CLAMP));
+ /* Alpha */
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_ADDR_0, (R300_ALU_ALPHA_ADDR0(0) |
+ R300_ALU_ALPHA_ADDR1(0) |
+ R300_ALU_ALPHA_ADDR2(0) |
+ R300_ALU_ALPHA_ADDRD(0) |
+
R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+ R300_ALU_ALPHA_TARGET_A |
+
R300_ALU_ALPHA_OMASK_W(R300_ALU_ALPHA_MASK_NONE)));
+ OUT_VIDEO_REG(R300_US_ALU_ALPHA_INST_0,
(R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
+
R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
+
R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
+
R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
+
R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0) |
+
R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
+
R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+
R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
+ R300_ALU_ALPHA_CLAMP));
+ FINISH_VIDEO();
+ }

- /* R300_INST_COUNT_RS - highest RS instruction used */
- OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) |
R300_TX_OFFSET_RS(6));
+ BEGIN_VIDEO(6);
+ OUT_VIDEO_REG(R300_TX_INVALTAGS, 0);
+ OUT_VIDEO_REG(R300_TX_ENABLE, txenable);

- OUT_VIDEO_REG(R500_US_CODE_ADDR, (R500_US_CODE_START_ADDR(0) |
- R500_US_CODE_END_ADDR(1)));
- OUT_VIDEO_REG(R500_US_CODE_RANGE, (R500_US_CODE_RANGE_ADDR(0) |
- R500_US_CODE_RANGE_SIZE(1)));
- OUT_VIDEO_REG(R500_US_CODE_OFFSET, 0);
- OUT_VIDEO_REG(R500_GA_US_VECTOR_INDEX, 0);
-
- /* tex inst */
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_RGB_WMASK_R |
- R500_INST_RGB_WMASK_G |
- R500_INST_RGB_WMASK_B |
- R500_INST_ALPHA_WMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
-
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_ID(0) |
- R500_TEX_INST_LD |
- R500_TEX_SEM_ACQUIRE |
- R500_TEX_IGNORE_UNCOVERED));
-
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_TEX_SRC_ADDR(0) |
- R500_TEX_SRC_S_SWIZ_R |
- R500_TEX_SRC_T_SWIZ_G |
- R500_TEX_DST_ADDR(0) |
- R500_TEX_DST_R_SWIZ_R |
- R500_TEX_DST_G_SWIZ_G |
- R500_TEX_DST_B_SWIZ_B |
- R500_TEX_DST_A_SWIZ_A));
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_DX_ADDR(0) |
- R500_DX_S_SWIZ_R |
- R500_DX_T_SWIZ_R |
- R500_DX_R_SWIZ_R |
- R500_DX_Q_SWIZ_R |
- R500_DY_ADDR(0) |
- R500_DY_S_SWIZ_R |
- R500_DY_T_SWIZ_R |
- R500_DY_R_SWIZ_R |
- R500_DY_Q_SWIZ_R));
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-
- /* ALU inst */
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
- R500_INST_TEX_SEM_WAIT |
- R500_INST_LAST |
- R500_INST_RGB_OMASK_R |
- R500_INST_RGB_OMASK_G |
- R500_INST_RGB_OMASK_B |
- R500_INST_ALPHA_OMASK |
- R500_INST_RGB_CLAMP |
- R500_INST_ALPHA_CLAMP));
-
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_RGB_ADDR0(0) |
- R500_RGB_ADDR1(0) |
- R500_RGB_ADDR1_CONST |
- R500_RGB_ADDR2(0) |
- R500_RGB_ADDR2_CONST));
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
- R500_ALPHA_ADDR1(0) |
- R500_ALPHA_ADDR1_CONST |
- R500_ALPHA_ADDR2(0) |
- R500_ALPHA_ADDR2_CONST));
-
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
- R500_ALU_RGB_R_SWIZ_A_R |
- R500_ALU_RGB_G_SWIZ_A_G |
- R500_ALU_RGB_B_SWIZ_A_B |
- R500_ALU_RGB_SEL_B_SRC0 |
- R500_ALU_RGB_R_SWIZ_B_1 |
- R500_ALU_RGB_B_SWIZ_B_1 |
- R500_ALU_RGB_G_SWIZ_B_1));
-
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
- R500_ALPHA_SWIZ_A_A |
- R500_ALPHA_SWIZ_B_1));
-
- OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
- R500_ALU_RGBA_R_SWIZ_0 |
- R500_ALU_RGBA_G_SWIZ_0 |
- R500_ALU_RGBA_B_SWIZ_0 |
- R500_ALU_RGBA_A_SWIZ_0));
- FINISH_VIDEO();
- }
+ OUT_VIDEO_REG(R300_RB3D_COLOROFFSET0, dst_offset);
+ OUT_VIDEO_REG(R300_RB3D_COLORPITCH0, colorpitch);

- BEGIN_VIDEO(5);
- OUT_VIDEO_REG(R300_TX_INVALTAGS, 0);
- OUT_VIDEO_REG(R300_TX_ENABLE, txenable);
-
- OUT_VIDEO_REG(R300_RB3D_COLOROFFSET0, dst_offset);
- OUT_VIDEO_REG(R300_RB3D_COLORPITCH0, colorpitch);
-
- blendcntl = RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO;
- /* no need to enable blending */
- OUT_VIDEO_REG(R300_RB3D_BLENDCNTL, blendcntl);
- FINISH_VIDEO();
+ /* no need to enable blending */
+ OUT_VIDEO_REG(R300_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE |
RADEON_DST_BLEND_GL_ZERO);

- BEGIN_VIDEO(1);
- OUT_VIDEO_REG(R300_VAP_VTX_SIZE, VTX_DWORD_COUNT);
- FINISH_VIDEO();
+ OUT_VIDEO_REG(R300_VAP_VTX_SIZE, vtx_count);
+ FINISH_VIDEO();

- } else {
-#ifdef IS_RADEON_DRIVER
- /* Same for R100/R200 */
- switch (pPixmap->drawable.bitsPerPixel) {
- case 16:
- if (pPixmap->drawable.depth == 15)
- dst_format = RADEON_COLOR_FORMAT_ARGB1555;
- else
- dst_format = RADEON_COLOR_FORMAT_RGB565;
- break;
- case 32:
- dst_format = RADEON_COLOR_FORMAT_ARGB8888;
- break;
- default:
- return;
- }

- if (pPriv->id == FOURCC_UYVY)
- txformat = RADEON_TXFORMAT_YVYU422;
- else
- txformat = RADEON_TXFORMAT_VYUY422;
+ /*
+ * Rendering of the actual polygon on >= R300:
+ *
+ * These chips can accept a quad, but will render it as
+ * two triangles which results in a diagonal tear. Instead
+ * we render a single, large triangle and use the scissor
+ * functionality to restrict it to the desired rectangle.
+ * Due to guardband limits on r3xx/r4xx, we can only use
+ * the single triangle up to 2880 pixels; above that we
+ * render as a quad.
+ *
+ * NOTE(review): the draw below always emits a 4-vertex
+ * QUAD_LIST clipped by the scissor; confirm this is intended.
+ */

- txformat |= RADEON_TXFORMAT_NON_POWER2;
+ while (nBox--) {
+ /* Only process box areas overlapping the actual display box */
+ radeon_box_intersect(&dstBox, &pPriv->act, pBox);

- colorpitch = dst_pitch >> pixel_shift;
+ if (radeon_box_area(&dstBox) > 0) {
+ /* Scale the source coordinates, there is some rounding in here,
not sure if critical */
+ srcBox.x1 = pPriv->vid.x1 + (((dstBox.x1 - pPriv->drw.x1) *
(pPriv->vid.x2 - pPriv->vid.x1)) / (pPriv->drw.x2 - pPriv->drw.x1));
+ srcBox.x2 = pPriv->vid.x1 + (((dstBox.x2 - pPriv->drw.x1) *
(pPriv->vid.x2 - pPriv->vid.x1)) / (pPriv->drw.x2 - pPriv->drw.x1));
+ srcBox.y1 = pPriv->vid.y1 + (((dstBox.y1 - pPriv->drw.y1) *
(pPriv->vid.y2 - pPriv->vid.y1)) / (pPriv->drw.y2 - pPriv->drw.y1));
+ srcBox.y2 = pPriv->vid.y1 + (((dstBox.y2 - pPriv->drw.y1) *
(pPriv->vid.y2 - pPriv->vid.y1)) / (pPriv->drw.y2 - pPriv->drw.y1));

- if (RADEONTilingEnabled(pScrn, pPixmap))
- colorpitch |= RADEON_COLOR_TILE_ENABLE;
+ /* Overlay offset, x = 256 pixels aligned and y = 2 pixels aligned
*/
+/*
+ dstBox.x1 -= pPriv->act.x1 & ~255;
+ dstBox.x2 -= pPriv->act.x1 & ~255;
+ dstBox.y1 -= pPriv->act.y1 & ~1;
+ dstBox.y2 -= pPriv->act.y1 & ~1;
+*/

- BEGIN_VIDEO(5);
+#if 0
+ /* Some info to verify all the math stuff */
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "txpitch: %d, txoffset: %d\n",
txpitch, txoffset);
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "dst_pitch: %d, colorpitch:
%d, dst_offset: %d\n", dst_pitch, colorpitch, dst_offset);
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "vid: %d, %d, %d, %d\n",
pPriv->vid.x1, pPriv->vid.y1, pPriv->vid.x2, pPriv->vid.y2);
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "drw: %d, %d, %d, %d\n",
pPriv->drw.x1, pPriv->drw.y1, pPriv->drw.x2, pPriv->drw.y2);
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "act: %d, %d, %d, %d\n",
pPriv->act.x1, pPriv->act.y1, pPriv->act.x2, pPriv->act.y2);
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "box: %d, %d, %d, %d\n",
pBox->x1, pBox->y1, pBox->x2, pBox->y2);
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "src: %d, %d, %d, %d\n",
srcBox.x1, srcBox.y1, srcBox.x2, srcBox.y2);
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO, "dst: %d, %d, %d, %d\n\n",
dstBox.x1, dstBox.y1, dstBox.x2, dstBox.y2);
+#endif

- OUT_VIDEO_REG(RADEON_PP_CNTL,
- RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
- OUT_VIDEO_REG(RADEON_RB3D_CNTL,
- dst_format | RADEON_ALPHA_BLEND_ENABLE);
- OUT_VIDEO_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+ /*
+ * Set up the scissor area to that of the output size.
+ */
+ BEGIN_VIDEO(2);
+ /* R300 has an offset */
+ OUT_VIDEO_REG(R300_SC_SCISSOR0, (((dstBox.x1 + 1088) <<
R300_SCISSOR_X_SHIFT) |
+ ((dstBox.y1 + 1088) <<
R300_SCISSOR_Y_SHIFT)));
+ OUT_VIDEO_REG(R300_SC_SCISSOR1, (((dstBox.x2 + 1088 - 1) <<
R300_SCISSOR_X_SHIFT) |
+ ((dstBox.y2 + 1088 - 1) <<
R300_SCISSOR_Y_SHIFT)));
+ FINISH_VIDEO();

- OUT_VIDEO_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+ BEGIN_RING(4 * vtx_count + 4);
+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ 4 * vtx_count));
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));

- OUT_VIDEO_REG(RADEON_RB3D_BLENDCNTL,
- RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+ if (pPriv->bicubic_enabled) {
+ /*
+ * This code is only executed on >= R300, so we don't
+ * have to deal with the legacy handling.
+ */
+ VTX_OUT_FILTER((float)dstBox.x1,
(float)dstBox.y1,
+ (float)srcBox.x1 / accel_state->texW[0],
(float)srcBox.y1 / accel_state->texH[0],
+ (float)srcBox.x1 + 0.5,
(float)srcBox.y1 + 0.5);
+ VTX_OUT_FILTER((float)dstBox.x1,
(float)dstBox.y2,
+ (float)srcBox.x1 / accel_state->texW[0],
(float)srcBox.y2 / accel_state->texH[0],
+ (float)srcBox.x1 + 0.5,
(float)srcBox.y2 + 0.5);
+ VTX_OUT_FILTER((float)dstBox.x2,
(float)dstBox.y2,
+ (float)srcBox.x2 / accel_state->texW[0],
(float)srcBox.y2 / accel_state->texH[0],
+ (float)srcBox.x2 + 0.5,
(float)srcBox.y2 + 0.5);
+ VTX_OUT_FILTER((float)dstBox.x2,
(float)dstBox.y1,
+ (float)srcBox.x2 / accel_state->texW[0],
(float)srcBox.y1 / accel_state->texH[0],
+ (float)srcBox.x2 + 0.5,
(float)srcBox.y1 + 0.5);
+ } else {
+ VTX_OUT( (float)dstBox.x1,
(float)dstBox.y1,
+ (float)srcBox.x1 / accel_state->texW[0],
(float)srcBox.y1 / accel_state->texH[0]);
+ VTX_OUT( (float)dstBox.x1,
(float)dstBox.y2,
+ (float)srcBox.x1 / accel_state->texW[0],
(float)srcBox.y2 / accel_state->texH[0]);
+ VTX_OUT( (float)dstBox.x2,
(float)dstBox.y2,
+ (float)srcBox.x2 / accel_state->texW[0],
(float)srcBox.y2 / accel_state->texH[0]);
+ VTX_OUT( (float)dstBox.x2,
(float)dstBox.y1,
+ (float)srcBox.x2 / accel_state->texW[0],
(float)srcBox.y1 / accel_state->texH[0]);
+ }

- FINISH_VIDEO();
+ /* flushing is pipelined, free/finish is not */
+ OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+
+ ADVANCE_RING();
+ }
+ pBox++;
+ }

+ BEGIN_VIDEO(3);
+ OUT_VIDEO_REG(R300_SC_CLIP_RULE, 0xAAAA);
+ OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
+ OUT_VIDEO_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ FINISH_VIDEO();

- if ((info->ChipFamily == CHIP_FAMILY_RV250) ||
- (info->ChipFamily == CHIP_FAMILY_RV280) ||
- (info->ChipFamily == CHIP_FAMILY_RS300) ||
- (info->ChipFamily == CHIP_FAMILY_R200)) {
-
- accel_state->texW[0] = pPriv->w;
- accel_state->texH[0] = pPriv->h;
-
- BEGIN_VIDEO(12);
-
- OUT_VIDEO_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
- OUT_VIDEO_REG(R200_SE_VTX_FMT_1,
- (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
-
- OUT_VIDEO_REG(R200_PP_TXFILTER_0,
- R200_MAG_FILTER_LINEAR |
- R200_MIN_FILTER_LINEAR |
- R200_YUV_TO_RGB);
- OUT_VIDEO_REG(R200_PP_TXFORMAT_0, txformat);
- OUT_VIDEO_REG(R200_PP_TXFORMAT_X_0, 0);
- OUT_VIDEO_REG(R200_PP_TXSIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_VIDEO_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
-
- OUT_VIDEO_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
-
- OUT_VIDEO_REG(R200_PP_TXCBLEND_0,
- R200_TXC_ARG_A_ZERO |
- R200_TXC_ARG_B_ZERO |
- R200_TXC_ARG_C_R0_COLOR |
- R200_TXC_OP_MADD);
- OUT_VIDEO_REG(R200_PP_TXCBLEND2_0,
- R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
- OUT_VIDEO_REG(R200_PP_TXABLEND_0,
- R200_TXA_ARG_A_ZERO |
- R200_TXA_ARG_B_ZERO |
- R200_TXA_ARG_C_R0_ALPHA |
- R200_TXA_OP_MADD);
- OUT_VIDEO_REG(R200_PP_TXABLEND2_0,
- R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
- FINISH_VIDEO();
- } else {
+ /* FIXME: Is this needed with overlay ??? */
+ DamageDamageRegion(pPriv->pDraw, &pPriv->clip);

- accel_state->texW[0] = 1;
- accel_state->texH[0] = 1;
+ /* Now setup the overlay part, command processor HAS to wait for rendering
to finish */
+ BEGIN_VIDEO(14);

- BEGIN_VIDEO(8);
+ /* Lock overlay registers to prevent incomplete updates */
+ OUT_VIDEO_REG(AVIVO_D1OVL_UPDATE, (1 << 16));

- OUT_VIDEO_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
- RADEON_SE_VTX_FMT_ST0);
+ /* Setup graphics/overlay blending */
+ /*
+ Per pixel overlay alpha mode
+ Pixel = overlay pixel + (1-PIX_ALPHA) * graphics pixel
+ PIX_ALPHA = 1 - alpha from graphics or overlay
+ */
+ OUT_VIDEO_REG(AVIVO_D1OVL_ALPHA_CONTROL, 3 |
+ (1 << 8) |
+ (1 << 16) );
+
+ /* Set depth, format, address translation, arraymode, full resolution and
crossbar, enable tiling */
+ OUT_VIDEO_REG(AVIVO_D1OVL_CONTROL1, (pPixmap->drawable.bitsPerPixel == 32
? 2 : 1) |
+ ((pPixmap->drawable.bitsPerPixel == 16
? 1 : 0) << 8) |
+ (2 << 20) );
+ OUT_VIDEO_REG(AVIVO_D1OVL_CONTROL2, 0);
+ OUT_VIDEO_REG(AVIVO_D1OVL_SWAPCNTL, 0);
+
+ OUT_VIDEO_REG(AVIVO_D1OVL_MATRIX_TRANSFORM_EN, 0);
+
+ /* Address and pitch of the rendered picture */
+ OUT_VIDEO_REG(AVIVO_D1OVL_SURFACE_ADDRESS, dst_offset & ~1 );
+/* OUT_VIDEO_REG(AVIVO_D1OVL_PITCH, dst_pitch >> pixel_shift); */
+ OUT_VIDEO_REG(AVIVO_D1OVL_PITCH, dst_pitch / pPriv->cpp);

- OUT_VIDEO_REG(RADEON_PP_TXFILTER_0, RADEON_MAG_FILTER_LINEAR |
- RADEON_MIN_FILTER_LINEAR |
- RADEON_YUV_TO_RGB);
- OUT_VIDEO_REG(RADEON_PP_TXFORMAT_0, txformat);
- OUT_VIDEO_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
- OUT_VIDEO_REG(RADEON_PP_TXCBLEND_0,
- RADEON_COLOR_ARG_A_ZERO |
- RADEON_COLOR_ARG_B_ZERO |
- RADEON_COLOR_ARG_C_T0_COLOR |
- RADEON_BLEND_CTL_ADD |
- RADEON_CLAMP_TX);
- OUT_VIDEO_REG(RADEON_PP_TXABLEND_0,
- RADEON_ALPHA_ARG_A_ZERO |
- RADEON_ALPHA_ARG_B_ZERO |
- RADEON_ALPHA_ARG_C_T0_ALPHA |
- RADEON_BLEND_CTL_ADD |
- RADEON_CLAMP_TX);
-
- OUT_VIDEO_REG(RADEON_PP_TEX_SIZE_0,
- (pPriv->w - 1) |
- ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
- OUT_VIDEO_REG(RADEON_PP_TEX_PITCH_0,
- pPriv->src_pitch - 32);
- FINISH_VIDEO();
- }
-#endif /* IS_RADEON_DRIVER */
- }
+ /* Subimage position and corner points, clip image to actual crtc size */
+/*
+ OUT_VIDEO_REG(AVIVO_D1OVL_SURFACE_OFFSET_X, pPriv->act.x1 & ~255);
+ OUT_VIDEO_REG(AVIVO_D1OVL_SURFACE_OFFSET_Y, pPriv->act.y1 & ~1);
+*/
+ OUT_VIDEO_REG(AVIVO_D1OVL_SURFACE_OFFSET_X, 0);
+ OUT_VIDEO_REG(AVIVO_D1OVL_SURFACE_OFFSET_Y, 0);
+
+ OUT_VIDEO_REG(AVIVO_D1OVL_START, (pPriv->act.x1 << 16) | pPriv->act.y1);
+ OUT_VIDEO_REG(AVIVO_D1OVL_END, (pPriv->act.x2 << 16) | pPriv->act.y2);
+
+ /* Enable the overlay */
+ OUT_VIDEO_REG(AVIVO_D1OVL_ENABLE, 1);
+
+ /* Release the lock, tear-free update takes place in next V-sync! */
+ OUT_VIDEO_REG(AVIVO_D1OVL_UPDATE, 0);
+
+ FINISH_VIDEO();
+}

- while (nBox--) {
- int srcX, srcY, srcw, srch;
- int dstX, dstY, dstw, dsth;
- xPointFixed srcTopLeft, srcTopRight, srcBottomLeft, srcBottomRight;
-
- dstX = pBox->x1 + dstxoff;
- dstY = pBox->y1 + dstyoff;
- dstw = pBox->x2 - pBox->x1;
- dsth = pBox->y2 - pBox->y1;
-
- srcX = ((pBox->x1 - pPriv->drw_x) * pPriv->src_w) / pPriv->dst_w;
- srcY = ((pBox->y1 - pPriv->drw_y) * pPriv->src_h) / pPriv->dst_h;
-
- srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
- srch = (pPriv->src_h * dsth) / pPriv->dst_h;
-
- srcTopLeft.x = IntToxFixed(srcX);
- srcTopLeft.y = IntToxFixed(srcY);
- srcTopRight.x = IntToxFixed(srcX + srcw);
- srcTopRight.y = IntToxFixed(srcY);
- srcBottomLeft.x = IntToxFixed(srcX);
- srcBottomLeft.y = IntToxFixed(srcY + srch);
- srcBottomRight.x = IntToxFixed(srcX + srcw);
- srcBottomRight.y = IntToxFixed(srcY + srch);
+void
+FUNC_NAME(RADEONDisplayOverlay)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+ VAR_PSCRN_PREAMBLE(pScrn);
+ THREEDSTATE_PREAMBLE();

+ CARD32 dst_offset, dst_pitch, dst_format;

-#if 0
- ErrorF("dst: %d, %d, %d, %d\n", dstX, dstY, dstw, dsth);
- ErrorF("src: %d, %d, %d, %d\n", srcX, srcY, srcw, srch);
-#endif
+ VIDEO_PREAMBLE();

-#ifdef ACCEL_CP
-# ifdef IS_RADEON_DRIVER
- if (info->ChipFamily < CHIP_FAMILY_R200) {
- BEGIN_RING(3 * VTX_DWORD_COUNT + 3);
- OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
- 3 * VTX_DWORD_COUNT + 1));
- OUT_RING(RADEON_CP_VC_FRMT_XY |
- RADEON_CP_VC_FRMT_ST0);
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
- RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- RADEON_CP_VC_CNTL_MAOS_ENABLE |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
- (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
- } else
-# endif /* IS_RADEON_DRIVER */
- {
- if (IS_R300_3D || IS_R500_3D)
- BEGIN_RING(4 * VTX_DWORD_COUNT + 4);
- else
- BEGIN_RING(4 * VTX_DWORD_COUNT + 2);
- OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
- 4 * VTX_DWORD_COUNT));
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
- RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
- }
-#else /* ACCEL_CP */
- if (IS_R300_3D || IS_R500_3D)
- BEGIN_VIDEO(2 + VTX_DWORD_COUNT * 4);
-# ifdef IS_RADEON_DRIVER
- else if (info->ChipFamily < CHIP_FAMILY_R200)
- BEGIN_VIDEO(1 + VTX_DWORD_COUNT * 3);
-# endif /* IS_RADEON_DRIVER */
- else
- BEGIN_VIDEO(1 + VTX_DWORD_COUNT * 4);
+ /* Overlay tear proof video only for R300 !!!!! */

-# ifdef IS_RADEON_DRIVER
- if (info->ChipFamily < CHIP_FAMILY_R200) {
- OUT_VIDEO_REG(RADEON_SE_VF_CNTL,
(RADEON_VF_PRIM_TYPE_RECTANGLE_LIST |
- RADEON_VF_PRIM_WALK_DATA |
- RADEON_VF_RADEON_MODE |
- (3 <<
RADEON_VF_NUM_VERTICES_SHIFT)));
- } else
-# endif /* IS_RADEON_DRIVER */
- {
- OUT_VIDEO_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
- RADEON_VF_PRIM_WALK_DATA |
- (4 <<
RADEON_VF_NUM_VERTICES_SHIFT)));
- }
-#endif
+ /* Wait for old updates to be taken */
+ int n;

-# ifdef IS_RADEON_DRIVER
- if (info->ChipFamily >= CHIP_FAMILY_R200)
-# endif /* IS_RADEON_DRIVER */
- VTX_OUT((float)dstX,
(float)dstY,
- xFixedToFloat(srcTopLeft.x) / accel_state->texW[0],
xFixedToFloat(srcTopLeft.y) / accel_state->texH[0]);
- VTX_OUT((float)dstX, (float)(dstY
+ dsth),
- xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0],
xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw),
(float)(dstY + dsth),
- xFixedToFloat(srcBottomRight.x) / accel_state->texW[0],
xFixedToFloat(srcBottomRight.y) / accel_state->texH[0]);
- VTX_OUT((float)(dstX + dstw),
(float)dstY,
- xFixedToFloat(srcTopRight.x) / accel_state->texW[0],
xFixedToFloat(srcTopRight.y) / accel_state->texH[0]);
+ n = 0;
+ while ((n < 5000) && ((RHDRegRead(rhdPtr, AVIVO_D1OVL_UPDATE) & 1) == 1)) {
+ usleep(10);
+ n++;
+ }
+
+ /* Select updated buffer. */
+ if (pPriv->ovlCurrent != 0) {
+ dst_offset = pPriv->Ovl2BufferOffset + rhdPtr->FbIntAddress;
+ } else {
+ dst_offset = pPriv->Ovl1BufferOffset + rhdPtr->FbIntAddress;
+ }
+ dst_pitch = pPriv->OvlBufferPitch;

- if (IS_R300_3D || IS_R500_3D)
- /* flushing is pipelined, free/finish is not */
- OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* Now set up the overlay part; the command processor HAS to wait for rendering to finish */

-#ifdef ACCEL_CP
- ADVANCE_RING();
-#else
- FINISH_VIDEO();
-#endif /* !ACCEL_CP */
+ BEGIN_VIDEO(2);
+ OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* We must wait for 3D to idle, in case the source was just written as a destination. */
+ OUT_VIDEO_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_VIDEO();

- pBox++;
- }
+ BEGIN_VIDEO(14);

- if (IS_R300_3D || IS_R500_3D) {
- BEGIN_VIDEO(2);
- OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
- } else
- BEGIN_VIDEO(1);
- OUT_VIDEO_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+ /* Lock overlay registers to prevent incomplete updates */
+ OUT_VIDEO_REG(AVIVO_D1OVL_UPDATE, (1 << 16));
+
+ /* Setup graphics/overlay blending */
+ /*
+ Per pixel overlay alpha mode
+ Pixel = overlay pixel + (1-PIX_ALPHA) * graphics pixel
+ PIX_ALPHA = 1 - alpha from graphics or overlay
+ */
+ OUT_VIDEO_REG(AVIVO_D1OVL_ALPHA_CONTROL, 3 |
+ (1 << 8) |
+ (1 << 16) );
+
+ /* 32bpp,
+ Z=0,
+ ACrYCb 8888,
+ disable multichip tile compacting,
+ physical memory,
+ no privileged access,
+ aligned linear array,
+ zero expansion for YCbCr */
+ OUT_VIDEO_REG(AVIVO_D1OVL_CONTROL1, (2) |
+ (0 << 4) |
+ (0 << 8) |
+ (0 << 12) |
+ (0 << 16) |
+ (0 << 17) |
+ (0 << 20) |
+ (0 << 24));
+ OUT_VIDEO_REG(AVIVO_D1OVL_CONTROL2, 0);
+ OUT_VIDEO_REG(AVIVO_D1OVL_SWAPCNTL, 0);
+
+ /* Convert YCbCr to RGB */
+ OUT_VIDEO_REG(AVIVO_D1OVL_MATRIX_TRANSFORM_EN, 1);
+
+
+ /* Address and pitch of the rendered picture */
+ OUT_VIDEO_REG(AVIVO_D1OVL_SURFACE_ADDRESS, dst_offset & ~1 );
+ OUT_VIDEO_REG(AVIVO_D1OVL_PITCH, dst_pitch / pPriv->cpp);
+
+ /* Subimage position and corner points, clip image to actual crtc size */
+ OUT_VIDEO_REG(AVIVO_D1OVL_SURFACE_OFFSET_X, 0);
+ OUT_VIDEO_REG(AVIVO_D1OVL_SURFACE_OFFSET_Y, 0);
+
+ OUT_VIDEO_REG(AVIVO_D1OVL_START, (pPriv->act.x1 << 16) | pPriv->act.y1);
+ OUT_VIDEO_REG(AVIVO_D1OVL_END, (pPriv->act.x2 << 16) | pPriv->act.y2);
+
+ /* Enable the overlay */
+ OUT_VIDEO_REG(AVIVO_D1OVL_ENABLE, 1);
+
+ /* Release the lock, tear-free update takes place in next V-sync! */
+ OUT_VIDEO_REG(AVIVO_D1OVL_UPDATE, 0);
+
FINISH_VIDEO();
+
+ /* Wait for CP to IDLE */
+/* RHDCSIdle(rhdPtr->CS);*/
+
+}

-#ifdef DAMAGE
- DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
-#endif
+void
+FUNC_NAME(RADEONStopTexturedVideo)(ScrnInfoPtr pScrn, pointer data)
+{
+ VAR_PSCRN_PREAMBLE(pScrn);
+ THREEDSTATE_PREAMBLE();
+
+ VIDEO_PREAMBLE();
+
+ BEGIN_VIDEO(2);
+ OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+ /* We must wait for any processing to finish. */
+ OUT_VIDEO_REG(RADEON_WAIT_UNTIL,
+ RADEON_WAIT_HOST_IDLECLEAN |
+ RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_3D_IDLECLEAN |
+ RADEON_WAIT_DMA_GUI_IDLE);
+ FINISH_VIDEO();
+
+ if (!accel_state->XHas3DEngineState)
+ RADEONInit3DEngine(pScrn);
+
+ BEGIN_VIDEO(1);
+ OUT_VIDEO_REG(AVIVO_D1OVL_ENABLE, 0);
+ FINISH_VIDEO();
}

#undef VTX_OUT
+#undef VTX_OUT_FILTER
#undef FUNC_NAME
diff -u -r xf86-video-radeonhd/src/rhd_video.c
xf86-video-radeonhd-work/src/rhd_video.c
--- xf86-video-radeonhd/src/rhd_video.c 2009-04-24 19:32:07.000000000 +0200
+++ xf86-video-radeonhd-work/src/rhd_video.c 2009-05-03 16:53:08.000000000
+0200
@@ -28,6 +28,8 @@
*
*/

+#define RHD_VID_ALIGNMENT 0x0100
+
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
@@ -47,6 +49,7 @@

#include "rhd.h"
#include "rhd_cs.h"
+#include "rhd_crtc.h"

#include "r5xx_regs.h"

@@ -67,27 +70,70 @@
*
*/
static Bool
-rhdXvAllocateEXA(ScrnInfoPtr pScrn, struct RHDPortPriv *pPriv, int size)
+rhdXvAllocateEXA(ScrnInfoPtr pScrn, struct RHDPortPriv *pPriv, int SrcSize,
int OvlSize)
{
- ExaOffscreenArea *area = pPriv->BufferHandle;
+ ExaOffscreenArea *SrcArea = pPriv->SrcBufferHandle;
+ ExaOffscreenArea *Ovl1Area = pPriv->Ovl1BufferHandle;
+ ExaOffscreenArea *Ovl2Area = pPriv->Ovl2BufferHandle;
+
+ if (!(Ovl1Area && (Ovl1Area->size == OvlSize))) {
+ if (Ovl1Area)
+ exaOffscreenFree(pScrn->pScreen, Ovl1Area);
+
+ Ovl1Area = exaOffscreenAlloc(pScrn->pScreen, OvlSize, RHD_VID_ALIGNMENT,
+ TRUE, NULL, NULL);
+ if (!Ovl1Area) {
+ pPriv->Ovl1BufferHandle = NULL;
+ pPriv->Ovl1BufferOffset = 0;
+ return FALSE;
+ } else {
+ pPriv->Ovl1BufferHandle = Ovl1Area;
+ pPriv->Ovl1BufferOffset = Ovl1Area->offset +
RHDPTR(pScrn)->FbScanoutStart;
+ }
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "%s: Overlay buffer 1, %i bytes at %i\n", __func__, OvlSize,
pPriv->Ovl1BufferOffset);
+ }

- if (area && (area->size == size))
- return TRUE;
+ if (!(Ovl2Area && (Ovl2Area->size == OvlSize))) {
+ if (Ovl2Area)
+ exaOffscreenFree(pScrn->pScreen, Ovl2Area);
+
+ Ovl2Area = exaOffscreenAlloc(pScrn->pScreen, OvlSize, RHD_VID_ALIGNMENT,
+ TRUE, NULL, NULL);
+ if (!Ovl2Area) {
+ pPriv->Ovl2BufferHandle = NULL;
+ pPriv->Ovl2BufferOffset = 0;
+ return FALSE;
+ } else {
+ pPriv->Ovl2BufferHandle = Ovl2Area;
+ pPriv->Ovl2BufferOffset = Ovl2Area->offset +
RHDPTR(pScrn)->FbScanoutStart;

- if (area)
- exaOffscreenFree(pScrn->pScreen, area);
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "%s: Overlay buffer 2, %i bytes at %i\n", __func__, OvlSize,
pPriv->Ovl2BufferOffset);
+ }
+ }
+
+ if (!(SrcArea && (SrcArea->size == SrcSize))) {
+ if (SrcArea)
+ exaOffscreenFree(pScrn->pScreen, SrcArea);
+
+ SrcArea = exaOffscreenAlloc(pScrn->pScreen, SrcSize, RHD_VID_ALIGNMENT,
+ TRUE, NULL, NULL);
+ if (!SrcArea) {
+ pPriv->SrcBufferHandle = NULL;
+ pPriv->SrcBufferOffset = 0;
+ return FALSE;
+ } else {
+ pPriv->SrcBufferHandle = SrcArea;
+ pPriv->SrcBufferOffset = SrcArea->offset +
RHDPTR(pScrn)->FbScanoutStart;
+ }

- area = exaOffscreenAlloc(pScrn->pScreen, size, RHD_FB_ALIGNMENT,
- TRUE, NULL, NULL);
- if (!area) {
- pPriv->BufferHandle = NULL;
- pPriv->BufferOffset = 0;
- return FALSE;
- } else {
- pPriv->BufferHandle = area;
- pPriv->BufferOffset = area->offset + RHDPTR(pScrn)->FbScanoutStart;
- return TRUE;
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "%s: Source buffer, %i bytes at %i\n", __func__, SrcSize,
pPriv->SrcBufferOffset);
}
+
+ return TRUE;
}
#endif /* USE_EXA */

@@ -95,8 +141,10 @@
*
*/
static FBLinearPtr
-rhdXvAllocateXAAHelper(ScreenPtr pScreen, FBLinearPtr linear, int size)
+rhdXvAllocateXAAHelper(ScrnInfoPtr pScrn, FBLinearPtr linear, int size)
{
+ int cpp = pScrn->bitsPerPixel >> 3;
+
if (linear) {
if (linear->size == size)
return linear;
@@ -104,25 +152,38 @@
if (xf86ResizeOffscreenLinear(linear, size))
return linear;

+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "%s: Freeing memory.\n", __func__);
+
xf86FreeOffscreenLinear(linear);
}

- linear = xf86AllocateOffscreenLinear(pScreen, size, 1,
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "%s: Allocating memory, %i bytes.\n", __func__, size * cpp);
+
+ linear = xf86AllocateOffscreenLinear(pScrn->pScreen, size, 1,
NULL, NULL, NULL);

if (!linear) {
int max_size;

- xf86QueryLargestOffscreenLinear(pScreen, &max_size, 1,
+ xf86QueryLargestOffscreenLinear(pScrn->pScreen, &max_size, 1,
PRIORITY_EXTREME);
+
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "%s: Maximum memory available, %i bytes.\n", __func__,
max_size * cpp);
+
if (max_size < size)
return NULL;

- xf86PurgeUnlockedOffscreenAreas(pScreen);
- linear = xf86AllocateOffscreenLinear(pScreen, size, 1,
+ xf86PurgeUnlockedOffscreenAreas(pScrn->pScreen);
+ linear = xf86AllocateOffscreenLinear(pScrn->pScreen, size, 1,
NULL, NULL, NULL);
}

+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "%s: Memory allocated at %i.\n", __func__, linear->offset * cpp);
+
return linear;
}

@@ -130,28 +191,55 @@
*
*/
static Bool
-rhdXvAllocateXAA(ScrnInfoPtr pScrn, struct RHDPortPriv *pPriv, int size)
+rhdXvAllocateXAA(ScrnInfoPtr pScrn, struct RHDPortPriv *pPriv, int SrcSize,
int OvlSize)
{
int cpp = pScrn->bitsPerPixel >> 3;
- FBLinearPtr linear;
+ FBLinearPtr SrcLinear;
+ FBLinearPtr Ovl1Linear;
+ FBLinearPtr Ovl2Linear;

/* We need to do FB alignment manually */
- size += RHD_FB_ALIGNMENT - 1;
+ SrcSize += RHD_VID_ALIGNMENT - 1;
+ OvlSize += RHD_VID_ALIGNMENT - 1;

/* XAA allocates in units of pixels */
- size = (size + cpp - 1) / cpp;
+ SrcSize = (SrcSize + cpp - 1) / cpp;
+ OvlSize = (OvlSize + cpp - 1) / cpp;

- linear = rhdXvAllocateXAAHelper(pScrn->pScreen, pPriv->BufferHandle, size);
- if (!linear) {
- pPriv->BufferHandle = NULL;
- pPriv->BufferOffset = 0;
+ Ovl1Linear = rhdXvAllocateXAAHelper(pScrn, pPriv->Ovl1BufferHandle,
OvlSize);
+ Ovl2Linear = rhdXvAllocateXAAHelper(pScrn, pPriv->Ovl2BufferHandle,
OvlSize);
+ SrcLinear = rhdXvAllocateXAAHelper(pScrn, pPriv->SrcBufferHandle, SrcSize);
+
+ if (!SrcLinear) {
+ pPriv->SrcBufferHandle = NULL;
+ pPriv->SrcBufferOffset = 0;
+ return FALSE;
+ } else {
+ pPriv->SrcBufferHandle = SrcLinear;
+ pPriv->SrcBufferOffset = RHDPTR(pScrn)->FbScanoutStart +
+ RHD_FB_CHUNK(SrcLinear->offset * cpp + RHD_VID_ALIGNMENT - 1);
+ }
+
+ if (!Ovl1Linear) {
+ pPriv->Ovl1BufferHandle = NULL;
+ pPriv->Ovl1BufferOffset = 0;
+ return FALSE;
+ } else {
+ pPriv->Ovl1BufferHandle = Ovl1Linear;
+ pPriv->Ovl1BufferOffset = RHDPTR(pScrn)->FbScanoutStart +
+ RHD_FB_CHUNK(Ovl1Linear->offset * cpp + RHD_VID_ALIGNMENT - 1);
+ }
+
+ if (!Ovl2Linear) {
+ pPriv->Ovl2BufferHandle = NULL;
+ pPriv->Ovl2BufferOffset = 0;
return FALSE;
} else {
- pPriv->BufferHandle = linear;
- pPriv->BufferOffset = RHDPTR(pScrn)->FbScanoutStart +
- RHD_FB_CHUNK(linear->offset * cpp + RHD_FB_ALIGNMENT - 1);
- return TRUE;
+ pPriv->Ovl2BufferHandle = Ovl2Linear;
+ pPriv->Ovl2BufferOffset = RHDPTR(pScrn)->FbScanoutStart +
+ RHD_FB_CHUNK(Ovl2Linear->offset * cpp + RHD_VID_ALIGNMENT - 1);
}
+ return TRUE;
}

/*
@@ -160,18 +248,26 @@
static void
rhdStopVideo(ScrnInfoPtr pScrn, pointer data, Bool cleanup)
{
- if (cleanup) {
- struct RHDPortPriv *pPriv = data;
+ struct RHDPortPriv *pPriv = data;
+
+ RHDRADEONStopTexturedVideo(pScrn, pPriv);

+ if (cleanup) {
switch (RHDPTR(pScrn)->AccelMethod) {
#ifdef USE_EXA
case RHD_ACCEL_EXA:
exaOffscreenFree(pScrn->pScreen,
- (ExaOffscreenArea *) pPriv->BufferHandle);
+ (ExaOffscreenArea *) pPriv->SrcBufferHandle);
+ exaOffscreenFree(pScrn->pScreen,
+ (ExaOffscreenArea *) pPriv->Ovl2BufferHandle);
+ exaOffscreenFree(pScrn->pScreen,
+ (ExaOffscreenArea *) pPriv->Ovl1BufferHandle);
break;
#endif /* USE_EXA */
case RHD_ACCEL_XAA:
- xf86FreeOffscreenLinear((FBLinearPtr) pPriv->BufferHandle);
+ xf86FreeOffscreenLinear((FBLinearPtr) pPriv->SrcBufferHandle);
+ xf86FreeOffscreenLinear((FBLinearPtr) pPriv->Ovl2BufferHandle);
+ xf86FreeOffscreenLinear((FBLinearPtr) pPriv->Ovl1BufferHandle);
break;
default:
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
@@ -179,8 +275,12 @@
break;
}

- pPriv->BufferHandle = NULL;
- pPriv->BufferOffset = 0;
+ pPriv->SrcBufferHandle = NULL;
+ pPriv->Ovl1BufferHandle = NULL;
+ pPriv->Ovl2BufferHandle = NULL;
+ pPriv->SrcBufferOffset = 0;
+ pPriv->Ovl1BufferOffset = 0;
+ pPriv->Ovl2BufferOffset = 0;
}
}

@@ -640,6 +740,157 @@
}
}

+
+/*
+ * Memory copy functions for non-scaled overlay playback
+ */
+static void
+R3xxPrepareSingleLine(RHDPtr rhdPtr,
+ CARD8 *dst,
+ CARD8 *src1,
+ CARD8 *src2,
+ CARD8 *src3,
+ int width,
+ int id)
+ {
+ /* Data is copied one line at a time.
+ *
+ * Target format:
+ *
+ * 32 bits/pixel A Cr Y Cb 8888 = A V Y U 8888
+ */
+
+ CARD32 *d = (CARD32 *) dst;
+ CARD8 *s1 = src1;
+ CARD8 *s2 = src2;
+ CARD8 *s3 = src3;
+ int i;
+
+ switch (id) {
+ case(FOURCC_UYVY):
+ /*
+ * 16 bits/pixel U0 Y0 V0 Y1 , U1 Y2 V1 Y3
+ * dst = target line buffer
+ * src1 = source line buffer
+ */
+ for (i = width / 2; i > 0; i --) {
+ /* U Y V */
+ d[0] = s1[0] | (s1[1] << 8) | (s1[2] << 16);
+ d[1] = s1[0] | (s1[3] << 8) | (s1[2] << 16);
+
+ d += 2;
+ s1 += 4;
+ }
+ break;
+ case(FOURCC_YUY2):
+ /*
+ * 16 bits/pixel Y0 U0 Y1 V0 , Y2, U1, Y3, V1
+ * dst = target line buffer
+ * src1 = source line buffer
+ */
+ for (i = width / 2; i > 0; i --) {
+ /* U Y V */
+ d[0] = s1[1] | (s1[0] << 8) | (s1[3] << 16);
+ d[1] = s1[1] | (s1[2] << 8) | (s1[3] << 16);
+
+ d += 2;
+ s1 += 4;
+ }
+ break;
+ case (FOURCC_YV12):
+ /*
+ * dst = target line buffer
+ * src1 = Y source line buffer
+ * src2 = U source line buffer
+ * src3 = V source line buffer
+ */
+ for (i = width / 2; i > 0; i --) {
+ /* U Y V */
+ d[0] = s3[0] | (s1[0] << 8) | (s2[0] << 16);
+ d[1] = s3[0] | (s1[1] << 8) | (s2[0] << 16);
+
+ d += 2;
+ s1 += 2;
+ s2 += 1;
+ s3 += 1;
+ }
+ break;
+ case (FOURCC_I420):
+ /* Not supported yet */
+ break;
+ }
+}
+
+static void
+R3xxXvCopyOverlay(RHDPtr rhdPtr,
+ CARD8 *dst,
+ CARD8 *src,
+ CARD16 dstPitch,
+ CARD32 dstOffset,
+ CARD16 srcPitch,
+ CARD16 w,
+ CARD16 h,
+ int id)
+{
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ CARD32 val, newval;
+ val = RHDRegRead(rhdPtr, R5XX_SURFACE_CNTL);
+ newval = val &
+ ~(R5XX_NONSURF_AP0_SWP_32BPP | R5XX_NONSURF_AP1_SWP_32BPP |
+ R5XX_NONSURF_AP0_SWP_16BPP | R5XX_NONSURF_AP1_SWP_16BPP);
+ RHDRegWrite(rhdPtr, R5XX_SURFACE_CNTL, newval);
+#endif
+
+ CARD8 *dst1;
+ CARD8 *src1 = src;
+ CARD8 *src2 = src1 + (w * h);
+ CARD8 *src3 = src2 + ((w * h) / 4);
+
+ int n = 0;
+
+ dst1 = dst + dstOffset;
+
+ while (h--) {
+ R3xxPrepareSingleLine(rhdPtr,
+ dst1,
+ src1,
+ src2,
+ src3,
+ w,
+ id);
+ src1 += srcPitch;
+
+ if (n & 1) {
+ src2 += srcPitch / 2;
+ src3 += srcPitch / 2;
+ }
+ n++;
+
+ dst1 += dstPitch;
+ }
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+ /* restore byte swapping */
+ RHDRegWrite(rhdPtr, R5XX_SURFACE_CNTL, val);
+#endif
+}
+
+/*
+ *
+ */
+
+void
+radeon_box_intersectB(BoxPtr dest, BoxPtr a, BoxPtr b)
+{
+ dest->x1 = a->x1 > b->x1 ? a->x1 : b->x1;
+ dest->x2 = a->x2 < b->x2 ? a->x2 : b->x2;
+ dest->y1 = a->y1 > b->y1 ? a->y1 : b->y1;
+ dest->y2 = a->y2 < b->y2 ? a->y2 : b->y2;
+
+ if (dest->x1 >= dest->x2 || dest->y1 >= dest->y2)
+ dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0;
+}
+
/*
*
*/
@@ -659,11 +910,18 @@
DrawablePtr pDraw)
{
RHDPtr rhdPtr = RHDPTR(pScrn);
+
+ struct rhdCrtc *Crtc;
+
struct RHDPortPriv *pPriv = data;
CARD8 *FBBuf;

+ int SrcBufferSize, OvlBufferSize;
+ BoxRec CrtcBox;
+
+
/*
- * First, make sure we can render to the drawable.
+ * First, make sure we can render to the drawable. (NEEDED!!!)
*/
if (pDraw->type == DRAWABLE_WINDOW)
pPriv->pPixmap = (*pScrn->pScreen->GetWindowPixmap)((WindowPtr)pDraw);
@@ -671,32 +929,63 @@
pPriv->pPixmap = (PixmapPtr)pDraw;


-#if defined(USE_EXA) && ((EXA_VERSION_MAJOR > 2) || (EXA_VERSION_MAJOR == 2 &&
EXA_VERSION_MINOR >= 1))
- if (rhdPtr->AccelMethod == RHD_ACCEL_EXA) {
- /* Force the pixmap into framebuffer so we can draw to it. */
- exaMoveInPixmap(pPriv->pPixmap);
- } else
-#endif
- /*
- * TODO: Copy the pixmap into the FB ourselves!!!
- */
- if (((rhdPtr->AccelMethod != RHD_ACCEL_NONE) || (rhdPtr->AccelMethod !=
RHD_ACCEL_SHADOWFB)) &&
- (((char *)pPriv->pPixmap->devPrivate.ptr < ((char *)rhdPtr->FbBase +
rhdPtr->FbScanoutStart)) ||
- ((char *)pPriv->pPixmap->devPrivate.ptr >= ((char *)rhdPtr->FbBase +
rhdPtr->FbMapSize)))) {
- /* If the pixmap wasn't in framebuffer, then we have no way to
- * force it there. So, we simply refuse to draw and fail.
- */
- xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
- "%s: pixmap is not in Framebuffer!\n", __func__);
- return BadAlloc;
- }

+ /* For overlay, two buffers are reserved. Their size is determined by the actual Crtc size.
+ Right now only Crtc[0] is supported; if the video is not in this area there will be no output.
+ Extending the support to Crtc[1] is not too complicated, but I first want to have this part working. */
+
+ /* Put all the stuff in BoxRec's */
+ /* Source */
+ pPriv->vid.x1 = src_x;
+ pPriv->vid.y1 = src_y;
+ pPriv->vid.x2 = src_x + src_w;
+ pPriv->vid.y2 = src_y + src_h;
+
+ /* Destination */
+ pPriv->drw.x1 = drw_x;
+ pPriv->drw.y1 = drw_y;
+ pPriv->drw.x2 = drw_x + drw_w;
+ pPriv->drw.y2 = drw_y + drw_h;
+
+ /* Source dimensions */
+ pPriv->w = width;
+ pPriv->h = height;
+
+ /* Other stuff */
+ pPriv->id = id;
pPriv->pDraw = pDraw;

+ /* Crtc[0] BoxRec and buffer size/pitch; take one additional line for alignment margin (needed??). */
+ Crtc = rhdPtr->Crtc[0];
+ if (Crtc && Crtc->Active) {
+ CrtcBox.x1 = Crtc->X;
+ CrtcBox.y1 = Crtc->Y;
+ CrtcBox.x2 = Crtc->X + Crtc->CurrentMode->CrtcHDisplay;
+ CrtcBox.y2 = Crtc->Y + Crtc->CurrentMode->CrtcVDisplay;
+ pPriv->cpp = pScrn->bitsPerPixel >> 3;;
+ pPriv->OvlBufferPitch = ALIGN(Crtc->CurrentMode->CrtcHDisplay *
pPriv->cpp, 256);
+ OvlBufferSize = pPriv->OvlBufferPitch *
(Crtc->CurrentMode->CrtcVDisplay + 1);
+
+ } else {
+ CrtcBox.x1 = 0;
+ CrtcBox.y1 = 0;
+ CrtcBox.x2 = 0;
+ CrtcBox.y2 = 0;
+ pPriv->OvlBufferPitch = 0;
+ OvlBufferSize = 0;
+ }
+
+ /* Source buffer size/pitch; take one additional line for alignment margin (needed??). */
if (rhdPtr->ChipSet >= RHD_R600)
- pPriv->BufferPitch = ALIGN(2 * width, 256);
+ pPriv->SrcBufferPitch = ALIGN(2 * width, 256);
else
- pPriv->BufferPitch = ALIGN(2 * width, 64);
+ pPriv->SrcBufferPitch = ALIGN(2 * width, 64);
+ SrcBufferSize = pPriv->SrcBufferPitch * (height + 1);
+
+ /* Create a BoxRec containing the overlapping area of the Crtc and destination.
+ This is the area actually rendered for this Crtc */
+ radeon_box_intersectB(&pPriv->act, &CrtcBox, &pPriv->drw);
+

/*
* Now, find out whether we have enough memory available.
@@ -704,34 +993,78 @@
switch (rhdPtr->AccelMethod) {
#ifdef USE_EXA
case RHD_ACCEL_EXA:
- rhdXvAllocateEXA(pScrn, pPriv, 2 * pPriv->BufferPitch * height);
+ rhdXvAllocateEXA(pScrn, pPriv, SrcBufferSize, OvlBufferSize);
break;
#endif /* USE_EXA */
case RHD_ACCEL_XAA:
- rhdXvAllocateXAA(pScrn, pPriv, 2 * pPriv->BufferPitch * height);
+ rhdXvAllocateXAA(pScrn, pPriv, SrcBufferSize, OvlBufferSize);
break;
default:
- pPriv->BufferHandle = NULL;
- pPriv->BufferOffset = 0;
+ pPriv->SrcBufferHandle = NULL;
+ pPriv->SrcBufferOffset = 0;
+ pPriv->Ovl1BufferHandle = NULL;
+ pPriv->Ovl1BufferOffset = 0;
+ pPriv->Ovl2BufferHandle = NULL;
+ pPriv->Ovl2BufferOffset = 0;
break;
}

- if (!pPriv->BufferHandle) {
+ if (!pPriv->SrcBufferHandle ||
+ !pPriv->Ovl1BufferHandle ||
+ !pPriv->Ovl2BufferHandle) {
xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
"%s: Failed to allocate framebuffer memory.\n", __func__);
return BadAlloc;
}

- if (rhdPtr->ChipSet >= RHD_R600)
- pPriv->BufferOffset = (pPriv->BufferOffset + 255) & ~255;
+ /* Special situation if no scaling is needed */
+ /* HIGHLY EXPERIMENTAL */
+ if ((src_w == drw_w) && (src_h == drw_h)) {
+ /* Special processing */
+
+ CARD32 OvlOffset;
+
+ /* Select buffer for off-screen overlay buffer destination. Toggle between 1 and 2 */
+ if (pPriv->ovlCurrent != 0) {
+ FBBuf = (CARD8 *)rhdPtr->FbBase + pPriv->Ovl1BufferOffset;
+ pPriv->ovlCurrent = 0;
+ } else {
+ FBBuf = (CARD8 *)rhdPtr->FbBase + pPriv->Ovl2BufferOffset;
+ pPriv->ovlCurrent = 1;
+ }
+
+
+ OvlOffset = (pPriv->act.x1 * 4) + (pPriv->act.y1 *
pPriv->OvlBufferPitch);
+
+ /* Needs fixing in case of sub section of big image displayed !! */
+ R3xxXvCopyOverlay(rhdPtr,
+ FBBuf,
+ buf,
+ pPriv->OvlBufferPitch,
+ OvlOffset,
+ width, /* source pitch */
+ width,
+ height,
+ id);
+
+ RHDRADEONDisplayOverlay(pScrn, pPriv);
+/*
+ xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+ "%s: Buffer address: %i, OvlPitch: %i, OvlOffset: %i, width:
%i, height: %i, id: %i.\n",
+ __func__, FBBuf, pPriv->OvlBufferPitch,
+ (pPriv->act.x1 * 4) + (pPriv->OvlBufferPitch *
pPriv->act.y1),
+ width, height, id);
+*/
+ return Success;
+ }

/*
- * Now copy the buffer to the framebuffer, and convert to planar when
necessary.
+ * Now copy the source image to the framebuffer, and convert to planar
when necessary.
*/
if (rhdPtr->ChipSet >= RHD_R600)
- FBBuf = (CARD8 *)rhdPtr->FbBase + rhdPtr->FbScanoutStart +
pPriv->BufferOffset;
+ FBBuf = (CARD8 *)rhdPtr->FbBase + rhdPtr->FbScanoutStart +
pPriv->SrcBufferOffset;
else
- FBBuf = (CARD8 *)rhdPtr->FbBase + pPriv->BufferOffset;
+ FBBuf = (CARD8 *)rhdPtr->FbBase + pPriv->SrcBufferOffset;

switch(id) {
case FOURCC_YV12:
@@ -744,48 +1077,51 @@

if (id == FOURCC_YV12) {
if (rhdPtr->ChipSet >= RHD_R600) {
- pPriv->BufferPitch = ALIGN(width, 256);
+ pPriv->SrcBufferPitch = ALIGN(width, 256);
if (rhdPtr->cardType != RHD_CARD_AGP)
R600CopyPlanarHW(pScrn, buf, buf + s3offset, buf +
s2offset,
- pPriv->BufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart,
- srcPitch, srcPitch2,
pPriv->BufferPitch,
+ pPriv->SrcBufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart,
+ srcPitch, srcPitch2,
pPriv->SrcBufferPitch,
width, height);
else
R600CopyPlanarSW(pScrn, buf, buf + s3offset, buf +
s2offset,
FBBuf,
- srcPitch, srcPitch2,
pPriv->BufferPitch,
+ srcPitch, srcPitch2,
pPriv->SrcBufferPitch,
width, height);
} else if (rhdPtr->CS->Type == RHD_CS_CPDMA)
R5xxXvCopyPlanarDMA(rhdPtr, buf, buf + s2offset,
buf + s3offset, FBBuf, srcPitch,
- srcPitch2, pPriv->BufferPitch,
+ srcPitch2, pPriv->SrcBufferPitch,
height, width);
else
R5xxXvCopyPlanar(rhdPtr, buf, buf + s2offset,
buf + s3offset, FBBuf, srcPitch,
- srcPitch2, pPriv->BufferPitch,
+ srcPitch2, pPriv->SrcBufferPitch,
height, width);
} else {
if (rhdPtr->ChipSet >= RHD_R600) {
if (rhdPtr->cardType != RHD_CARD_AGP)
R600CopyPlanarHW(pScrn, buf, buf + s2offset, buf +
s3offset,
- pPriv->BufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart,
- srcPitch, srcPitch2,
pPriv->BufferPitch,
+ pPriv->SrcBufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart,
+ srcPitch, srcPitch2,
pPriv->SrcBufferPitch,
width, height);
else
R600CopyPlanarSW(pScrn, buf, buf + s2offset, buf +
s3offset,
FBBuf,
- srcPitch, srcPitch2,
pPriv->BufferPitch,
+ srcPitch, srcPitch2,
pPriv->SrcBufferPitch,
width, height);
- } else if (rhdPtr->CS->Type == RHD_CS_CPDMA)
+ }
+/* Why use the CP? This means copying the image twice in memory!
+ else if (rhdPtr->CS->Type == RHD_CS_CPDMA)
R5xxXvCopyPlanarDMA(rhdPtr, buf, buf + s3offset,
buf + s2offset, FBBuf, srcPitch,
- srcPitch2, pPriv->BufferPitch,
+ srcPitch2, pPriv->SrcBufferPitch,
height, width);
+*/
else
R5xxXvCopyPlanar(rhdPtr, buf, buf + s3offset,
buf + s2offset, FBBuf, srcPitch,
- srcPitch2, pPriv->BufferPitch,
+ srcPitch2, pPriv->SrcBufferPitch,
height, width);
}
}
@@ -794,25 +1130,29 @@
case FOURCC_YUY2:
default:
if (rhdPtr->ChipSet >= RHD_R600) {
- pPriv->BufferPitch = ALIGN(2 * width, 256);
+ pPriv->SrcBufferPitch = ALIGN(2 * width, 256);
if (rhdPtr->cardType != RHD_CARD_AGP)
- R600CopyPackedHW(pScrn, buf, pPriv->BufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart,
- 2 * width, pPriv->BufferPitch,
+ R600CopyPackedHW(pScrn, buf, pPriv->SrcBufferOffset +
rhdPtr->FbIntAddress + rhdPtr->FbScanoutStart,
+ 2 * width, pPriv->SrcBufferPitch,
width, height);
else
R600CopyPackedSW(pScrn, buf, FBBuf,
- 2 * width, pPriv->BufferPitch,
+ 2 * width, pPriv->SrcBufferPitch,
width, height);
} else if (rhdPtr->CS->Type == RHD_CS_CPDMA)
R5xxXvCopyPackedDMA(rhdPtr, buf, FBBuf, 2 * width,
- pPriv->BufferPitch, height);
+ pPriv->SrcBufferPitch, height);
else
R5xxXvCopyPacked(rhdPtr, buf, FBBuf, 2 * width,
- pPriv->BufferPitch, height);
+ pPriv->SrcBufferPitch, height);
break;
}

/*
+ * Bicubic filter setup (not available yet)
+ */
+
+ /*
* Update cliplist
*/
if (!REGION_EQUAL(pScrn->pScreen, &pPriv->clip, clipBoxes))
@@ -821,16 +1161,6 @@
/*
* Now let the 3D engine work its magic.
*/
- pPriv->id = id;
- pPriv->src_w = src_w;
- pPriv->src_h = src_h;
- pPriv->drw_x = drw_x;
- pPriv->drw_y = drw_y;
- pPriv->dst_w = drw_w;
- pPriv->dst_h = drw_h;
- pPriv->w = width;
- pPriv->h = height;
-
if (rhdPtr->ChipSet >= RHD_R600)
R600DisplayTexturedVideo(pScrn, pPriv);
else
@@ -886,16 +1216,18 @@
struct RHDPortPriv *pPortPriv;
XF86VideoAdaptorPtr adapt;
int i;
- int num_texture_ports = 16;
+ int num_texture_ports = 1;

RHDFUNC(pScrn);

+ /* VIDEO_OVERLAID_IMAGES is needed here; without it the StopVideo function is never called */
+
adapt = xnfcalloc(1, sizeof(XF86VideoAdaptorRec) + num_texture_ports *
(sizeof(struct RHDPortPriv) + sizeof(DevUnion)));

adapt->type = XvWindowMask | XvInputMask | XvImageMask;
- adapt->flags = 0;
- adapt->name = "RadeonHD Textured Video";
+ adapt->flags = VIDEO_OVERLAID_IMAGES;
+ adapt->name = "RadeonHD Textured Overlay Video";
adapt->nEncodings = 1;

if ((rhdPtr->ChipSet == RHD_RS690) || (rhdPtr->ChipSet == RHD_RS600) ||
@@ -935,6 +1267,9 @@

/* gotta uninit this someplace, XXX: shouldn't be necessary for
textured */
REGION_NULL(pScreen, &pPriv->clip);
+
+ pPriv->bicubic_state = BICUBIC_OFF;
+ pPriv->bicubic_enabled = FALSE;

adapt->pPortPrivates[i].ptr = (pointer) (pPriv);
}
diff -u -r xf86-video-radeonhd/src/rhd_video.h
xf86-video-radeonhd-work/src/rhd_video.h
--- xf86-video-radeonhd/src/rhd_video.h 2009-04-24 19:32:07.000000000 +0200
+++ xf86-video-radeonhd-work/src/rhd_video.h 2009-05-03 13:06:58.000000000
+0200
@@ -27,32 +27,69 @@
#define _RHD_VIDEO_H

/* seriously ?! @#$%% */
+/*
# define uint32_t CARD32
# define uint64_t CARD64
+*/

/* Xvideo port struct */
struct RHDPortPriv {
DrawablePtr pDraw;
PixmapPtr pPixmap;
-
RegionRec clip;

- void *BufferHandle;
- CARD32 BufferOffset;
- CARD32 BufferPitch;
+ /* Images in frame buffer memory */
+ void *SrcBufferHandle;
+ CARD32 SrcBufferOffset;
+ CARD32 SrcBufferPitch;
+
+ void *Ovl1BufferHandle;
+ void *Ovl2BufferHandle;
+ CARD32 Ovl1BufferOffset;
+ CARD32 Ovl2BufferOffset;
+ CARD32 OvlBufferPitch;
+
+ /* Textured video */
+ int ovlCurrent;
+
+ /* bicubic filtering */
+ void *bicubic_memory;
+ int bicubic_offset;
+ Bool bicubic_enabled;
+ CARD32 bicubic_src_offset;
+ int bicubic_state;
+#define BICUBIC_OFF 0
+#define BICUBIC_ON 1
+#define BICUBIC_AUTO 2
+
+ /* textured video */
+ Bool textured;

int id;
+ BoxRec vid, drw, act;
+ int cpp;
+ int w, h;
+
+ /* Other video */
+ void *video_memory;
+ int video_offset;
+
+ /* these need to go... */
int src_w;
int src_h;
int dst_w;
int dst_h;
- int w;
- int h;
int drw_x;
int drw_y;
+
+ CARD32 src_offset;
+ CARD32 src_pitch;
+ CARD8 *src_addr;
};

extern void RHDRADEONDisplayTexturedVideo(ScrnInfoPtr pScrn, struct
RHDPortPriv *pPriv);
+extern void RHDRADEONDisplayOverlay(ScrnInfoPtr pScrn, struct RHDPortPriv
*pPriv);
+extern void RHDRADEONStopTexturedVideo(ScrnInfoPtr pScrn, pointer data);
extern void RHDInitVideo(ScreenPtr pScreen);
extern void R600DisplayTexturedVideo(ScrnInfoPtr pScrn, struct RHDPortPriv
*pPriv);
extern Bool
< Previous Next >
Follow Ups