Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_addmat.c
/*
 *  Copyright 2011-15 ARM Limited and Contributors.
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of ARM Limited nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
 *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
 *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27
/*
 * NE10 Library : math/NE10_addmat.c
 */
31
32#include "NE10_types.h"
33#include "macros.h"
34
35#include <assert.h>
36
37ne10_result_t ne10_addmat_2x2f_c (ne10_mat2x2f_t * dst, ne10_mat2x2f_t * src1, ne10_mat2x2f_t * src2, ne10_uint32_t count)
38{
39 NE10_X_OPERATION_FLOAT_C
40 (
41 dst[ itr ].c1.r1 = src1[ itr ].c1.r1 + src2[ itr ].c1.r1;
42 dst[ itr ].c1.r2 = src1[ itr ].c1.r2 + src2[ itr ].c1.r2;
43
44 dst[ itr ].c2.r1 = src1[ itr ].c2.r1 + src2[ itr ].c2.r1;
45 dst[ itr ].c2.r2 = src1[ itr ].c2.r2 + src2[ itr ].c2.r2;
46 );
47}
48
49ne10_result_t ne10_addmat_3x3f_c (ne10_mat3x3f_t * dst, ne10_mat3x3f_t * src1, ne10_mat3x3f_t * src2, ne10_uint32_t count)
50{
51 NE10_X_OPERATION_FLOAT_C
52 (
53 dst[ itr ].c1.r1 = src1[ itr ].c1.r1 + src2[ itr ].c1.r1;
54 dst[ itr ].c1.r2 = src1[ itr ].c1.r2 + src2[ itr ].c1.r2;
55 dst[ itr ].c1.r3 = src1[ itr ].c1.r3 + src2[ itr ].c1.r3;
56
57 dst[ itr ].c2.r1 = src1[ itr ].c2.r1 + src2[ itr ].c2.r1;
58 dst[ itr ].c2.r2 = src1[ itr ].c2.r2 + src2[ itr ].c2.r2;
59 dst[ itr ].c2.r3 = src1[ itr ].c2.r3 + src2[ itr ].c2.r3;
60
61 dst[ itr ].c3.r1 = src1[ itr ].c3.r1 + src2[ itr ].c3.r1;
62 dst[ itr ].c3.r2 = src1[ itr ].c3.r2 + src2[ itr ].c3.r2;
63 dst[ itr ].c3.r3 = src1[ itr ].c3.r3 + src2[ itr ].c3.r3;
64 );
65}
66
67ne10_result_t ne10_addmat_4x4f_c (ne10_mat4x4f_t * dst, ne10_mat4x4f_t * src1, ne10_mat4x4f_t * src2, ne10_uint32_t count)
68{
69 NE10_X_OPERATION_FLOAT_C
70 (
71 dst[ itr ].c1.r1 = src1[ itr ].c1.r1 + src2[ itr ].c1.r1;
72 dst[ itr ].c1.r2 = src1[ itr ].c1.r2 + src2[ itr ].c1.r2;
73 dst[ itr ].c1.r3 = src1[ itr ].c1.r3 + src2[ itr ].c1.r3;
74 dst[ itr ].c1.r4 = src1[ itr ].c1.r4 + src2[ itr ].c1.r4;
75
76 dst[ itr ].c2.r1 = src1[ itr ].c2.r1 + src2[ itr ].c2.r1;
77 dst[ itr ].c2.r2 = src1[ itr ].c2.r2 + src2[ itr ].c2.r2;
78 dst[ itr ].c2.r3 = src1[ itr ].c2.r3 + src2[ itr ].c2.r3;
79 dst[ itr ].c2.r4 = src1[ itr ].c2.r4 + src2[ itr ].c2.r4;
80
81 dst[ itr ].c3.r1 = src1[ itr ].c3.r1 + src2[ itr ].c3.r1;
82 dst[ itr ].c3.r2 = src1[ itr ].c3.r2 + src2[ itr ].c3.r2;
83 dst[ itr ].c3.r3 = src1[ itr ].c3.r3 + src2[ itr ].c3.r3;
84 dst[ itr ].c3.r4 = src1[ itr ].c3.r4 + src2[ itr ].c3.r4;
85
86 dst[ itr ].c4.r1 = src1[ itr ].c4.r1 + src2[ itr ].c4.r1;
87 dst[ itr ].c4.r2 = src1[ itr ].c4.r2 + src2[ itr ].c4.r2;
88 dst[ itr ].c4.r3 = src1[ itr ].c4.r3 + src2[ itr ].c4.r3;
89 dst[ itr ].c4.r4 = src1[ itr ].c4.r4 + src2[ itr ].c4.r4;
90 );
91}