#!/bin/sh
# pocl-build - Compile an OpenCL kernel to bytecode using clang.
#
# Copyright (c) 2011 Universidad Rey Juan Carlos and
#               2012 Pekka Jääskeläinen / TUT
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

#echo 0=$0 @=$@
#set -x
#set -e

# Default LLVM target triplet; override with -t.
target=i686-redhat-linux-gnu
# Start from an empty value so that an inherited environment variable with the
# same name cannot be mistaken for a -o argument.
output_file=

# usage - print the command-line synopsis to stderr and exit with status 1.
usage() {
    echo >&2 "Usage: $0 [-t <llvm_target_triplet>] -o output input"
    exit 1
}

# Options:
#   -t <triplet>  LLVM target triplet to compile for
#   -o <file>     file the bytecode is written to (mandatory)
while getopts t:o: o
do
    case "$o" in
        t)   target="$OPTARG";;
        o)   output_file="$OPTARG";;
        [?]) usage;;
    esac
done
# Drop the parsed options so that $1 is the input kernel source.
shift $((OPTIND - 1))

# Both the output file (-o) and the input file (first positional argument)
# are required; bail out early instead of handing clang an empty file name.
if [ -z "$output_file" ] || [ -z "${1-}" ]
then
    usage
fi

# Map the target triplet to target_dir. For ARM targets the triplet itself is
# also rewritten to a canonical value. Any triplet that is not recognised is
# used unchanged as target_dir.
# Note: the more specific arm*gnueabihf pattern must stay ahead of arm*.
case $target in
  arm*gnueabihf)
    target_dir="arm"
    target="armv7l-unknown-linux-gnueabihf"
    ;;
  arm*)
    target_dir="arm"
    target="armv7"
    ;;
  cellspu-*)
    target_dir="cellspu"
    ;;
  powerpc-*)
    target_dir="powerpc"
    ;;
  powerpc64-*)
    target_dir="powerpc64"
    ;;
  tce-*)
    target_dir="tce"
    ;;
  amd64-* | x86* | i?86-*)
    target_dir="x86_64"
    ;;
  *)
    target_dir=${target}
    ;;
esac

# Assemble CLANG_FLAGS, the option list passed to both clang invocations.
# For the targets listed here any CLANG_FLAGS value inherited from the
# environment is discarded; for every other target the inherited value is
# kept as a prefix of the final flag list.
case $target in
  arm* | i686-redhat-linux-gnu) CLANG_FLAGS="";;
esac
CLANG_FLAGS="$CLANG_FLAGS -fasm -fsigned-char -Xclang -ffake-address-space-map -target $target "

# With fp-contract enabled we get calls to fma on processors which do not
# have fma instructions. These ruin the performance. Better to have
# the mul+add separated in the IR.
CLANG_FLAGS="$CLANG_FLAGS -ffp-contract=off "

# Intermediate file holding the preprocessed kernel source; it is written
# next to the requested output file.
preprocessed_file="${output_file}.i"

# Step 1: preprocess the input kernel ($1), force-including the per-target
# types.h and the common _kernel.h header.
# EXTRA_CPPFLAGS, USER_OPTIONS and CLANG_FLAGS are intentionally left
# unquoted: each holds a whitespace-separated option list that must be split
# into separate words. File names are quoted so that paths containing spaces
# or glob characters survive intact.
# Stop immediately if preprocessing fails; otherwise the compile step below
# would run on a missing or stale .i file.
# shellcheck disable=SC2086
/usr/bin/clang ${EXTRA_CPPFLAGS} ${USER_OPTIONS} ${CLANG_FLAGS} -E \
    -include "/usr/include/pocl/${target_dir}/types.h" \
    -include /usr/include/pocl/_kernel.h \
    -o "${preprocessed_file}" -x cl "$1" || exit $?

# Step 2: compile the preprocessed source to LLVM bytecode.
# Use -fno-builtin to avoid clang producing extra memcpys etc. from
# kernel loops. Otherwise, it a) ruins wg-vectorization and b) causes issues
# with embedded devices such as TCE which do not have an OS and a system linker.
# The exit status of this command is the exit status of the script.
# shellcheck disable=SC2086
/usr/bin/clang -fblocks -fno-builtin ${CLANG_FLAGS} -c -emit-llvm \
    -o "${output_file}" -x cl "${preprocessed_file}"

