# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Includes code assembled from BSD/MIT/Apache-licensed code from some 3rd-party
# projects, including Kudu, Impala, and libdynd. See python/LICENSE.txt

# Oldest CMake release this build script is written against.
cmake_minimum_required(VERSION 3.16)
project(pyarrow)

# This is needed for 3.13 free-threading. CMake used to add Python
# include directories with `-isystem`, which led to some Python-internal
# includes resolving to normal 3.13 includes (because -isystem includes
# are searched after system directories) instead of 3.13-freethreading,
# which in turn meant that Py_GIL_DISABLED was not set.
set(CMAKE_NO_SYSTEM_FROM_IMPORTED ON)

set(PYARROW_VERSION "18.0.0")
# Keep only the leading "<major>.<minor>.<patch>" part of PYARROW_VERSION.
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}")

# Running from a Python sdist tarball: the CMake modules live next to this file.
set(LOCAL_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/cmake_modules")
if(EXISTS "${LOCAL_CMAKE_MODULES}")
  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${LOCAL_CMAKE_MODULES})
endif()

# Running from a git source tree: the CMake modules live in the sibling cpp/
# directory. Both locations are appended when both exist.
set(CPP_CMAKE_MODULES "${CMAKE_SOURCE_DIR}/../cpp/cmake_modules")
if(EXISTS "${CPP_CMAKE_MODULES}")
  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CPP_CMAKE_MODULES})
endif()

# When PYARROW_CPP_HOME is given, put it at the front of CMAKE_PREFIX_PATH so
# it is searched before any other prefix.
if(PYARROW_CPP_HOME)
  list(INSERT CMAKE_PREFIX_PATH 0 "${PYARROW_CPP_HOME}")
endif()

# Provides cmake_parse_arguments(), used by bundle_arrow_lib() in this file.
include(CMakeParseArguments)

# MACOSX_RPATH is enabled by default.
# https://www.cmake.org/cmake/help/latest/policy/CMP0042.html
cmake_policy(SET CMP0042 NEW)

# Only interpret if() arguments as variables or keywords when unquoted.
# https://www.cmake.org/cmake/help/latest/policy/CMP0054.html
cmake_policy(SET CMP0054 NEW)

# The policies below are guarded with if(POLICY ...) so that a CMake release
# that does not know them does not fail.

# RPATH settings on macOS do not affect install_name.
# https://cmake.org/cmake/help/latest/policy/CMP0068.html
if(POLICY CMP0068)
  cmake_policy(SET CMP0068 NEW)
endif()

# find_package() uses <PackageName>_ROOT variables.
# https://cmake.org/cmake/help/latest/policy/CMP0074.html
if(POLICY CMP0074)
  cmake_policy(SET CMP0074 NEW)
endif()

# RPATH entries are properly escaped in the intermediary CMake install script.
# https://cmake.org/cmake/help/latest/policy/CMP0095.html
if(POLICY CMP0095)
  cmake_policy(SET CMP0095 NEW)
endif()

# Use the first Python installation on PATH, not the newest one
set(Python3_FIND_STRATEGY "LOCATION")
# On Windows, use registry last, not first
set(Python3_FIND_REGISTRY "LAST")
# On macOS, use framework last, not first
set(Python3_FIND_FRAMEWORK "LAST")

# Allow "make install" to not depend on all targets.
#
# Must be declared in the top-level CMakeLists.txt.
set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY true)

set(CMAKE_MACOSX_RPATH 1)
# Honor MACOSX_DEPLOYMENT_TARGET from the environment when it is defined;
# otherwise fall back to 12.0.
if(DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
  set(CMAKE_OSX_DEPLOYMENT_TARGET $ENV{MACOSX_DEPLOYMENT_TARGET})
else()
  set(CMAKE_OSX_DEPLOYMENT_TARGET 12.0)
endif()

# Generate a Clang compile_commands.json "compilation database" file for use
# with various development tools, such as Vim's YouCompleteMe plugin.
# See http://clang.llvm.org/docs/JSONCompilationDatabase.html
# Only enabled when the environment variable is exactly "1".
if("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
  set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()

if(UNIX)
  # Build artifacts directly with their install RPATH.
  set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
  # In the event that we are bundling the shared libraries (e.g. in a
  # manylinux1 wheel), we need to set the RPATH of the extensions to the
  # root of the pyarrow/ package so that libarrow is able to be
  # loaded properly
  if(APPLE)
    set(CMAKE_INSTALL_NAME_DIR "@rpath")
    set(CMAKE_INSTALL_RPATH "@loader_path/")
  else()
    # The escaped "$" keeps CMake from expanding ORIGIN as a variable reference.
    set(CMAKE_INSTALL_RPATH "\$ORIGIN")
  endif()
endif()

# Use ccache as the C/C++ compiler launcher when it is found and no launcher
# has been configured already. CCACHE_FOUND holds the result of find_program().
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND
   AND NOT CMAKE_C_COMPILER_LAUNCHER
   AND NOT CMAKE_CXX_COMPILER_LAUNCHER)
  message(STATUS "Using ccache: ${CCACHE_FOUND}")
  set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND})
  set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND})
  # ARROW-3985: let ccache preserve C++ comments, because some of them may be
  # meaningful to the compiler
  set(ENV{CCACHE_COMMENTS} "1")
endif()

#
# Compiler flags
#

# BuildUtils is resolved through CMAKE_MODULE_PATH.
include(BuildUtils)

# Cython generated code emits way too many warnings at CHECKIN and EVERYTHING
set(BUILD_WARNING_LEVEL "PRODUCTION")

# This must be synchronized with the definition in
# cpp/cmake_modules/DefineOptions.cmake.
# The defaults below are only applied when the variable is not defined yet.
if(NOT DEFINED ARROW_SIMD_LEVEL)
  set(ARROW_SIMD_LEVEL
      "DEFAULT"
      CACHE STRING "Compile time SIMD optimization level")
endif()
if(NOT DEFINED ARROW_RUNTIME_SIMD_LEVEL)
  set(ARROW_RUNTIME_SIMD_LEVEL
      "MAX"
      CACHE STRING "Max runtime SIMD optimization level")
endif()
# NOTE(review): presumably SetupCxxFlags is what defines CXX_COMMON_FLAGS and
# CXX_ONLY_FLAGS consumed further down -- confirm against the module.
include(SetupCxxFlags)

# Only taken when the PYODIDE environment variable expands to a true value at
# configure time.
if($ENV{PYODIDE})
  # These variables are needed for building PyArrow on Emscripten.
  # If they aren't set, CMake cross compiling fails for Python
  # modules (at least under Pyodide it does).
  set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE})
  set(Python3_LIBRARY $ENV{CPYTHONLIB})
  set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include)
  # set() without a value unsets the variable.
  set(Python3_EXECUTABLE)
  set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME})
  # we set the c and cxx compiler manually to bypass pywasmcross
  # which is pyodide's way of messing with C++ build parameters.
  set(CMAKE_C_COMPILER emcc)
  set(CMAKE_CXX_COMPILER em++)
endif()

# Add common flags
# CXX_COMMON_FLAGS go in front of any existing CMAKE_CXX_FLAGS; the
# user-supplied PYARROW_CXXFLAGS are appended last.
set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}")

if(MSVC)
  # MSVC version of -Wno-return-type-c-linkage
  string(APPEND CMAKE_CXX_FLAGS " /wd4190")

  # Cython generates some bitshift expressions that MSVC does not like in
  # __Pyx_PyFloat_DivideObjC
  string(APPEND CMAKE_CXX_FLAGS " /wd4293")

  # Converting to/from C++ bool is pretty wonky in Cython. The C4800 warning
  # seems harmless, and probably not worth the effort of working around it
  string(APPEND CMAKE_CXX_FLAGS " /wd4800")

  # See https://github.com/cython/cython/issues/2731. Change introduced in
  # Cython 0.29.1 causes "unsafe use of type 'bool' in operation"
  string(APPEND CMAKE_CXX_FLAGS " /wd4804")

  # See https://github.com/cython/cython/issues/4445.
  #
  # Cython 3 emits "(void)__Pyx_PyObject_CallMethod0;" to suppress a
  # "unused function" warning but the code emits another "function
  # call missing argument list" warning.
  string(APPEND CMAKE_CXX_FLAGS " /wd4551")
else()
  # Enable perf and other tools to work properly
  string(APPEND CMAKE_CXX_FLAGS " -fno-omit-frame-pointer")

  # Suppress Cython warnings
  string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-variable -Wno-maybe-uninitialized")

  if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
                                                    "Clang")
    # Cython warnings in clang
    string(APPEND CMAKE_CXX_FLAGS " -Wno-parentheses-equality")
    string(APPEND CMAKE_CXX_FLAGS " -Wno-constant-logical-operand")
    string(APPEND CMAKE_CXX_FLAGS " -Wno-missing-declarations")
    string(APPEND CMAKE_CXX_FLAGS " -Wno-sometimes-uninitialized")

    # We have public Cython APIs which return C++ types, which are in an extern
    # "C" block (no symbol mangling) and clang doesn't like this
    string(APPEND CMAKE_CXX_FLAGS " -Wno-return-type-c-linkage")
  endif()
endif()

# For any C code, use the same flags.
# This snapshot is taken before the C++-only flags are added below.
set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}")

# Add C++-only flags, like -std=c++17
set(CMAKE_CXX_FLAGS "${CXX_ONLY_FLAGS} ${CMAKE_CXX_FLAGS}")

message(STATUS "CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")

# Compute BUILD_SUBDIR_NAME, the per-build-type subdirectory of the output
# tree.
if(MSVC)
  # MSVC makes its own output directories based on the build configuration
  set(BUILD_SUBDIR_NAME "")
else()
  # Set compile output directory.
  # CMAKE_BUILD_TYPE is quoted so that an empty or unset build type produces
  # an empty subdirectory name instead of a string(TOLOWER) argument error.
  string(TOLOWER "${CMAKE_BUILD_TYPE}" BUILD_SUBDIR_NAME)
endif()

# If build in-source, create the latest symlink. If build out-of-source, which is
# preferred, simply output the binaries in the build folder.
# Both operands are quoted so that the paths are always compared as literal
# strings (see policy CMP0054, set to NEW in this file).
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_BINARY_DIR}")
  set(BUILD_OUTPUT_ROOT_DIRECTORY
      "${CMAKE_CURRENT_BINARY_DIR}/build/${BUILD_SUBDIR_NAME}")
  # Link build/latest to the current build directory, to avoid developers
  # accidentally running the latest debug build when in fact they're building
  # release builds.
  file(MAKE_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
  if(NOT APPLE)
    # -T is only passed on non-Apple platforms.
    set(MORE_ARGS "-T")
  endif()
  # MORE_ARGS stays unquoted on purpose: when it is empty it must contribute
  # no argument at all.
  execute_process(COMMAND ln ${MORE_ARGS} -sf "${BUILD_OUTPUT_ROOT_DIRECTORY}"
                          "${CMAKE_CURRENT_BINARY_DIR}/build/latest")
else()
  set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}")
endif()

message(STATUS "Generator: ${CMAKE_GENERATOR}")
message(STATUS "Build output directory: ${BUILD_OUTPUT_ROOT_DIRECTORY}")

# where to put generated archives (.a files)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
set(ARCHIVE_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")

# where to put generated libraries (.so files)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")

# where to put generated binaries
set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")

# Python and Numpy libraries
# Python3Alt is a project-provided find module located via CMAKE_MODULE_PATH.
# NOTE(review): presumably it defines Python3_NumPy_VERSION, NUMPY_INCLUDE_DIRS
# and the Python3::NumPy target used later in this file -- confirm.
find_package(Python3Alt REQUIRED)
message(STATUS "Found NumPy version: ${Python3_NumPy_VERSION}")
message(STATUS "NumPy include dir: ${NUMPY_INCLUDE_DIRS}")

# UseCython supplies cython_add_module(), used for every extension below.
include(UseCython)
message(STATUS "Found Cython version: ${CYTHON_VERSION}")

# Arrow C++ and set default PyArrow build options
# GNUInstallDirs defines CMAKE_INSTALL_INCLUDEDIR, used by the install() rules.
include(GNUInstallDirs)
find_package(Arrow REQUIRED)

# define_option(<name> <description> <arrow_option>)
#
# Declares the cache entry PYARROW_<name> (type STRING, default "AUTO",
# documented with <description>) and derives PYARROW_BUILD_<name> (ON or OFF)
# from it:
#   * PYARROW_<name> is not "AUTO": its truth value decides.
#   * PYARROW_<name> is "AUTO" and the environment variable PYARROW_WITH_<name>
#     is defined: the truth value of that environment variable decides.
#   * PYARROW_<name> is "AUTO" otherwise: the truth value of the variable named
#     by <arrow_option> decides.
#
# This is a macro, so PYARROW_BUILD_<name> (and the scratch variable
# env_variable) are set in the caller's scope.
macro(define_option name description arrow_option)
  set("PYARROW_${name}"
      "AUTO"
      CACHE STRING ${description})

  if(NOT "${PYARROW_${name}}" STREQUAL "AUTO")
    # An explicit setting wins over the environment and the Arrow C++ config.
    if("${PYARROW_${name}}")
      set("PYARROW_BUILD_${name}" ON)
    else()
      set("PYARROW_BUILD_${name}" OFF)
    endif()
  else()
    # AUTO: prefer the environment variable, fall back to the Arrow C++ config.
    set(env_variable "PYARROW_WITH_${name}")
    if(NOT DEFINED ENV{${env_variable}})
      if(${arrow_option})
        set("PYARROW_BUILD_${name}" ON)
      else()
        set("PYARROW_BUILD_${name}" OFF)
      endif()
    else()
      if($ENV{${env_variable}})
        set("PYARROW_BUILD_${name}" ON)
      else()
        set("PYARROW_BUILD_${name}" OFF)
      endif()
    endif()
  endif()
endmacro()

# Optional PyArrow components. Each call creates PYARROW_<NAME> (default AUTO)
# and sets PYARROW_BUILD_<NAME> from it, from the PYARROW_WITH_<NAME>
# environment variable, or from the Arrow C++ variable given as last argument.
define_option(ACERO "Build the PyArrow Acero integration" ARROW_ACERO)
define_option(CUDA "Build the PyArrow CUDA support" ARROW_CUDA)
define_option(DATASET "Build the PyArrow Dataset integration" ARROW_DATASET)
define_option(FLIGHT "Build the PyArrow Flight integration" ARROW_FLIGHT)
define_option(GANDIVA "Build the PyArrow Gandiva integration" ARROW_GANDIVA)
define_option(ORC "Build the PyArrow ORC integration" ARROW_ORC)
define_option(PARQUET "Build the PyArrow Parquet integration" ARROW_PARQUET)
define_option(PARQUET_ENCRYPTION "Build the PyArrow Parquet encryption integration"
              PARQUET_REQUIRE_ENCRYPTION)
define_option(SUBSTRAIT "Build the PyArrow Substrait integration" ARROW_SUBSTRAIT)
define_option(AZURE "Build the PyArrow Azure integration" ARROW_AZURE)
define_option(GCS "Build the PyArrow GCS integration" ARROW_GCS)
define_option(S3 "Build the PyArrow S3 integration" ARROW_S3)
define_option(HDFS "Build the PyArrow HDFS integration" ARROW_HDFS)
# Plain boolean options, all off by default.
option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF)
option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF)
option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF)
# NOTE(review): PYARROW_CXXFLAGS is read earlier in this file, when
# CMAKE_CXX_FLAGS is assembled; on a first configure only a value already in
# the cache (e.g. passed with -D) is visible there -- confirm this is intended.
set(PYARROW_CXXFLAGS
    ""
    CACHE STRING "Compiler flags to append when compiling PyArrow C++")

# enforce module dependencies
# Substrait turns on Dataset, and Dataset turns on Acero; the order of the two
# checks makes Substrait imply Acero as well.
if(PYARROW_BUILD_SUBSTRAIT)
  set(PYARROW_BUILD_DATASET ON)
endif()
if(PYARROW_BUILD_DATASET)
  set(PYARROW_BUILD_ACERO ON)
endif()

# PyArrow C++
# Paths are relative to the directory of this CMakeLists.txt.
set(PYARROW_CPP_ROOT_DIR pyarrow/src)
set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python)
# Sources that are always part of the arrow_python library; csv.cc and
# filesystem.cc are appended later depending on the Arrow C++ configuration.
set(PYARROW_CPP_SRCS
    ${PYARROW_CPP_SOURCE_DIR}/arrow_to_pandas.cc
    ${PYARROW_CPP_SOURCE_DIR}/benchmark.cc
    ${PYARROW_CPP_SOURCE_DIR}/common.cc
    ${PYARROW_CPP_SOURCE_DIR}/datetime.cc
    ${PYARROW_CPP_SOURCE_DIR}/decimal.cc
    ${PYARROW_CPP_SOURCE_DIR}/deserialize.cc
    ${PYARROW_CPP_SOURCE_DIR}/extension_type.cc
    ${PYARROW_CPP_SOURCE_DIR}/gdb.cc
    ${PYARROW_CPP_SOURCE_DIR}/helpers.cc
    ${PYARROW_CPP_SOURCE_DIR}/inference.cc
    ${PYARROW_CPP_SOURCE_DIR}/io.cc
    ${PYARROW_CPP_SOURCE_DIR}/ipc.cc
    ${PYARROW_CPP_SOURCE_DIR}/numpy_convert.cc
    ${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
    ${PYARROW_CPP_SOURCE_DIR}/numpy_to_arrow.cc
    ${PYARROW_CPP_SOURCE_DIR}/python_test.cc
    ${PYARROW_CPP_SOURCE_DIR}/python_to_arrow.cc
    ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
    ${PYARROW_CPP_SOURCE_DIR}/serialize.cc
    ${PYARROW_CPP_SOURCE_DIR}/udf.cc)
# numpy_init.cc is kept out of precompiled-header and unity builds.
set_source_files_properties(${PYARROW_CPP_SOURCE_DIR}/numpy_init.cc
                            PROPERTIES SKIP_PRECOMPILE_HEADERS ON
                                       SKIP_UNITY_BUILD_INCLUSION ON)

# Libraries arrow_python links against; filled in by the option checks below.
set(PYARROW_CPP_LINK_LIBS "")

#
# Arrow vs PyArrow C++ options
#

# Check all the options from Arrow and PyArrow C++ to be in line
#
# Order is important for "NOT ARROW_BUILD_SHARED". We must use
# depending modules -> depended modules order. For example,
# ArrowSubstrait depends on ArrowDataset. So PYARROW_CPP_LINK_LIBS
# must use
# "ArrowSubstrait::arrow_substrait_static;ArrowDataset::arrow_dataset_static"
# order.
#
# Each section below fails the configure step when the PyArrow component is
# requested but the matching Arrow C++ option is off, and otherwise appends
# the shared or static imported target depending on ARROW_BUILD_SHARED.

if(PYARROW_BUILD_SUBSTRAIT)
  message(STATUS "Building PyArrow with Substrait")
  if(NOT ARROW_SUBSTRAIT)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_SUBSTRAIT=ON")
  endif()
  find_package(ArrowSubstrait REQUIRED)
  if(ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowSubstrait::arrow_substrait_shared)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowSubstrait::arrow_substrait_static)
  endif()
endif()

if(PYARROW_BUILD_DATASET)
  message(STATUS "Building PyArrow with Dataset")
  if(NOT ARROW_DATASET)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_DATASET=ON")
  endif()
  find_package(ArrowDataset REQUIRED)
  if(ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowDataset::arrow_dataset_shared)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowDataset::arrow_dataset_static)
  endif()
endif()

if(PYARROW_BUILD_ACERO)
  message(STATUS "Building PyArrow with Acero")
  if(NOT ARROW_ACERO)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_ACERO=ON")
  endif()
  find_package(ArrowAcero REQUIRED)
  if(ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowAcero::arrow_acero_shared)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS ArrowAcero::arrow_acero_static)
  endif()
endif()

# Parquet is located here, but nothing is appended to PYARROW_CPP_LINK_LIBS
# for it in this section.
if(PYARROW_BUILD_PARQUET)
  message(STATUS "Building PyArrow with Parquet")
  if(NOT ARROW_PARQUET)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_PARQUET=ON")
  endif()
  find_package(Parquet REQUIRED)
else()
  # Encryption cannot be built without Parquet: warn and switch it off.
  if(PYARROW_BUILD_PARQUET_ENCRYPTION)
    message(WARNING "Building PyArrow with Parquet Encryption is requested, but Parquet itself is not enabled. Ignoring the Parquet Encryption setting."
    )
    set(PYARROW_BUILD_PARQUET_ENCRYPTION OFF)
  endif()
endif()

# Check for only Arrow C++ options
if(ARROW_CSV)
  list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/csv.cc)
endif()

if(ARROW_FILESYSTEM)
  list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/filesystem.cc)
endif()

# Extra warning suppression for pyarrow.cc under Clang. The value ends with a
# space because APPEND_STRING concatenates without adding a separator.
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  set_property(SOURCE ${PYARROW_CPP_SOURCE_DIR}/pyarrow.cc
               APPEND_STRING
               PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
endif()

# When none of the optional components added a library, link the core Arrow
# library directly.
if(NOT PYARROW_CPP_LINK_LIBS)
  if(ARROW_BUILD_SHARED)
    list(APPEND PYARROW_CPP_LINK_LIBS Arrow::arrow_shared)
  else()
    list(APPEND PYARROW_CPP_LINK_LIBS Arrow::arrow_static)
  endif()
endif()

# The arrow_python shared library, which every Cython extension links against
# through LINK_LIBS.
add_library(arrow_python SHARED ${PYARROW_CPP_SRCS})
# The binary-tree directory is on the include path as well; the Cython API
# headers (lib.h, lib_api.h) are copied below it later in this file.
target_include_directories(arrow_python PUBLIC ${PYARROW_CPP_ROOT_DIR}
                                               ${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src)
if(ARROW_USE_PRECOMPILED_HEADERS)
  # The generator expression restricts the precompiled header to C++ sources.
  target_precompile_headers(arrow_python PUBLIC
                            "$<$<COMPILE_LANGUAGE:CXX>:arrow/python/pch.h>")
endif()

# on static builds we need to be careful not to link PYARROW_CPP_LINK_LIBS
# into everything depending on arrow_python, or else we get duplicate
# libraries. Whereas conversely on shared builds, we need everything
# to depend on everything, as python loads modules separately
if(ARROW_BUILD_SHARED)
  target_link_libraries(arrow_python PUBLIC ${PYARROW_CPP_LINK_LIBS})
else()
  target_link_libraries(arrow_python PRIVATE ${PYARROW_CPP_LINK_LIBS})
endif()
target_link_libraries(arrow_python PUBLIC Python3::NumPy)
# Defined only while compiling arrow_python itself (PRIVATE).
target_compile_definitions(arrow_python PRIVATE ARROW_PYTHON_EXPORTING)
# Every artifact kind is installed into the root of the install prefix.
install(TARGETS arrow_python
        ARCHIVE DESTINATION .
        LIBRARY DESTINATION .
        RUNTIME DESTINATION .)

# Optional arrow_python_parquet_encryption shared library, built from
# parquet_encryption.cc when PYARROW_BUILD_PARQUET_ENCRYPTION is enabled.
set(PYARROW_CPP_ENCRYPTION_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION)
  message(STATUS "Parquet Encryption is NOT Enabled")
else()
  # Requesting encryption without PARQUET_REQUIRE_ENCRYPTION in Arrow C++ is a
  # fatal configuration error.
  if(PARQUET_REQUIRE_ENCRYPTION)
    add_library(arrow_python_parquet_encryption SHARED ${PYARROW_CPP_ENCRYPTION_SRCS})
    # NOTE(review): PARQUET_LINK_LIBS is only assigned further down in this
    # file (in the "Parquet" Cython section), so it looks empty at this point
    # unless it is defined elsewhere -- confirm.
    target_link_libraries(arrow_python_parquet_encryption PUBLIC arrow_python
                                                                 ${PARQUET_LINK_LIBS})
    target_compile_definitions(arrow_python_parquet_encryption
                               PRIVATE ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING)
    install(TARGETS arrow_python_parquet_encryption
            ARCHIVE DESTINATION .
            LIBRARY DESTINATION .
            RUNTIME DESTINATION .)
    message(STATUS "Parquet Encryption Enabled")
  else()
    message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
  endif()
endif()

# Optional arrow_python_flight shared library, built from flight.cc when
# PYARROW_BUILD_FLIGHT is enabled. The source list is set unconditionally
# because it is also referenced after this block.
set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc)
if(PYARROW_BUILD_FLIGHT)
  message(STATUS "Building PyArrow with Flight")
  if(NOT ARROW_FLIGHT)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_FLIGHT=ON")
  endif()
  # Must link to shared libarrow_flight: we don't want to link more than one
  # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls
  # fail with weird errors due to multiple copies of global static state (The
  # other solution is to link gRPC shared everywhere instead of statically only
  # in Flight)
  if(NOT ARROW_BUILD_SHARED)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_BUILD_SHARED=ON")
  endif()
  find_package(ArrowFlight REQUIRED)

  add_library(arrow_python_flight SHARED ${PYARROW_CPP_FLIGHT_SRCS})
  target_link_libraries(arrow_python_flight PUBLIC arrow_python
                                                   ArrowFlight::arrow_flight_shared)
  # Defined only while compiling arrow_python_flight itself (PRIVATE).
  target_compile_definitions(arrow_python_flight PRIVATE ARROW_PYFLIGHT_EXPORTING)
  install(TARGETS arrow_python_flight
          ARCHIVE DESTINATION .
          LIBRARY DESTINATION .
          RUNTIME DESTINATION .)
endif()

if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  # Clang, be quiet. Python C API has lots of macros
  #
  # APPEND_STRING concatenates without inserting a separator, so the flag
  # carries its own leading space. Without it the flag would fuse with any
  # COMPILE_FLAGS value already set on a source that does not end in
  # whitespace.
  set_property(SOURCE ${PYARROW_CPP_SRCS} ${PYARROW_CPP_FLIGHT_SRCS}
               APPEND_STRING
               PROPERTY COMPILE_FLAGS " -Wno-parentheses-equality")
endif()

# Install the PyArrow C++ headers under <includedir>/arrow/python. The EXCLUDE
# pattern keeps headers whose name ends in "internal.h" out of the install.
install(DIRECTORY ${PYARROW_CPP_SOURCE_DIR}/
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/arrow/python
        FILES_MATCHING
        PATTERN "*internal.h" EXCLUDE
        PATTERN "*.h")

# bundle_arrow_lib(<library_path> SO_VERSION <version>)
#
# Adds an install rule that copies the real file behind <library_path>
# (symlinks resolved) into the install root "." under a platform-specific name:
#   MSVC:   <name><suffix>
#   Apple:  <name>.<version><suffix>
#   other:  <name><suffix>.<version>
# <name> is the file name of <library_path> without directory and extension,
# <suffix> is CMAKE_SHARED_LIBRARY_SUFFIX. Unrecognized arguments are reported
# with message(SEND_ERROR).
function(bundle_arrow_lib library_path)
  # No flag options and no multi-value keywords; SO_VERSION is the only keyword.
  cmake_parse_arguments(ARG "" "SO_VERSION" "" ${ARGN})
  if(ARG_UNPARSED_ARGUMENTS)
    message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}")
  endif()

  get_filename_component(resolved_library_path ${library_path} REALPATH)
  get_filename_component(library_stem ${library_path} NAME_WE)

  # Only copy the shared library with ABI version on Linux and macOS
  if(MSVC)
    set(bundled_file_name "${library_stem}${CMAKE_SHARED_LIBRARY_SUFFIX}")
  elseif(APPLE)
    set(bundled_file_name
        "${library_stem}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}")
  else()
    set(bundled_file_name
        "${library_stem}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARG_SO_VERSION}")
  endif()

  install(FILES ${resolved_library_path}
          DESTINATION "."
          RENAME ${bundled_file_name})
endfunction()

# bundle_arrow_import_lib(<library_path>)
#
# Adds an install rule that copies <library_path> into the install root "."
# as <name>.lib, where <name> is the file name without directory and extension.
function(bundle_arrow_import_lib library_path)
  get_filename_component(import_lib_stem ${library_path} NAME_WE)
  set(import_lib_file_name "${import_lib_stem}.lib")
  install(FILES ${library_path}
          DESTINATION "."
          RENAME ${import_lib_file_name})
endfunction()

# bundle_arrow_dependency(<library_name>)
#
# Locates the shared library for <library_name> and adds an install rule that
# copies the real file (symlinks resolved) into the install root "." under the
# file name it was found as.
#
# Search location, in increasing priority:
#   * $ENV{CONDA_PREFIX} (with "Library" appended under MSVC), when defined;
#   * $ENV{<library_name>_HOME}, when defined.
# When SHARED_LIB_HOME is set, only that location is searched
# (NO_DEFAULT_PATH); otherwise the default find_library() locations are used.
# Fails the configure step with FATAL_ERROR when the library is not found.
function(bundle_arrow_dependency library_name)
  if(MSVC)
    if(DEFINED ENV{CONDA_PREFIX})
      file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}\\Library" SHARED_LIB_HOME)
    endif()
  else()
    if(DEFINED ENV{CONDA_PREFIX})
      file(TO_CMAKE_PATH "$ENV{CONDA_PREFIX}" SHARED_LIB_HOME)
    endif()
  endif()
  if(DEFINED ENV{${library_name}_HOME})
    file(TO_CMAKE_PATH "$ENV{${library_name}_HOME}" SHARED_LIB_HOME)
  endif()
  # arrow_build_shared_library_name() is defined outside this file.
  arrow_build_shared_library_name(shared_lib_name "${library_name}")
  # find_library() stores its result in the cache; drop any previous result so
  # that each call performs a fresh search.
  unset(SHARED_LIB_PATH CACHE)
  if(MSVC)
    # Remember the original suffix list so it can be restored after the search.
    set(CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL ${CMAKE_FIND_LIBRARY_SUFFIXES})
    # .dll isn't found by find_library with MSVC because .dll isn't included in
    # CMAKE_FIND_LIBRARY_SUFFIXES.
    list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_SHARED_LIBRARY_SUFFIX}")
  endif()
  if(SHARED_LIB_HOME)
    find_library(SHARED_LIB_PATH
                 NAMES "${shared_lib_name}"
                 PATHS "${SHARED_LIB_HOME}"
                 PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES}
                 NO_DEFAULT_PATH)
  else()
    find_library(SHARED_LIB_PATH
                 NAMES "${shared_lib_name}"
                 PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES})
  endif()
  if(MSVC)
    # Restore the suffix list saved above (same variable names as the save).
    set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL})
  endif()
  if(SHARED_LIB_PATH)
    get_filename_component(SHARED_LIB_REALPATH ${SHARED_LIB_PATH} REALPATH)
    get_filename_component(SHARED_LIB_NAME ${SHARED_LIB_PATH} NAME)
    message(STATUS "Bundle dependency ${library_name}: ${SHARED_LIB_REALPATH} as ${SHARED_LIB_NAME}"
    )
    install(FILES ${SHARED_LIB_REALPATH}
            DESTINATION "."
            RENAME ${SHARED_LIB_NAME})
  else()
    message(FATAL_ERROR "Unable to bundle dependency: ${library_name}")
  endif()
endfunction()

# Always bundle includes
# The arrow/ header directory is resolved to its real path before installing.
get_filename_component(ARROW_INCLUDE_ARROW_DIR_REAL ${ARROW_INCLUDE_DIR}/arrow REALPATH)
install(DIRECTORY ${ARROW_INCLUDE_ARROW_DIR_REAL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

if(PYARROW_BUNDLE_ARROW_CPP)
  # Arrow
  # NOTE(review): ARROW_SHARED_LIB, ARROW_SO_VERSION and ARROW_IMPORT_LIB are
  # not set in this file; presumably find_package(Arrow) provides them -- confirm.
  bundle_arrow_lib(${ARROW_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})

  if(MSVC)
    # TODO(kszucs): locate msvcp140.dll in a portable fashion and bundle it
    bundle_arrow_import_lib(${ARROW_IMPORT_LIB})
  endif()
endif()

#
# Cython modules
#

# Extensions that are always built; optional components append to this list.
set(CYTHON_EXTENSIONS
    lib
    _compute
    _csv
    _feather
    _fs
    _json
    _pyarrow_cpp_tests)
# NOTE(review): CYTHON_API presumably makes Cython emit the lib.h / lib_api.h
# headers that are copied next to the C++ sources later in this file -- confirm
# against UseCython.
set_source_files_properties(pyarrow/lib.pyx PROPERTIES CYTHON_API TRUE)

# Libraries every Cython extension links against.
set(LINK_LIBS arrow_python)

# The filesystem integrations below only add a Cython extension; each one
# fails the configure step when the matching Arrow C++ option is off.
if(PYARROW_BUILD_AZURE)
  message(STATUS "Building PyArrow with Azure")
  if(NOT ARROW_AZURE)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_AZURE=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _azurefs)
endif()

if(PYARROW_BUILD_GCS)
  message(STATUS "Building PyArrow with GCS")
  if(NOT ARROW_GCS)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_GCS=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _gcsfs)
endif()

if(PYARROW_BUILD_S3)
  message(STATUS "Building PyArrow with S3")
  if(NOT ARROW_S3)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_S3=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _s3fs)
endif()

if(PYARROW_BUILD_HDFS)
  message(STATUS "Building PyArrow with HDFS")
  if(NOT ARROW_HDFS)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _hdfs)
endif()

# CUDA: always links the shared ArrowCUDA library and optionally bundles it.
if(PYARROW_BUILD_CUDA)
  message(STATUS "Building PyArrow with CUDA")
  if(NOT ARROW_CUDA)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_CUDA=ON")
  endif()
  find_package(ArrowCUDA REQUIRED)

  if(PYARROW_BUNDLE_ARROW_CPP)
    bundle_arrow_lib(${ARROW_CUDA_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
    if(MSVC)
      bundle_arrow_import_lib(${ARROW_CUDA_IMPORT_LIB})
    endif()
  endif()
  set(CUDA_LINK_LIBS ArrowCUDA::arrow_cuda_shared)
  list(APPEND CYTHON_EXTENSIONS _cuda)
  set_source_files_properties(pyarrow/_cuda.pyx PROPERTIES CYTHON_API TRUE)
endif()

# Acero
# With shared Arrow libraries the extension links (and optionally bundles) the
# shared Acero library; with static libraries nothing extra is linked.
if(PYARROW_BUILD_ACERO)
  if(ARROW_BUILD_SHARED)
    if(PYARROW_BUNDLE_ARROW_CPP)
      bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB})
      endif()
    endif()

    set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared)
  else()
    # Acero is statically linked into libarrow_python already
    set(ACERO_LINK_LIBS)
  endif()
  list(APPEND CYTHON_EXTENSIONS _acero)
endif()

# Dataset
# Same shared/static handling as Acero.
if(PYARROW_BUILD_DATASET)
  if(ARROW_BUILD_SHARED)
    if(PYARROW_BUNDLE_ARROW_CPP)
      bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB})
      endif()
    endif()

    set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared)
  else()
    # dataset is statically linked into libarrow_python already
    set(DATASET_LINK_LIBS)
  endif()
  list(APPEND CYTHON_EXTENSIONS _dataset)
endif()

# Parquet
if(PYARROW_BUILD_PARQUET)
  # The Parquet headers are only installed when bundling Arrow C++.
  if(PYARROW_BUNDLE_ARROW_CPP)
    get_filename_component(PARQUET_INCLUDE_PARQUET_DIR_REAL
                           ${PARQUET_INCLUDE_DIR}/parquet REALPATH)
    install(DIRECTORY ${PARQUET_INCLUDE_PARQUET_DIR_REAL}
            DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
  endif()

  if(ARROW_BUILD_SHARED)
    if(PYARROW_BUNDLE_ARROW_CPP)
      # Parquet is bundled with its own SO version, not ARROW_SO_VERSION.
      bundle_arrow_lib(${PARQUET_SHARED_LIB} SO_VERSION ${PARQUET_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${PARQUET_IMPORT_LIB})
      endif()
    endif()
    set(PARQUET_LINK_LIBS Parquet::parquet_shared)
  else()
    # don't link the static lib as it is
    # already in arrow_python
    set(PARQUET_LINK_LIBS)
  endif()
  list(APPEND CYTHON_EXTENSIONS _parquet)
  if(PYARROW_BUILD_PARQUET_ENCRYPTION)
    list(APPEND CYTHON_EXTENSIONS _parquet_encryption)
  endif()
  # The dataset/Parquet bridge extensions need both components enabled.
  if(PYARROW_BUILD_DATASET)
    list(APPEND CYTHON_EXTENSIONS _dataset_parquet)
    if(PYARROW_BUILD_PARQUET_ENCRYPTION)
      list(APPEND CYTHON_EXTENSIONS _dataset_parquet_encryption)
    endif()
  endif()
endif()

# ORC
# Only adds Cython extensions; no additional library is linked or bundled here.
if(PYARROW_BUILD_ORC)
  message(STATUS "Building PyArrow with ORC")
  if(NOT ARROW_ORC)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_ORC=ON")
  endif()
  list(APPEND CYTHON_EXTENSIONS _orc)
  if(PYARROW_BUILD_DATASET)
    list(APPEND CYTHON_EXTENSIONS _dataset_orc)
  endif()
endif()

# Flight
# The _flight extension links the arrow_python_flight library defined earlier
# in this file.
if(PYARROW_BUILD_FLIGHT)
  if(PYARROW_BUNDLE_ARROW_CPP)
    bundle_arrow_lib(${ARROW_FLIGHT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
    if(MSVC)
      bundle_arrow_import_lib(${ARROW_FLIGHT_IMPORT_LIB})
      # XXX Hardcoded library names because CMake is too stupid to give us
      # the shared library paths.
      # https://gitlab.kitware.com/cmake/cmake/issues/16210
      # bundle_arrow_dependency(libcrypto-1_1-x64)
      # bundle_arrow_dependency(libssl-1_1-x64)
    endif()
  endif()

  set(FLIGHT_LINK_LIBS arrow_python_flight)
  list(APPEND CYTHON_EXTENSIONS _flight)
else()
  set(FLIGHT_LINK_LIBS "")
endif()

# Substrait
# Same shared/static handling as Acero and Dataset.
if(PYARROW_BUILD_SUBSTRAIT)
  message(STATUS "Building PyArrow with Substrait")

  if(ARROW_BUILD_SHARED)
    if(PYARROW_BUNDLE_ARROW_CPP)
      bundle_arrow_lib(${ARROW_SUBSTRAIT_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
      if(MSVC)
        bundle_arrow_import_lib(${ARROW_SUBSTRAIT_IMPORT_LIB})
      endif()
    endif()
    set(SUBSTRAIT_LINK_LIBS ArrowSubstrait::arrow_substrait_shared)
  else()
    # Arrow Substrait is statically linked into libarrow_python already
    set(SUBSTRAIT_LINK_LIBS)
  endif()

  list(APPEND CYTHON_EXTENSIONS _substrait)
endif()

# Gandiva
# Always links the shared Gandiva library, regardless of ARROW_BUILD_SHARED.
if(PYARROW_BUILD_GANDIVA)
  message(STATUS "Building PyArrow with Gandiva")
  if(NOT ARROW_GANDIVA)
    message(FATAL_ERROR "You must build Arrow C++ with ARROW_GANDIVA=ON")
  endif()
  find_package(Gandiva REQUIRED)

  if(PYARROW_BUNDLE_ARROW_CPP)
    get_filename_component(GANDIVA_INCLUDE_GANDIVA_DIR_REAL
                           ${GANDIVA_INCLUDE_DIR}/gandiva REALPATH)
    install(DIRECTORY ${GANDIVA_INCLUDE_GANDIVA_DIR_REAL}
            DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

    bundle_arrow_lib(${GANDIVA_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})

    if(MSVC)
      bundle_arrow_import_lib(${GANDIVA_IMPORT_LIB})
    endif()
  endif()

  set(GANDIVA_LINK_LIBS Gandiva::gandiva_shared)
  # Unlike the other extensions, this module name has no leading underscore.
  list(APPEND CYTHON_EXTENSIONS gandiva)
endif()

#
# Setup and build Cython modules
#

# Collect extra command-line flags for the Cython compiler in CYTHON_FLAGS.
# list(APPEND) is used for every flag so that an initially empty CYTHON_FLAGS
# does not end up with a leading empty list element.
if(PYARROW_GENERATE_COVERAGE)
  list(APPEND CYTHON_FLAGS "-Xlinetrace=True")
endif()

# Error on any warnings not already explicitly ignored.
list(APPEND CYTHON_FLAGS "--warning-errors")
# GH-40236: make generated C++ code easier to compile by disabling an
# undocumented Cython feature.
list(APPEND CYTHON_FLAGS "--no-c-in-traceback")

# The directive is only passed to Cython 3.1.0a0 and newer.
if(CYTHON_VERSION VERSION_GREATER_EQUAL "3.1.0a0")
  list(APPEND CYTHON_FLAGS "-Xfreethreading_compatible=True")
endif()

# Create one extension-module target per entry of CYTHON_EXTENSIONS.
# An entry may be a dotted name ("pkg.mod"): the last component is the target
# name and the preceding components form its output subdirectory.
foreach(module ${CYTHON_EXTENSIONS})
  # Split the dotted name into a list, then peel off the last element.
  string(REPLACE "." ";" directories ${module})
  list(GET directories -1 module_name)
  list(REMOVE_AT directories -1)

  # The .pyx source lives under pyarrow/, with dots mapped to directories.
  string(REPLACE "." "/" module_root "${module}")
  set(module_SRC pyarrow/${module_root}.pyx)
  set_source_files_properties(${module_SRC} PROPERTIES CYTHON_IS_CXX TRUE)

  # NOTE(review): cython_add_module() comes from UseCython; presumably it
  # creates the target <module_name>, the target <module_name>_pyx and stores
  # the generated file names in <module_name>_output -- confirm.
  cython_add_module(${module_name} ${module_name}_pyx ${module_name}_output ${module_SRC})

  if(directories)
    string(REPLACE ";" "/" module_output_directory ${directories})
    set_target_properties(${module_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
                                                    ${module_output_directory})
  endif()

  # XXX(wesm): ARROW-2326 this logic is only needed when we have Cython
  # modules in interior directories. Since all of our C extensions and
  # bundled libraries are in the same place, we can skip this part

  # list(LENGTH directories i)
  # while(${i} GREATER 0)
  #   set(module_install_rpath "${module_install_rpath}/..")
  #   math(EXPR i "${i} - 1" )
  # endwhile(${i} GREATER 0)

  if(PYARROW_GENERATE_COVERAGE)
    set_target_properties(${module_name} PROPERTIES COMPILE_DEFINITIONS
                                                    "CYTHON_TRACE=1;CYTHON_TRACE_NOGIL=1")
  endif()

  target_link_libraries(${module_name} PRIVATE ${LINK_LIBS})

  install(TARGETS ${module_name} LIBRARY DESTINATION ".")
  # Install the generated files as well; generated C++ sources are skipped
  # unless PYARROW_BUNDLE_CYTHON_CPP is on.
  foreach(output ${${module_name}_output})
    if(output MATCHES "\\.${CYTHON_CXX_EXTENSION}$")
      if(NOT PYARROW_BUNDLE_CYTHON_CPP)
        continue()
      endif()
    endif()
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${output} DESTINATION ".")
  endforeach()
endforeach()

# Copy lib_api.h and lib.h from the top of the build tree into
# <build>/pyarrow/src/arrow/python, a directory below arrow_python's include
# path, and make arrow_python wait for that copy.
# NOTE(review): the two headers are presumably generated by Cython from
# pyarrow/lib.pyx (CYTHON_API is set on it) as part of the lib_pyx target --
# confirm against UseCython.
set(ARROW_PYTHON_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src/arrow/python")
file(MAKE_DIRECTORY ${ARROW_PYTHON_BINARY_DIR})
add_custom_command(OUTPUT "${ARROW_PYTHON_BINARY_DIR}/lib_api.h"
                          "${ARROW_PYTHON_BINARY_DIR}/lib.h"
                   COMMAND ${CMAKE_COMMAND} -E copy
                           "${CMAKE_CURRENT_BINARY_DIR}/lib_api.h"
                           "${CMAKE_CURRENT_BINARY_DIR}/lib.h"
                           "${ARROW_PYTHON_BINARY_DIR}/"
                   DEPENDS lib_pyx)
# The custom target gives the copy rule a name that add_dependencies() can use.
add_custom_target(cython_api_headers DEPENDS "${ARROW_PYTHON_BINARY_DIR}/lib_api.h"
                                             "${ARROW_PYTHON_BINARY_DIR}/lib.h")
add_dependencies(arrow_python cython_api_headers)
install(FILES "${ARROW_PYTHON_BINARY_DIR}/lib_api.h" "${ARROW_PYTHON_BINARY_DIR}/lib.h"
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/arrow/python)

# Additional link libraries
#
# Every extension already links LINK_LIBS (arrow_python). The sections below
# add the component-specific libraries computed earlier; several of those
# *_LINK_LIBS variables are empty for static Arrow builds.

if(PYARROW_BUILD_CUDA)
  target_link_libraries(_cuda PRIVATE ${CUDA_LINK_LIBS})
endif()

if(PYARROW_BUILD_FLIGHT)
  target_link_libraries(_flight PRIVATE ${FLIGHT_LINK_LIBS})
endif()

if(PYARROW_BUILD_SUBSTRAIT)
  target_link_libraries(_substrait PRIVATE ${SUBSTRAIT_LINK_LIBS})
endif()

if(PYARROW_BUILD_ACERO)
  target_link_libraries(_acero PRIVATE ${ACERO_LINK_LIBS})
endif()

if(PYARROW_BUILD_DATASET)
  target_link_libraries(_dataset PRIVATE ${DATASET_LINK_LIBS})
  # The dataset bridge extensions link the dataset library too.
  if(PYARROW_BUILD_ORC)
    target_link_libraries(_dataset_orc PRIVATE ${DATASET_LINK_LIBS})
  endif()
  if(PYARROW_BUILD_PARQUET)
    target_link_libraries(_dataset_parquet PRIVATE ${DATASET_LINK_LIBS})
  endif()
endif()

if(PYARROW_BUILD_GANDIVA)
  target_link_libraries(gandiva PRIVATE ${GANDIVA_LINK_LIBS})
endif()

if(PYARROW_BUILD_PARQUET)
  target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS})
  if(PYARROW_BUILD_PARQUET_ENCRYPTION)
    target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption)
  endif()
endif()
