llama.cpp all in one!

Orion 2026-02-14 18:51:08 +08:00
commit 3a4943ba09
2 changed files with 199 additions and 0 deletions

.SRCINFO (new file)

@@ -0,0 +1,51 @@
pkgbase = llama.cpp-aio
pkgdesc = Port of Facebook's LLaMA model in C/C++ (All in one: CUDA + ROCm + Vulkan)
pkgver = b8037
pkgrel = 1
url = https://github.com/ggml-org/llama.cpp
arch = x86_64
arch = armv7h
arch = aarch64
license = MIT
makedepends = cmake
makedepends = git
makedepends = rocm-hip-sdk
makedepends = shaderc
makedepends = vulkan-headers
depends = curl
depends = gcc-libs
depends = glibc
depends = python
depends = cuda
depends = nvidia-utils
depends = hip-runtime-amd
depends = hipblas
depends = openmp
depends = rocblas
depends = rocwmma
depends = vulkan-icd-loader
optdepends = python-numpy: needed for convert_hf_to_gguf.py
optdepends = python-safetensors: needed for convert_hf_to_gguf.py
optdepends = python-sentencepiece: needed for convert_hf_to_gguf.py
optdepends = python-pytorch: needed for convert_hf_to_gguf.py
optdepends = python-transformers: needed for convert_hf_to_gguf.py
optdepends = python-gguf: needed for convert_hf_to_gguf.py
provides = llama.cpp
provides = llama.cpp-cuda
provides = llama.cpp-hip
provides = llama.cpp-vulkan
conflicts = llama.cpp
conflicts = libggml
conflicts = ggml
conflicts = stable-diffusion.cpp
options = lto
options = !debug
backup = etc/conf.d/llama.cpp
source = llama.cpp-aio-b8037.tar.gz::https://github.com/ggml-org/llama.cpp/archive/refs/tags/b8037.tar.gz
source = https://raw.githubusercontent.com/Orion-zhen/aur-packages/refs/heads/main/assets/llama.cpp/llama.cpp.service
source = https://raw.githubusercontent.com/Orion-zhen/aur-packages/refs/heads/main/assets/llama.cpp/llama.cpp.conf
sha256sums = 84a8535ab935e03ae1e4812f6b7826828f8939d93c92cc53b66f277f4a4ede47
sha256sums = 0377d08a07bda056785981d3352ccd2dbc0387c4836f91fb73e6b790d836620d
sha256sums = e4856f186f69cd5dbfcc4edec9f6b6bd08e923bceedd8622eeae1a2595beb2ec
pkgname = llama.cpp-aio

PKGBUILD (new file)

@@ -0,0 +1,148 @@
# Maintainer: Orion-zhen <https://github.com/Orion-zhen>
# Contributor: txtsd <aur.archlinux@ihavea.quest>
pkgname=llama.cpp-aio
_pkgname=${pkgname%%-aio}
pkgver=b8037
pkgrel=1
pkgdesc="Port of Facebook's LLaMA model in C/C++ (All in one: CUDA + ROCm + Vulkan)"
arch=(x86_64 armv7h aarch64)
url='https://github.com/ggml-org/llama.cpp'
license=('MIT')
depends=(
curl
gcc-libs
glibc
python
cuda
nvidia-utils
hip-runtime-amd
hipblas
openmp
rocblas
rocwmma
vulkan-icd-loader
)
makedepends=(
cmake
git
rocm-hip-sdk
shaderc
vulkan-headers
)
optdepends=(
'python-numpy: needed for convert_hf_to_gguf.py'
'python-safetensors: needed for convert_hf_to_gguf.py'
'python-sentencepiece: needed for convert_hf_to_gguf.py'
'python-pytorch: needed for convert_hf_to_gguf.py'
'python-transformers: needed for convert_hf_to_gguf.py'
'python-gguf: needed for convert_hf_to_gguf.py'
)
provides=(${_pkgname} ${_pkgname}-cuda ${_pkgname}-hip ${_pkgname}-vulkan)
conflicts=(${_pkgname} libggml ggml stable-diffusion.cpp)
options=(lto !debug)
backup=("etc/conf.d/llama.cpp")
source=(
"${pkgname}-${pkgver}.tar.gz::https://github.com/ggml-org/llama.cpp/archive/refs/tags/${pkgver}.tar.gz"
"https://raw.githubusercontent.com/Orion-zhen/aur-packages/refs/heads/main/assets/llama.cpp/llama.cpp.service"
"https://raw.githubusercontent.com/Orion-zhen/aur-packages/refs/heads/main/assets/llama.cpp/llama.cpp.conf"
)
sha256sums=('84a8535ab935e03ae1e4812f6b7826828f8939d93c92cc53b66f277f4a4ede47'
'0377d08a07bda056785981d3352ccd2dbc0387c4836f91fb73e6b790d836620d'
'e4856f186f69cd5dbfcc4edec9f6b6bd08e923bceedd8622eeae1a2595beb2ec')
prepare() {
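# Symlink the versioned source directory to a stable name so the rest of
# the build can refer to it simply as ${_pkgname}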
ln -sf "${_pkgname}-${pkgver}" llama.cpp
}
build() {
# Set up the build environment
if [[ -z "${NVCC_CCBIN}" || -z "${ROCM_PATH}" ]]; then
source /etc/profile
fi
export HIP_PATH="$(hipconfig -R)"
export HIPCXX="$(hipconfig -l)/clang"
export HIP_PLATFORM=amd
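# Note: hipconfig -R prints the ROCm installation root and hipconfig -l
# the bundled LLVM bin directory, so HIPCXX resolves to ROCm's own clang
# rather than the host compiler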
local _cmake_options=(
-B build
-S "${_pkgname}"
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX='/usr'
-DBUILD_SHARED_LIBS=ON
-DLLAMA_BUILD_TESTS=OFF
-DLLAMA_USE_SYSTEM_GGML=OFF
-DGGML_ALL_WARNINGS=OFF
-DGGML_ALL_WARNINGS_3RD_PARTY=OFF
-DGGML_BUILD_EXAMPLES=OFF
-DGGML_BUILD_TESTS=OFF
-DGGML_LTO=ON
-DGGML_RPC=ON
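# GGML_RPC also builds the RPC backend, which can offload work to a remote rpc-server instance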
# CUDA part
-DGGML_CUDA=ON
-DGGML_CUDA_GRAPHS=ON
-DGGML_CUDA_FA_ALL_QUANTS=ON
# ROCm part
-DGGML_HIP=ON
-DGGML_HIP_GRAPHS=ON
-DGGML_HIP_ROCWMMA_FATTN=ON
-DHIP_PLATFORM=amd # explicitly select the AMD platform; rocm-nightly disables auto-detection, which would otherwise cause an error
# Vulkan part
-DGGML_VULKAN=ON
-DLLAMA_BUILD_NUMBER="${pkgver#b}" # fix the reported build number
-Wno-dev
)
# Check whether we are building in a CI environment
if [ -n "$CI" ] && [ "$CI" != 0 ]; then
msg2 "CI = $CI detected, building universal package"
# Enable a universal build
_cmake_options+=(
-DGGML_BACKEND_DL=ON
-DGGML_CPU_ALL_VARIANTS=ON
-DGGML_NATIVE=OFF
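# With GGML_BACKEND_DL the backends are built as runtime-loadable modules,
# and GGML_CPU_ALL_VARIANTS provides one per CPU feature level, so the
# best variant is picked on the machine that actually runs the package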
# https://developer.nvidia.com/cuda-gpus
-DCMAKE_CUDA_ARCHITECTURES="75;86;89;120;121"
# https://llvm.org/docs/AMDGPUUsage.html
# gfx906: MI 50/60, Radeon VII
# gfx101x: RX 5000 Series
# gfx103x: RX 6000 Series
# gfx110x: RX 7000 Series
# gfx1151: Strix Halo
# gfx120x: RX 9000 Series
-DAMDGPU_TARGETS="gfx906;gfx1010;gfx1030;gfx1031;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
-DGGML_ZENDNN=ON
)
else
# Local build: optimize for the current machine
_cmake_options+=(
-DGGML_NATIVE=ON
)
fi
# Allow user-supplied build options
if [[ -n "$LLAMA_BUILD_EXTRA_ARGS" ]]; then
msg2 "Applied custom CMake build args: $LLAMA_BUILD_EXTRA_ARGS"
_cmake_options+=($LLAMA_BUILD_EXTRA_ARGS)
fi
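# Example (illustrative): LLAMA_BUILD_EXTRA_ARGS="-DGGML_CUDA_FA_ALL_QUANTS=OFF" makepkg -s
# Note that the unquoted expansion above splits on whitespace, so individual
# options must not contain spaces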
cmake "${_cmake_options[@]}"
cmake --build build -- -j $(nproc)
}
package() {
DESTDIR="${pkgdir}" cmake --install build
install -Dm644 "${_pkgname}/LICENSE" "${pkgdir}/usr/share/licenses/${pkgname}/LICENSE"
install -Dm644 "llama.cpp.conf" "${pkgdir}/etc/conf.d/llama.cpp"
install -Dm644 "llama.cpp.service" "${pkgdir}/usr/lib/systemd/system/llama.cpp.service"
msg2 "llama.cpp.service is now available"
msg2 "llama-server arguments are in /etc/conf.d/llama.cpp"
}