1
0
mirror of https://github.com/openbsd/src.git synced 2026-06-18 15:23:33 +02:00

Add kernel support for the vector extension on riscv64.

The V extension (where supported) adds an additional 32 vector registers
that are variable length (up to 65,536 bits, but more commonly 2048 bits).
In order to support the use of the V extension (and additional vector
extensions) we need to enable the vector extension and save/restore the
vector registers.

ok kettenis@
This commit is contained in:
jsing
2026-05-09 17:38:50 +00:00
parent c044176a43
commit 33e6d7cf80
12 changed files with 284 additions and 35 deletions
+2 -1
View File
@@ -1,4 +1,4 @@
# $OpenBSD: files.riscv64,v 1.38 2026/04/13 12:03:19 kettenis Exp $
# $OpenBSD: files.riscv64,v 1.39 2026/05/09 17:38:50 jsing Exp $
# Standard stanzas config(8) can't run without
maxpartitions 16
@@ -35,6 +35,7 @@ file arch/riscv64/riscv64/pagezero.S
file arch/riscv64/riscv64/trap.c
file arch/riscv64/riscv64/sbi.c
file arch/riscv64/riscv64/fpu.c
file arch/riscv64/riscv64/vector.c
file arch/riscv64/riscv64/db_disasm.c ddb
file arch/riscv64/riscv64/db_interface.c ddb
+8 -1
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: cpu.h,v 1.26 2026/05/02 14:09:17 jsing Exp $ */
/* $OpenBSD: cpu.h,v 1.27 2026/05/09 17:38:50 jsing Exp $ */
/*
* Copyright (c) 2019 Mike Larkin <mlarkin@openbsd.org>
@@ -265,9 +265,16 @@ void delay (unsigned);
extern void (*cpu_startclock_fcn)(void);
extern unsigned long riscv_hwcap;
extern size_t riscv_vlenb;
void fpu_save(struct proc *, struct trapframe *);
void fpu_load(struct proc *);
int vector_instruction(register_t stval);
void vector_save(struct proc *, struct trapframe *);
void vector_load(struct proc *);
extern int cpu_errata_sifive_cip_1200;
#define cpu_idle_enter() do { /* nothing */ } while (0)
+6 -4
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: pcb.h,v 1.4 2024/10/15 09:16:39 jsg Exp $ */
/* $OpenBSD: pcb.h,v 1.5 2026/05/09 17:38:50 jsing Exp $ */
/*
* Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
@@ -31,11 +31,13 @@ struct pcb {
u_int pcb_flags;
#define PCB_FPU 0x00000001 /* Process had FPU initialized */
#define PCB_SINGLESTEP 0x00000002 /* Single step process */
#define PCB_VECTOR 0x00000004 /* Process had vector initialized */
struct trapframe *pcb_tf;
register_t pcb_sp; // stack pointer of switchframe
register_t pcb_sp; /* stack pointer of switchframe */
caddr_t pcb_onfault; // On fault handler
struct fpreg pcb_fpstate; // Floating Point state */
caddr_t pcb_onfault; /* On fault handler */
struct fpreg pcb_fpstate; /* Floating Point state */
struct vreg *pcb_vstate; /* Vector state */
};
#endif /* _MACHINE_PCB_H_ */
+9 -1
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: reg.h,v 1.2 2021/05/12 01:20:52 jsg Exp $ */
/* $OpenBSD: reg.h,v 1.3 2026/05/09 17:38:50 jsing Exp $ */
/*-
* Copyright (c) 2019 Brian Bamsch <bbamsch@google.com>
@@ -57,4 +57,12 @@ struct fpreg {
uint64_t fp_fcsr; /* floating-point control register */
};
/*
 * Saved vector state: the vector control and status registers followed
 * by the raw vector register data.  v_vdata is a flexible array member;
 * vector_load() allocates it with 32 * riscv_vlenb bytes.
 */
struct vreg {
uint64_t v_vtype; /* vector type control register */
uint64_t v_vl; /* vector length control register */
uint64_t v_vstart; /* vector start control register */
uint64_t v_vcsr; /* vector control and status register */
uint8_t v_vdata[]; /* vector register data (sized based on vlenb) */
};
#endif /* !_MACHINE_REG_H_ */
+17 -1
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: riscvreg.h,v 1.5 2022/08/29 02:01:18 jsg Exp $ */
/* $OpenBSD: riscvreg.h,v 1.6 2026/05/09 17:38:50 jsing Exp $ */
/*-
* Copyright (c) 2019 Brian Bamsch <bbamsch@google.com>
@@ -67,6 +67,8 @@
#define MSTATUS_SPIE (1 << 5)
#define MSTATUS_MPIE (1 << 7)
#define MSTATUS_SPP (1 << 8)
#define MSTATUS_VS_SHIFT 9
#define MSTATUS_VS_MASK (0x3 << MSTATUS_VS_SHIFT)
#define MSTATUS_MPP_SHIFT 11
#define MSTATUS_MPP_MASK (0x3 << MSTATUS_MPP_SHIFT)
#define MSTATUS_FS_SHIFT 13
@@ -90,6 +92,12 @@
#define SSTATUS_UPIE (1 << 4)
#define SSTATUS_SPIE (1 << 5)
#define SSTATUS_SPP (1 << 8)
#define SSTATUS_VS_SHIFT 9
#define SSTATUS_VS_MASK (0x3 << SSTATUS_VS_SHIFT)
#define SSTATUS_VS_OFF (0x0 << SSTATUS_VS_SHIFT)
#define SSTATUS_VS_INITIAL (0x1 << SSTATUS_VS_SHIFT)
#define SSTATUS_VS_CLEAN (0x2 << SSTATUS_VS_SHIFT)
#define SSTATUS_VS_DIRTY (0x3 << SSTATUS_VS_SHIFT)
#define SSTATUS_FS_SHIFT 13
#define SSTATUS_FS_MASK (0x3 << SSTATUS_FS_SHIFT)
#define SSTATUS_FS_OFF (0x0 << SSTATUS_FS_SHIFT)
@@ -191,6 +199,14 @@
#define INSN_SIZE 4
#define INSN_C_SIZE 2
/*
* The maximum vector register length in bits (VLEN) is 65,536, as
* documented in section 31.2 of The RISC-V Instruction Set Manual,
* Volume I: Unprivileged Architecture (Version 20260120).
*/
#define VLEN_MAX 65536
#define VLEN_BYTES_MAX (VLEN_MAX / 8)
// Check if val can fit in the CSR immediate form
#define CSR_ZIMM(val) \
(__builtin_constant_p(val) && ((u_long)(val) < 32))
+22 -9
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: cpu.c,v 1.27 2026/05/02 14:09:17 jsing Exp $ */
/* $OpenBSD: cpu.c,v 1.28 2026/05/09 17:38:50 jsing Exp $ */
/*
* Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
@@ -90,6 +90,8 @@ const struct vendor {
{ 0, NULL, NULL }
};
size_t riscv_vlenb;
unsigned long riscv_hwcap;
unsigned long riscv_hwcap2;
@@ -168,6 +170,7 @@ cpu_identify(struct cpu_info *ci)
const char *arch_name = NULL;
struct arch *archlist = cpu_arch_none;
unsigned long cpu_hwcap, cpu_hwcap2;
size_t vlenb;
char *names;
char *name;
char *end;
@@ -274,6 +277,23 @@ cpu_identify(struct cpu_info *ci)
ci->ci_dev->dv_xname, riscv_hwcap2, cpu_hwcap2);
riscv_hwcap2 &= cpu_hwcap2;
}
if ((riscv_hwcap & HWCAP_ISA_V) != 0) {
vlenb = csr_read(vlenb);
if (CPU_IS_PRIMARY(ci)) {
if (vlenb > VLEN_BYTES_MAX) {
printf("%s: vlenb exceeds maximum (%lu > %d)\n",
ci->ci_dev->dv_xname, vlenb, VLEN_BYTES_MAX);
} else {
riscv_vlenb = vlenb;
}
} else if (riscv_vlenb != vlenb) {
printf("%s: mismatched vlenb (%zu != %lu)\n",
ci->ci_dev->dv_xname, riscv_vlenb, vlenb);
riscv_hwcap &= ~HWCAP_ISA_V;
}
}
}
void
@@ -290,13 +310,6 @@ cpu_identify_cleanup(void)
/* Remove H extension since userland does not need to know about it. */
hwcap &= ~HWCAP_ISA_H;
/* Remove V extensions since they require kernel support. */
hwcap &= ~HWCAP_ISA_V;
hwcap2 &= ~(HWCAP2_ISA_ZVBB | HWCAP2_ISA_ZVBC | HWCAP2_ISA_ZVFH |
HWCAP2_ISA_ZVKG | HWCAP2_ISA_ZVKNED | HWCAP2_ISA_ZVKNHA |
HWCAP2_ISA_ZVKNHB | HWCAP2_ISA_ZVKSED | HWCAP2_ISA_ZVKSH |
HWCAP2_ISA_ZVKT);
}
#ifdef MULTIPROCESSOR
@@ -658,7 +671,7 @@ cpu_start_secondary(void)
riscv_intr_cpu_enable();
cpu_startclock();
csr_clear(sstatus, SSTATUS_FS_MASK);
csr_clear(sstatus, SSTATUS_FS_MASK | SSTATUS_VS_MASK);
csr_set(sie, SIE_SSIE);
atomic_setbits_int(&ci->ci_flags, CPUF_RUNNING);
+2 -2
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: exception.S,v 1.9 2024/10/17 01:57:18 jsg Exp $ */
/* $OpenBSD: exception.S,v 1.10 2026/05/09 17:38:50 jsing Exp $ */
/*-
* Copyright (c) 2015-2018 Ruslan Bukin <br@bsdpad.com>
@@ -107,7 +107,7 @@
li t0, SSTATUS_SUM
csrc sstatus, t0
.endif
li t0, SSTATUS_FS_MASK
li t0, (SSTATUS_FS_MASK | SSTATUS_VS_MASK)
csrc sstatus, t0
csrr t0, stval
sd t0, (TF_STVAL)(sp)
+15 -8
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: machdep.c,v 1.42 2025/11/27 19:15:51 kettenis Exp $ */
/* $OpenBSD: machdep.c,v 1.43 2026/05/09 17:38:50 jsing Exp $ */
/*
* Copyright (c) 2014 Patrick Wildt <patrick@blueri.se>
@@ -293,10 +293,12 @@ cpu_switchto(struct proc *old, struct proc *new)
if (pcb->pcb_flags & PCB_FPU)
fpu_save(old, tf);
if (pcb->pcb_flags & PCB_VECTOR)
vector_save(old, tf);
/* drop FPU state */
tf->tf_sstatus &= ~SSTATUS_FS_MASK;
tf->tf_sstatus |= SSTATUS_FS_OFF;
/* Drop FPU and vector state */
tf->tf_sstatus &= ~(SSTATUS_FS_MASK | SSTATUS_VS_MASK);
tf->tf_sstatus |= SSTATUS_FS_OFF | SSTATUS_VS_OFF;
}
cpu_switchto_asm(old, new);
@@ -404,10 +406,15 @@ setregs(struct proc *p, struct exec_package *pack, u_long stack,
struct trapframe *tf = p->p_addr->u_pcb.pcb_tf;
struct pcb *pcb = &p->p_addr->u_pcb;
/* If we were using the FPU, forget about it. */
pcb->pcb_flags &= ~PCB_FPU;
tf->tf_sstatus &= ~SSTATUS_FS_MASK;
tf->tf_sstatus |= SSTATUS_FS_OFF;
/* If we were using the FPU or vector, forget about it. */
if (pcb->pcb_flags & PCB_VECTOR) {
free(pcb->pcb_vstate, M_SUBPROC,
sizeof(struct vreg) + 32 * riscv_vlenb);
pcb->pcb_vstate = NULL;
}
pcb->pcb_flags &= ~(PCB_FPU | PCB_VECTOR);
tf->tf_sstatus &= ~(SSTATUS_FS_MASK | SSTATUS_VS_MASK);
tf->tf_sstatus |= SSTATUS_FS_OFF | SSTATUS_VS_OFF;
memset(tf, 0, sizeof *tf);
tf->tf_sp = STACKALIGN(stack);
+4 -2
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: sig_machdep.c,v 1.10 2022/03/22 06:49:25 miod Exp $ */
/* $OpenBSD: sig_machdep.c,v 1.11 2026/05/09 17:38:50 jsing Exp $ */
/*
* Copyright (c) 1990 The Regents of the University of California.
@@ -119,9 +119,11 @@ sendsig(sig_t catcher, int sig, sigset_t mask, const siginfo_t *ksip,
/* make the stack aligned */
fp = (struct sigframe *)STACKALIGN(fp);
/* Save FPU state to PCB if necessary. */
/* Save FPU and vector state to PCB if necessary. */
if (p->p_addr->u_pcb.pcb_flags & PCB_FPU)
fpu_save(p, tf);
if (p->p_addr->u_pcb.pcb_flags & PCB_VECTOR)
vector_save(p, tf);
/* Build stack frame for signal trampoline. */
bzero(&frame, sizeof(frame));
+9 -2
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: trap.c,v 1.22 2026/04/05 22:13:21 kettenis Exp $ */
/* $OpenBSD: trap.c,v 1.23 2026/05/09 17:38:50 jsing Exp $ */
/*
* Copyright (c) 2020 Shivam Waghela <shivamwaghela@gmail.com>
@@ -29,6 +29,7 @@
#include <machine/riscvreg.h>
#include <machine/syscall.h>
#include <machine/db_machdep.h>
#include <machine/elf.h>
/* Called from exception.S */
void do_trap_supervisor(struct trapframe *);
@@ -113,9 +114,9 @@ do_trap_supervisor(struct trapframe *frame)
void
do_trap_user(struct trapframe *frame)
{
struct proc *p = curcpu()->ci_curproc;
uint64_t exception;
union sigval sv;
struct proc *p = curcpu()->ci_curproc;
p->p_addr->u_pcb.pcb_tf = frame;
@@ -151,6 +152,12 @@ do_trap_user(struct trapframe *frame)
svc_handler(frame);
break;
case EXCP_ILLEGAL_INSTRUCTION:
if ((riscv_hwcap & HWCAP_ISA_V) != 0 &&
vector_instruction(frame->tf_stval) &&
(frame->tf_sstatus & SSTATUS_VS_MASK) == SSTATUS_VS_OFF) {
vector_load(p);
break;
}
if ((frame->tf_sstatus & SSTATUS_FS_MASK) == SSTATUS_FS_OFF) {
fpu_load(p);
break;
+168
View File
@@ -0,0 +1,168 @@
/* $OpenBSD: vector.c,v 1.1 2026/05/09 17:38:50 jsing Exp $ */
/*
* Copyright (c) 2026 Joel Sing <jsing@openbsd.org>
* Copyright (c) 2020 Dale Rahn <drahn@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/user.h>
/* CSR numbers of the vector control and status registers. */
#define CSR_VSTART 0x008
#define CSR_VXSAT 0x009
#define CSR_VXRM 0x00a
#define CSR_VCSR 0x00f
#define CSR_VL 0xc20
#define CSR_VTYPE 0xc21
#define CSR_VLENB 0xc22

/*
 * Classify a trapped instruction encoding.
 *
 * stval holds the instruction bits.  Returns 1 when the instruction is a
 * vector load, a vector store, an OP-V instruction, or a CSR instruction
 * that names one of the vector CSRs above; returns 0 for anything else.
 */
int
vector_instruction(register_t stval)
{
	register_t major, minor, csrno;

	major = stval & 0x7f;		/* opcode, bits 6:0 */
	minor = (stval >> 12) & 0x7;	/* funct3, bits 14:12 */

	switch (major) {
	case 0b1010111:
		/* OP-V. */
		return 1;

	case 0b0000111:
	case 0b0100111:
		/*
		 * LOAD-FP and STORE-FP are shared with the scalar FPU; only
		 * these funct3 values select the V forms.
		 */
		switch (minor) {
		case 0b000:
		case 0b101:
		case 0b110:
		case 0b111:
			return 1;
		}
		return 0;

	case 0b1110011:
		/* funct3 0b000 and 0b100 are not CSR instructions. */
		if (minor == 0b000 || minor == 0b100)
			return 0;

		/* CSR instruction: check for a vector CSR register. */
		csrno = (stval >> 20) & 0xfff;
		switch (csrno) {
		case CSR_VSTART:
		case CSR_VXSAT:
		case CSR_VXRM:
		case CSR_VCSR:
		case CSR_VL:
		case CSR_VTYPE:
		case CSR_VLENB:
			return 1;
		}
		return 0;
	}

	return 0;
}
/*
 * Clear the VS field in sstatus on the current CPU, which leaves it at
 * SSTATUS_VS_OFF.
 */
void
vector_disable(void)
{
__asm volatile ("csrc sstatus, %0" :: "r"(SSTATUS_VS_MASK));
}
/*
 * Set the VS field in sstatus on the current CPU to SSTATUS_VS_CLEAN.
 * The field is cleared first so that no stale bits of a previous value
 * remain when the new value is or'ed in.
 */
void
vector_enable_clean(void)
{
__asm volatile ("csrc sstatus, %0" :: "r"(SSTATUS_VS_MASK));
__asm volatile ("csrs sstatus, %0" :: "r"(SSTATUS_VS_CLEAN));
}
/*
 * Save the vector state of process p into its PCB.
 *
 * Nothing is done when the VS field of tf->tf_sstatus is OFF or CLEAN.
 * Otherwise vtype, vl, vstart and vcsr are read into pcb_vstate and the
 * vector register data is stored to pcb_vstate->v_vdata, after which the
 * VS field in the process trapframe is set to OFF.
 *
 * Callers in this change only invoke it when PCB_VECTOR is set, i.e.
 * after vector_load() has allocated pcb_vstate.
 */
void
vector_save(struct proc *p, struct trapframe *tf)
{
struct pcb *pcb = &p->p_addr->u_pcb;
struct vreg *v;
/* No live state that differs from what the PCB already holds. */
if ((tf->tf_sstatus & SSTATUS_VS_MASK) == SSTATUS_VS_OFF ||
(tf->tf_sstatus & SSTATUS_VS_MASK) == SSTATUS_VS_CLEAN)
return;
v = pcb->pcb_vstate;
/*
 * Turn vector access on for the duration of the save.
 * NOTE(review): presumably required because the exception entry code
 * clears sstatus.VS - confirm.
 */
vector_enable_clean();
__asm volatile ("csrr %0, vtype" : "=r"(v->v_vtype));
__asm volatile ("csrr %0, vl" : "=r"(v->v_vl));
__asm volatile ("csrr %0, vstart" : "=r"(v->v_vstart));
__asm volatile ("csrr %0, vcsr" : "=r"(v->v_vcsr));
/*
 * Store the registers in four groups starting at v0, v8, v16 and v24;
 * each group is written 8 * riscv_vlenb bytes after the previous one.
 */
__asm volatile (
".option push \n"
".option arch, +zve32x \n"
"vs8r.v v0, (%0) \n"
"vs8r.v v8, (%1) \n"
"vs8r.v v16, (%2) \n"
"vs8r.v v24, (%3) \n"
".option pop \n"
: : "r"(&v->v_vdata[0 * riscv_vlenb]),
"r"(&v->v_vdata[8 * riscv_vlenb]),
"r"(&v->v_vdata[16 * riscv_vlenb]),
"r"(&v->v_vdata[24 * riscv_vlenb]) : "memory"
);
vector_disable();
/* Mark vector as disabled. */
p->p_addr->u_pcb.pcb_tf->tf_sstatus &= ~SSTATUS_VS_MASK;
p->p_addr->u_pcb.pcb_tf->tf_sstatus |= SSTATUS_VS_OFF;
}
/*
 * Load the vector state of process p from its PCB into the vector unit.
 *
 * The trap handler calls this when a process whose sstatus.VS is OFF
 * traps on a vector instruction.  The first call for a process allocates
 * zeroed backing storage (sizeof(struct vreg) + 32 * riscv_vlenb bytes)
 * and sets PCB_VECTOR.  On return the VS field of the process trapframe
 * is CLEAN.
 */
void
vector_load(struct proc *p)
{
	struct pcb *pcb = &p->p_addr->u_pcb;
	struct vreg *v;

	KASSERT((pcb->pcb_tf->tf_sstatus & SSTATUS_VS_MASK) == SSTATUS_VS_OFF);

	if ((pcb->pcb_flags & PCB_VECTOR) == 0) {
		pcb->pcb_vstate = malloc(sizeof(struct vreg) + 32 * riscv_vlenb,
		    M_SUBPROC, M_ZERO | M_WAITOK);
		pcb->pcb_flags |= PCB_VECTOR;
	}
	v = pcb->pcb_vstate;

	/* Turn vector access on for the duration of the load. */
	vector_enable_clean();

	/*
	 * Restore vl/vtype, then the register data in four groups starting
	 * at v0, v8, v16 and v24, each 8 * riscv_vlenb bytes apart.
	 */
	__asm volatile (
	    ".option push \n"
	    ".option arch, +zve32x \n"
	    "vsetvl x0, %0, %1 \n"
	    "vl8r.v v0, (%2) \n"
	    "vl8r.v v8, (%3) \n"
	    "vl8r.v v16, (%4) \n"
	    "vl8r.v v24, (%5) \n"
	    ".option pop \n"
	    : : "r"(v->v_vl), "r"(v->v_vtype),
	    "r"(&v->v_vdata[0 * riscv_vlenb]),
	    "r"(&v->v_vdata[8 * riscv_vlenb]),
	    "r"(&v->v_vdata[16 * riscv_vlenb]),
	    "r"(&v->v_vdata[24 * riscv_vlenb]) : "memory"
	);

	/*
	 * csrw reads its source register, so the saved values have to be
	 * input operands.  Declaring them as outputs would write whatever
	 * the chosen register happened to hold into the CSR and then
	 * clobber the saved copy in the PCB.
	 */
	__asm volatile ("csrw vstart, %0" :: "r"(v->v_vstart));
	__asm volatile ("csrw vcsr, %0" :: "r"(v->v_vcsr));

	vector_disable();

	/* Mark vector as clean. */
	pcb->pcb_tf->tf_sstatus &= ~SSTATUS_VS_MASK;
	pcb->pcb_tf->tf_sstatus |= SSTATUS_VS_CLEAN;
}
+22 -4
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: vm_machdep.c,v 1.12 2025/05/21 09:06:58 mpi Exp $ */
/* $OpenBSD: vm_machdep.c,v 1.13 2026/05/09 17:38:50 jsing Exp $ */
/*-
* Copyright (c) 1995 Charles M. Hannum. All rights reserved.
@@ -39,8 +39,9 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <uvm/uvm_extern.h>
@@ -66,12 +67,22 @@ cpu_fork(struct proc *p1, struct proc *p2, void *stack, void *tcb,
CTASSERT((sizeof(struct trapframe) & STACKALIGNBYTES) == 0);
CTASSERT((sizeof(struct switchframe) & STACKALIGNBYTES) == 0);
/* Save FPU state to PCB if necessary. */
/* Save FPU and vector state to PCB if necessary. */
if (pcb1->pcb_flags & PCB_FPU)
fpu_save(p1, pcb1->pcb_tf);
if (pcb1->pcb_flags & PCB_VECTOR)
vector_save(p1, pcb1->pcb_tf);
/* Copy the pcb. */
*pcb = p1->p_addr->u_pcb;
*pcb = *pcb1;
/* If vector state exists, we need to copy it. */
if (pcb1->pcb_flags & PCB_VECTOR) {
pcb->pcb_vstate = malloc(sizeof(struct vreg) + 32 * riscv_vlenb,
M_SUBPROC, M_WAITOK);
memcpy(pcb->pcb_vstate, pcb1->pcb_vstate,
sizeof(struct vreg) + 32 * riscv_vlenb);
}
pmap_activate(p2);
@@ -107,6 +118,13 @@ cpu_fork(struct proc *p1, struct proc *p2, void *stack, void *tcb,
void
cpu_exit(struct proc *p)
{
struct pcb *pcb = &p->p_addr->u_pcb;
if (pcb->pcb_flags & PCB_VECTOR) {
free(pcb->pcb_vstate, M_SUBPROC,
sizeof(struct vreg) + 32 * riscv_vlenb);
pcb->pcb_vstate = NULL;
}
}
struct kmem_va_mode kv_physwait = {