Compare commits

...

4 Commits

Author SHA1 Message Date
KK
8d1fb991fa Add voice control 2026-06-18 15:13:57 +02:00
KK
89e9f63019 remove tmux from .profile 2026-06-18 15:12:40 +02:00
KK
a00ad450bf Add script for scratchpad to ask stuff 2026-06-18 15:12:10 +02:00
KK
5ea5794ce6 Update command list to search internet and use @ 2026-06-18 15:11:49 +02:00
6 changed files with 154 additions and 19 deletions

View File

@@ -70,6 +70,7 @@ exec swayidle -w \
input * {
xkb_layout pl
xkb_options caps:none
repeat_rate 80
repeat_delay 300
}
@@ -86,14 +87,6 @@ input "1267:12734:ELAN06FA:00_04F3:31BE_Touchpad" {
scroll_factor 0.8
}
# Export important Wayland/Sway session variables to D-Bus and the systemd
# user manager. This lets apps and user services launched outside the shell
# know which Wayland display to use and that the current desktop is Sway.
# Helps with portals, screen sharing, notifications, and D-Bus activated apps.
exec dbus-update-activation-environment --systemd WAYLAND_DISPLAY XDG_CURRENT_DESKTOP=sway
exec systemctl --user import-environment WAYLAND_DISPLAY XDG_CURRENT_DESKTOP
### Key bindings
#
@@ -210,6 +203,9 @@ exec systemctl --user import-environment WAYLAND_DISPLAY XDG_CURRENT_DESKTOP
# Show the next scratchpad window or hide the focused scratchpad window.
# If there are multiple scratchpad windows, this command cycles through them.
bindsym $mod+minus scratchpad show
# Dedicated scratchpad terminal for voice queries to pi.
for_window [app_id="pi-voice"] border pixel 2, move scratchpad
#
# Resizing containers:
#
@@ -232,7 +228,9 @@ mode "resize" {
# Return to default mode
bindsym Return mode "default"
bindsym Escape mode "default"
bindsym $mod+r mode "default"
}
bindsym $mod+r mode "resize"
#
# Utilities:
@@ -249,6 +247,16 @@ bindsym $mod+r mode "resize"
bindsym Print exec sh -c 'mkdir -p ~/screenshots; f=~/screenshots/$(date +%F-%H%M%S).png; grim "$f"; wl-copy < "$f"'
bindsym Shift+Print exec sh -c 'mkdir -p ~/screenshots; f=~/screenshots/$(date +%F-%H%M%S).png; grim -g "$(slurp)" "$f"; wl-copy < "$f"'
# Push-to-talk: hold Caps Lock to record, release to transcribe.
# xkb_options caps:none above disables the normal Caps Lock toggle, so bind by keycode.
bindcode 66 exec sh -c 'voxtype record start; pkill -USR1 -f "$HOME/.config/sway/status.sh"'
bindcode --release 66 exec sh -c 'voxtype record stop; pkill -USR1 -f "$HOME/.config/sway/status.sh"'
# Voice query to pi in a dedicated tmux session/scratchpad terminal.
# Hold $mod+Shift+x, speak, release; transcription is submitted to pi.
bindsym $mod+Shift+x exec ~/bin/pi-voice-query start
bindsym --release $mod+Shift+x exec ~/bin/pi-voice-query stop
# Pick an entry from clipboard history and copy it back to the clipboard
bindsym $mod+c exec sh -c 'cliphist list | wmenu -l 10 | cliphist decode | wl-copy'

View File

@@ -7,14 +7,21 @@ print_status() {
brightness=$(brightnessctl -m 2>/dev/null | awk -F, '{print $4}')
printf 'VOL %s | BRI %s | %s\n' "${volume:-n/a}" "${brightness:-n/a}" "$(date +'%Y-%m-%d %H:%M:%S')"
voxtype_state=$(voxtype status 2>/dev/null)
case "$voxtype_state" in
recording) voxtype=' | VOX REC' ;;
transcribing) voxtype=' | VOX ...' ;;
*) voxtype='' ;;
esac
printf 'VOL %s | BRI %s%s | %s\n' "${volume:-n/a}" "${brightness:-n/a}" "$voxtype" "$(date +'%Y-%m-%d %H:%M:%S')"
}
trap print_status USR1
print_status
while :; do
sleep 60 &
sleep 1 &
wait $!
print_status
done

View File

@@ -33,6 +33,6 @@ fi
if [ -z "$WAYLAND_DISPLAY" ] && [ "$XDG_VTNR" = 1 ]; then
export WLR_NO_HARDWARE_CURSORS=1
exec tmux new-session -A -s debug sway --unsupported-gpu
exec sway --unsupported-gpu
fi

View File

@@ -168,11 +168,15 @@ if result.returncode != 0:
exit(result.returncode)
choice = result.stdout.strip()
val = vals[choice]
if val["kind"] == "bin":
subprocess.run([val["path"]])
elif val["kind"] == "app":
subprocess.run(["gtk-launch", val["name"]])
elif val["kind"] == "url":
subprocess.run(["xdg-open", val["url"]])
if choice[0] == '@':
choice = choice[1:]
val = vals.get(choice)
if val is not None:
if val["kind"] == "bin":
subprocess.run([val["path"]])
elif val["kind"] == "app":
subprocess.run(["gtk-launch", val["name"]])
elif val["kind"] == "url":
subprocess.run(["xdg-open", val["url"]])
else:
subprocess.run(["xdg-open", "https://www.google.com/search?q=" + choice])

95
bin/pi-voice-query Executable file
View File

@@ -0,0 +1,95 @@
#!/bin/sh
# pi-voice-query - push-to-talk voice query helper for a tmux session.
#
# Usage: pi-voice-query start|stop
#   start  make sure the tmux session/terminal exists, then start a voxtype
#          recording whose transcription is written to $OUT
#   stop   stop the recording, wait for text to appear in $OUT and paste it
#          into the tmux session followed by Enter
#
# Abort on any unhandled command failure (-e) and on unset variables (-u).
set -eu
# Name of the tmux session (created detached, running `pi`).
SESSION="pi-voice"
# app_id given to the foot terminal and used in swaymsg criteria.
APP_ID="pi-voice"
# Window title given to the foot terminal.
TITLE="Pi Voice"
# File passed to `voxtype record start --file=`; falls back to /tmp when
# XDG_RUNTIME_DIR is unset or empty.
OUT="${XDG_RUNTIME_DIR:-/tmp}/pi-voice-query.txt"
# Used as the `pkill -f` pattern when signalling the status bar script.
STATUS_SCRIPT="$HOME/.config/sway/status.sh"
# Signal the status bar script so it can refresh.
#
# Sends SIGUSR1 to every process whose command line matches STATUS_SCRIPT.
# Globals:   STATUS_SCRIPT (read)
# Outputs:   nothing; pkill diagnostics are discarded
# Returns:   always 0 - a bar that is not running is not an error
refresh_bar() {
  if ! pkill -USR1 -f "$STATUS_SCRIPT" 2>/dev/null; then
    # No matching process (or pkill failed): nothing to refresh.
    return 0
  fi
}
# Resize the pi window to 60% of the focused output and centre it.
#
# Globals:   APP_ID (read) - app_id used in the swaymsg criteria
#            dims, w, h (written) - scratch variables (POSIX sh has no local)
# Outputs:   nothing; diagnostics from swaymsg and python3 are discarded
# Returns:   always 0 (best effort)
#
# The size is computed here in absolute pixels because that is more reliable
# than a for_window percentage resize.
position_pi_window() {
  # python3 prints "<width> <height>" for the focused output (falling back to
  # the first active one). Its stderr is silenced as well: when swaymsg is
  # unavailable or prints no JSON, json.load raises and would otherwise leak a
  # traceback from this otherwise silent, best-effort helper.
  dims=$(swaymsg -t get_outputs 2>/dev/null | python3 -c '
import json, sys
outs = json.load(sys.stdin)
out = next((o for o in outs if o.get("focused")), None) or next((o for o in outs if o.get("active")), None)
rect = (out or {}).get("rect", {})
w = int(rect.get("width", 1200) * 0.6)
h = int(rect.get("height", 800) * 0.6)
print(w, h)
' 2>/dev/null) || dims=

  # Deliberately unquoted: split "<width> <height>" into $1 and $2.
  # shellcheck disable=SC2086
  set -- $dims
  # Used when python3 produced no output at all.
  w=${1:-1200}
  h=${2:-800}

  swaymsg '[app_id="'"$APP_ID"'"] border pixel 2, resize set width '"$w"' px height '"$h"' px, move position center' >/dev/null 2>&1 || true
}
ensure_pi_terminal() {
if ! tmux has-session -t "$SESSION" 2>/dev/null; then
tmux new-session -d -s "$SESSION" 'pi'
fi
if command -v swaymsg >/dev/null 2>&1; then
if ! swaymsg -t get_tree 2>/dev/null | grep -q '"app_id": "'"$APP_ID"'"'; then
foot --app-id="$APP_ID" --title="$TITLE" \
--override=main.resize-by-cells=no \
--override=colors.alpha=1.0 \
--override=main.pad=0x0 \
tmux attach-session -t "$SESSION" >/dev/null 2>&1 &
# Give the window a moment to appear before trying to show/focus it.
sleep 0.5
fi
swaymsg '[app_id="'"$APP_ID"'"] scratchpad show, focus' >/dev/null 2>&1 || true
position_pi_window
fi
}
# Paste a piece of text into the pi tmux session and submit it with Enter.
#
# Arguments: $1 - text to submit
# Globals:   SESSION (read); text, pi_buffer (written - POSIX sh has no local)
# Returns:   does not return when $1 is empty: the whole script exits with
#            status 0, because there is nothing to submit
send_to_pi() {
  text=$1
  [ -n "$text" ] || exit 0
  ensure_pi_terminal

  # Use a dedicated named buffer and delete it after pasting (-d). Loading
  # into tmux's automatic buffer stack would leave every query behind as the
  # most recent paste buffer of the user's tmux server.
  pi_buffer="pi-voice-query"
  # Paste literally, then press Enter to submit to pi.
  printf '%s' "$text" | tmux load-buffer -b "$pi_buffer" -
  tmux paste-buffer -d -b "$pi_buffer" -t "$SESSION"
  tmux send-keys -t "$SESSION" Enter
}
# Command dispatch: "start" begins a recording, "stop" ends it and submits the
# transcription, anything else prints usage and exits with status 2.
action=${1:-}
case "$action" in
  start)
    # Bring the terminal up first, then record into a freshly emptied OUT.
    ensure_pi_terminal
    : >"$OUT"
    voxtype record start --file="$OUT"
    refresh_bar
    ;;
  stop)
    voxtype record stop
    refresh_bar
    # Wait for the transcription to land in OUT, polling every 0.5 s for at
    # most 180 rounds to allow for slower large-model runs. The wait ends as
    # soon as OUT is non-empty, or - after the first three rounds - as soon as
    # voxtype reports "idle" even though OUT is still empty.
    polls=0
    until [ "$polls" -ge 180 ] || [ -s "$OUT" ]; do
      state=$(voxtype status 2>/dev/null || true)
      if [ "$state" = idle ] && [ "$polls" -gt 2 ]; then
        break
      fi
      sleep 0.5
      polls=$((polls + 1))
    done
    refresh_bar
    # A missing or unreadable OUT yields empty text, which send_to_pi ignores.
    text=$(cat "$OUT" 2>/dev/null || true)
    send_to_pi "$text"
    ;;
  *)
    echo "usage: $0 start|stop" >&2
    exit 2
    ;;
esac

View File

@@ -82,3 +82,24 @@ ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -N ""
# Extra
## Clipboard history
# sudo apt install cliphist
# Install voxtype from its upstream .deb release.
voxtype_version=0.6.0
voxtype_deb="voxtype_${voxtype_version}-1_amd64.deb"
# -f: fail on HTTP errors instead of saving the server's error page as the .deb.
curl -fLO "https://github.com/peteonrails/voxtype/releases/download/v${voxtype_version}/${voxtype_deb}"
sudo dpkg -i "$voxtype_deb"
rm "$voxtype_deb"
sudo apt install wtype wl-clipboard libnotify-bin pipewire-alsa playerctl
# Add the current user to the "input" group.
sudo usermod -aG input "$USER"
read -r -p "Enable GPU support? [y/N] " answer
case "$answer" in
  [yY]|[yY][eE][sS])
    sudo voxtype setup gpu --enable
    ;;
esac
# Interactive model selection and systemd setup
voxtype setup
voxtype setup systemd
voxtype setup model