refactor gemini-categorize-wallpaper.sh

This commit is contained in:
end-4
2025-10-14 10:19:09 +02:00
parent 28756860aa
commit fd1d74ada1
@@ -1,33 +1,45 @@
#!/usr/bin/env bash #!/usr/bin/env bash
if [[ -z "$1" ]]; then if [[ -z "$1" ]]; then
echo "Usage: $0 <image_path>" echo "Usage: $0 <image_path> [model] [prompt]"
echo "Tip: set GEMINI_WALLPAPER_MODEL and/or GEMINI_WALLPAPER_PROMPT to provide defaults."
exit 1 exit 1
fi fi
# Variables
SOURCE_IMG_PATH="$1" SOURCE_IMG_PATH="$1"
MODEL="${2:-${GEMINI_WALLPAPER_MODEL:-gemini-2.5-flash-lite}}" # We use the flash variant so it's fast
WALLPAPER_NAME="$(basename "$SOURCE_IMG_PATH")" WALLPAPER_NAME="$(basename "$SOURCE_IMG_PATH")"
PROMPT="${3:-${GEMINI_WALLPAPER_PROMPT:-Categorize the wallpaper. Its file name is $WALLPAPER_NAME}}"
RESIZED_IMG_PATH="/tmp/quickshell/ai/wallpaper.jpg" RESIZED_IMG_PATH="/tmp/quickshell/ai/wallpaper.jpg"
# Resize image for speed
magick "$SOURCE_IMG_PATH" -resize 200x -quality 50 "$RESIZED_IMG_PATH" magick "$SOURCE_IMG_PATH" -resize 200x -quality 50 "$RESIZED_IMG_PATH"
# Get API key
API_KEY=$(secret-tool lookup 'application' 'illogical-impulse' | jq -r '.apiKeys.gemini') API_KEY=$(secret-tool lookup 'application' 'illogical-impulse' | jq -r '.apiKeys.gemini')
# Encode image to base64
if [[ "$(base64 --version 2>&1)" = *"FreeBSD"* ]]; then if [[ "$(base64 --version 2>&1)" = *"FreeBSD"* ]]; then
B64FLAGS="--input" B64FLAGS="--input"
else else
B64FLAGS="-w0" B64FLAGS="-w0"
fi fi
B64DATA="$(base64 $B64FLAGS $RESIZED_IMG_PATH)"
# echo $B64DATA
# Prepare request data
payload='{ payload='{
"contents": [{ "contents": [{
"parts":[ "parts":[
{ {
"inline_data": { "inline_data": {
"mime_type":"image/jpeg", "mime_type":"image/jpeg",
"data": "'"$(base64 $B64FLAGS $RESIZED_IMG_PATH)"'" "data": "'"$B64DATA"'"
} }
}, },
{"text": "Categorize the wallpaper. Its file name is '"$WALLPAPER_NAME"'"} {"text": "'"$PROMPT"'"}
] ]
}], }],
"generationConfig": { "generationConfig": {
"responseMimeType": "text/x.enum", "responseMimeType": "text/x.enum",
@@ -35,14 +47,18 @@ payload='{
"type": "string", "type": "string",
"enum": [ "abstract", "anime", "city", "minimalist", "landscape", "plants", "person", "space" ] "enum": [ "abstract", "anime", "city", "minimalist", "landscape", "plants", "person", "space" ]
}, },
"temperature": 0, "temperature": 0
} }
}' }'
# echo "$payload" | jq # echo "$payload" | jq
response=$(curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent" \
# Make the request
response=$(curl "https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:generateContent" \
-H "x-goog-api-key: $API_KEY" \ -H "x-goog-api-key: $API_KEY" \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
-X POST \ -X POST \
-d "$payload" 2> /dev/null) -d "$payload" 2> /dev/null)
# echo "$response" | jq
# Write the result
echo "$response" | jq -r '.candidates[0].content.parts[0].text' echo "$response" | jq -r '.candidates[0].content.parts[0].text'