use quickshell region selector for ocr

This commit is contained in:
end-4
2025-10-24 00:26:47 +02:00
parent 6f756f48cb
commit 3bd699c9e6
7 changed files with 259 additions and 164 deletions
+4 -1
View File
@@ -63,7 +63,10 @@ bind = Super+Shift, S, exec, qs -c $qsConfig ipc call TEST_ALIVE || pidof slurp
bind = Super+Shift, A, global, quickshell:regionSearch # Google Lens
bind = Super+Shift, A, exec, qs -c $qsConfig ipc call TEST_ALIVE || pidof slurp || ~/.config/hypr/hyprland/scripts/snip_to_search.sh # [hidden] Google Lens (fallback)
# OCR
bind = Super+Shift, T,exec,grim -g "$(slurp $SLURP_ARGS)" "/tmp/ocr_image.png" && tesseract "/tmp/ocr_image.png" - | wl-copy && rm "/tmp/ocr_image.png" # [hidden]
bind = Super+Shift, X, global, quickshell:regionOcr # Character recognition >> clipboard
bind = Super+Shift, T, global, quickshell:regionOcr # [hidden]
# Fallback OCR (slurp + tesseract) for when quickshell is not running. The fallback is grouped in a subshell because `||` and `&&` have equal precedence in sh: ungrouped, a successful TEST_ALIVE/pidof check would still run tesseract | wl-copy and rm, overwriting the clipboard.
bind = Super+Shift, X,exec, qs -c $qsConfig ipc call TEST_ALIVE || pidof slurp || (grim -g "$(slurp $SLURP_ARGS)" "/tmp/ocr_image.png" && tesseract "/tmp/ocr_image.png" - | wl-copy && rm "/tmp/ocr_image.png") # [hidden]
bind = Super+Shift, T,exec, qs -c $qsConfig ipc call TEST_ALIVE || pidof slurp || (grim -g "$(slurp $SLURP_ARGS)" "/tmp/ocr_image.png" && tesseract "/tmp/ocr_image.png" - | wl-copy && rm "/tmp/ocr_image.png") # [hidden]
# Color picker
bindd = Super+Shift, C, Color picker, exec, hyprpicker -a # Pick color (Hex) >> clipboard
# Fullscreen screenshot
@@ -45,7 +45,7 @@ Flow {
rightmost: index === root.options.length - 1
buttonIcon: modelData.icon || ""
buttonText: modelData.displayName
toggled: root.currentValue === modelData.value
toggled: root.currentValue == modelData.value
onClicked: {
root.selected(modelData.value);
}
@@ -0,0 +1,70 @@
pragma ComponentBehavior: Bound
import qs
import qs.modules.common
import qs.modules.common.functions
import qs.modules.common.widgets
import qs.services
import QtQuick
import QtQuick.Controls
import QtQuick.Layouts
import Qt5Compat.GraphicalEffects
import Quickshell
import Quickshell.Io
import Quickshell.Wayland
import Quickshell.Hyprland
// Options toolbar
Toolbar {
    id: root

    // Use a synchronizer on these
    property var action
    property var selectionMode

    // Emitted when the close button is clicked. Within this file `root` is this
    // toolbar, and nothing here declares a dismiss() for the button to call, so the
    // request is exposed as a signal; the instantiating component is expected to
    // handle it (onDismiss).
    // NOTE(review): assumes the Toolbar base type has no member named dismiss - confirm.
    signal dismiss()

    // Badge whose icon reflects the pending snip action
    MaterialCookie {
        Layout.fillHeight: true
        Layout.leftMargin: 2
        Layout.rightMargin: 2
        implicitSize: 36 // Intentionally smaller because this one is brighter than others
        sides: 10
        amplitude: implicitSize / 44
        color: Appearance.colors.colPrimary
        MaterialSymbol {
            anchors.centerIn: parent
            iconSize: 22
            color: Appearance.colors.colOnPrimary
            animateChange: true
            // Copy and Edit share one icon; unknown actions show no icon
            text: switch (root.action) {
                case RegionSelection.SnipAction.Copy:
                case RegionSelection.SnipAction.Edit:
                    return "content_cut";
                case RegionSelection.SnipAction.Search:
                    return "image_search";
                case RegionSelection.SnipAction.CharRecognition:
                    return "document_scanner";
                default:
                    return "";
            }
        }
    }

    // Selection mode toggles; each writes root.selectionMode when clicked
    IconAndTextToolbarButton {
        iconText: "activity_zone"
        text: Translation.tr("Rect")
        toggled: root.selectionMode === RegionSelection.SelectionMode.RectCorners
        onClicked: root.selectionMode = RegionSelection.SelectionMode.RectCorners
    }
    IconAndTextToolbarButton {
        iconText: "gesture"
        text: Translation.tr("Circle")
        toggled: root.selectionMode === RegionSelection.SelectionMode.Circle
        onClicked: root.selectionMode = RegionSelection.SelectionMode.Circle
    }

    // Close button: emits the dismiss signal declared above
    IconToolbarButton {
        text: "close"
        colBackground: Appearance.colors.colLayer3
        onClicked: root.dismiss();
    }
}
@@ -0,0 +1,76 @@
pragma Singleton
import Quickshell
Singleton {
id: root
// Intersection-over-union of two axis-aligned rectangles.
// Each region is shaped like { at: [x, y], size: [w, h] }.
// Returns overlap area / combined area, or 0 when the combined area is not positive.
function intersectionOverUnion(regionA, regionB) {
    // Expands { at, size } into explicit edge coordinates.
    const toEdges = region => {
        const left = region.at[0];
        const top = region.at[1];
        return {
            left: left,
            top: top,
            right: left + region.size[0],
            bottom: top + region.size[1]
        };
    };
    const a = toEdges(regionA);
    const b = toEdges(regionB);

    // A negative extent means the rectangles do not overlap on that axis.
    const overlapWidth = Math.max(0, Math.min(a.right, b.right) - Math.max(a.left, b.left));
    const overlapHeight = Math.max(0, Math.min(a.bottom, b.bottom) - Math.max(a.top, b.top));
    const overlapArea = overlapWidth * overlapHeight;

    const combinedArea = (a.right - a.left) * (a.bottom - a.top)
        + (b.right - b.left) * (b.bottom - b.top)
        - overlapArea;
    if (combinedArea > 0) {
        return overlapArea / combinedArea;
    }
    return 0;
}
// Resolves overlaps between image regions: for each overlapping pair only the
// smaller region survives (the earlier one wins ties).
// regions: array of { at: [x, y], size: [w, h] }.
// Returns a new array with the surviving regions in their original order.
function filterOverlappingImageRegions(regions) {
    const keep = [];
    const removed = new Set();
    const areaOf = region => region.size[0] * region.size[1];
    for (let i = 0; i < regions.length; ++i) {
        if (removed.has(i)) continue;
        const regionA = regions[i];
        for (let j = i + 1; j < regions.length; ++j) {
            if (removed.has(j)) continue;
            const regionB = regions[j];
            if (intersectionOverUnion(regionA, regionB) > 0) {
                if (areaOf(regionA) <= areaOf(regionB)) {
                    removed.add(j);
                } else {
                    removed.add(i);
                    // regionA is discarded now, so it must not go on to eliminate
                    // larger regions that overlap nothing that is actually kept.
                    break;
                }
            }
        }
    }
    for (let i = 0; i < regions.length; ++i) {
        if (!removed.has(i)) keep.push(regions[i]);
    }
    return keep;
}
// Keeps only the window regions that share no area with any layer region.
// Both arguments hold { at: [x, y], size: [w, h] } rectangles.
function filterWindowRegionsByLayers(windowRegions, layerRegions) {
    // True as soon as the candidate overlaps at least one layer.
    const touchesLayer = candidate => {
        let idx = 0;
        while (idx < layerRegions.length) {
            if (intersectionOverUnion(candidate, layerRegions[idx]) > 0) {
                return true;
            }
            idx += 1;
        }
        return false;
    };
    return windowRegions.filter(candidate => !touchesLayer(candidate));
}
// Filters detected image regions in two passes:
//   1. drop regions whose IoU with any window region exceeds `threshold`;
//   2. hand the remainder to filterOverlappingImageRegions to resolve mutual overlaps.
function filterImageRegions(regions, windowRegions, threshold = 0.1) {
    // True when the region coincides with some window beyond the allowed IoU.
    const matchesWindow = region => {
        let idx = 0;
        while (idx < windowRegions.length) {
            if (intersectionOverUnion(region, windowRegions[idx]) > threshold) {
                return true;
            }
            idx += 1;
        }
        return false;
    };
    const notWindows = regions.filter(region => !matchesWindow(region));
    return filterOverlappingImageRegions(notWindows);
}
}
@@ -1,18 +1,15 @@
pragma ComponentBehavior: Bound
import qs
import qs.modules.common
import qs.modules.common.functions
import qs.modules.common.widgets
import qs.services
import QtQuick
import QtQuick.Controls
import QtQuick.Layouts
import Qt5Compat.GraphicalEffects
import Quickshell
import Quickshell.Io
import Quickshell.Wayland
import Quickshell.Widgets
import Quickshell.Hyprland
import Qt.labs.synchronizer
PanelWindow {
id: root
@@ -29,7 +26,7 @@ PanelWindow {
}
// TODO: Ask: sidebar AI; Ocr: tesseract
enum SnipAction { Copy, Edit, Search }
enum SnipAction { Copy, Edit, Search, CharRecognition }
enum SelectionMode { RectCorners, Circle }
property var action: RegionSelection.SnipAction.Copy
property var selectionMode: RegionSelection.SelectionMode.RectCorners
@@ -39,8 +36,6 @@ PanelWindow {
property string imageSearchEngineBaseUrl: Config.options.search.imageSearch.imageSearchEngineBaseUrl
property string fileUploadApiEndpoint: "https://uguu.se/upload"
property color overlayColor: "#88111111"
property color genericContentColor: Qt.alpha(root.overlayColor, 0.9)
property color genericContentForeground: "#ddffffff"
property color brightText: Appearance.m3colors.darkmode ? Appearance.colors.colOnLayer0 : Appearance.colors.colLayer0
property color brightSecondary: Appearance.m3colors.darkmode ? Appearance.colors.colSecondary : Appearance.colors.colOnSecondary
property color brightTertiary: Appearance.m3colors.darkmode ? Appearance.colors.colTertiary : Qt.lighter(Appearance.colors.colPrimary)
@@ -76,7 +71,7 @@ PanelWindow {
property list<point> points: []
property var mouseButton: null
property var imageRegions: []
readonly property list<var> windowRegions: filterWindowRegionsByLayers(
readonly property list<var> windowRegions: RegionFunctions.filterWindowRegionsByLayers(
root.windows.filter(w => w.workspace.id === root.activeWorkspaceId),
root.layerRegions
).map(window => {
@@ -109,6 +104,7 @@ PanelWindow {
});
return offsetAdjustedLayers;
}
property list<var> textRegions: []
property bool isCircleSelection: (root.selectionMode === RegionSelection.SelectionMode.Circle)
property bool enableWindowRegions: Config.options.regionSelector.targetRegions.windows && !isCircleSelection
@@ -131,76 +127,6 @@ PanelWindow {
root.regionHeight = root.targetedRegionHeight;
}
// Intersection-over-union of two axis-aligned rectangles.
// Each region is shaped like { at: [x, y], size: [w, h] }.
// Returns overlap area / combined area, or 0 when the combined area is not positive.
function intersectionOverUnion(regionA, regionB) {
    // Expands { at, size } into explicit edge coordinates.
    const toEdges = region => {
        const left = region.at[0];
        const top = region.at[1];
        return {
            left: left,
            top: top,
            right: left + region.size[0],
            bottom: top + region.size[1]
        };
    };
    const a = toEdges(regionA);
    const b = toEdges(regionB);

    // A negative extent means the rectangles do not overlap on that axis.
    const overlapWidth = Math.max(0, Math.min(a.right, b.right) - Math.max(a.left, b.left));
    const overlapHeight = Math.max(0, Math.min(a.bottom, b.bottom) - Math.max(a.top, b.top));
    const overlapArea = overlapWidth * overlapHeight;

    const combinedArea = (a.right - a.left) * (a.bottom - a.top)
        + (b.right - b.left) * (b.bottom - b.top)
        - overlapArea;
    if (combinedArea > 0) {
        return overlapArea / combinedArea;
    }
    return 0;
}
// Resolves overlaps between image regions: for each overlapping pair only the
// smaller region survives (the earlier one wins ties).
// regions: array of { at: [x, y], size: [w, h] }.
// Returns a new array with the surviving regions in their original order.
function filterOverlappingImageRegions(regions) {
    const keep = [];
    const removed = new Set();
    const areaOf = region => region.size[0] * region.size[1];
    for (let i = 0; i < regions.length; ++i) {
        if (removed.has(i)) continue;
        const regionA = regions[i];
        for (let j = i + 1; j < regions.length; ++j) {
            if (removed.has(j)) continue;
            const regionB = regions[j];
            if (intersectionOverUnion(regionA, regionB) > 0) {
                if (areaOf(regionA) <= areaOf(regionB)) {
                    removed.add(j);
                } else {
                    removed.add(i);
                    // regionA is discarded now, so it must not go on to eliminate
                    // larger regions that overlap nothing that is actually kept.
                    break;
                }
            }
        }
    }
    for (let i = 0; i < regions.length; ++i) {
        if (!removed.has(i)) keep.push(regions[i]);
    }
    return keep;
}
// Keeps only the window regions that share no area with any layer region.
// Both arguments hold { at: [x, y], size: [w, h] } rectangles.
function filterWindowRegionsByLayers(windowRegions, layerRegions) {
    // True as soon as the candidate overlaps at least one layer.
    const touchesLayer = candidate => {
        let idx = 0;
        while (idx < layerRegions.length) {
            if (intersectionOverUnion(candidate, layerRegions[idx]) > 0) {
                return true;
            }
            idx += 1;
        }
        return false;
    };
    return windowRegions.filter(candidate => !touchesLayer(candidate));
}
// Filters detected image regions in two passes:
//   1. drop regions whose IoU with any window region exceeds `threshold`;
//   2. hand the remainder to filterOverlappingImageRegions to resolve mutual overlaps.
function filterImageRegions(regions, windowRegions, threshold = 0.1) {
    // True when the region coincides with some window beyond the allowed IoU.
    const matchesWindow = region => {
        let idx = 0;
        while (idx < windowRegions.length) {
            if (intersectionOverUnion(region, windowRegions[idx]) > threshold) {
                return true;
            }
            idx += 1;
        }
        return false;
    };
    const notWindows = regions.filter(region => !matchesWindow(region));
    return filterOverlappingImageRegions(notWindows);
}
function updateTargetedRegion(x, y) {
// Image regions
const clickedRegion = root.imageRegions.find(region => {
@@ -255,7 +181,8 @@ PanelWindow {
command: ["bash", "-c", `mkdir -p '${StringUtils.shellSingleQuoteEscape(root.screenshotDir)}' && grim -o '${StringUtils.shellSingleQuoteEscape(root.screen.name)}' '${StringUtils.shellSingleQuoteEscape(root.screenshotPath)}'`]
onExited: (exitCode, exitStatus) => {
root.visible = true;
imageDetectionProcess.running = true;
if (root.enableContentRegions) imageDetectionProcess.running = true;
// if (root.action === RegionSelection.SnipAction.CharRecognition) ocrProc.running = true;
}
}
@@ -269,7 +196,7 @@ PanelWindow {
stdout: StdioCollector {
id: imageDimensionCollector
onStreamFinished: {
imageRegions = filterImageRegions(
imageRegions = RegionFunctions.filterImageRegions(
JSON.parse(imageDimensionCollector.text),
root.windowRegions
);
@@ -277,6 +204,41 @@ PanelWindow {
}
}
Process {
    id: ocrProc
    // Runs tesseract on the screenshot with TSV output on stdout; its stderr is discarded.
    command: ["bash", "-c", `tesseract '${StringUtils.shellSingleQuoteEscape(root.screenshotPath)}' stdout tsv 2>/dev/null`]
    stdout: StdioCollector {
        id: outputCollector
        // Parses the collected TSV into root.textRegions once the stream ends.
        onStreamFinished: {
            // Columns: level page_num block_num par_num line_num word_num left top width height conf text
            const fieldCount = 12
            const confIndex = 10
            const rows = outputCollector.text.split("\n").slice(1) // Skip header
            const regions = rows
                .map(row => row.split("\t"))
                // Skip blank/short rows (e.g. the empty string after the final newline),
                // which would otherwise become an all-NaN region. Rows with a negative
                // conf are dropped by looking at the conf column itself rather than the
                // line suffix, so recognized text that merely ends in "-1" is kept.
                .filter(parts => parts.length >= fieldCount && parseInt(parts[confIndex], 10) >= 0)
                .map(parts => ({
                    "block_num": parseInt(parts[2], 10),
                    "line_num": parseInt(parts[4], 10),
                    "word_num": parseInt(parts[5], 10),
                    "left": parseInt(parts[6], 10),
                    "top": parseInt(parts[7], 10),
                    "width": parseInt(parts[8], 10),
                    "height": parseInt(parts[9], 10),
                    "conf": parseInt(parts[confIndex], 10),
                    // Re-join in case the recognized text itself contained tabs
                    "text": parts.slice(11).join("\t")
                }))
            // Disabled noise heuristics, kept for reference:
            // if (region.text.length <= 3 && region.text.replace(/[^a-zA-Z0-9]/g, "").length < region.text.length / 2) return false;
            // if (region.text.length < 2) return false;
            // print(`[Region Selector] OCR Regions: ${JSON.stringify(regions, null, 2)}`)
            root.textRegions = regions;
        }
    }
}
function snip() {
// Validity check
if (root.regionWidth <= 0 || root.regionHeight <= 0) {
@@ -314,6 +276,9 @@ PanelWindow {
case RegionSelection.SnipAction.Search:
snipProc.command = ["bash", "-c", `${cropInPlace} && xdg-open "${root.imageSearchEngineBaseUrl}$(${uploadAndGetUrl(root.screenshotPath)})" && ${cleanup}`]
break;
case RegionSelection.SnipAction.CharRecognition:
snipProc.command = ["bash", "-c", `${cropInPlace} && tesseract '${StringUtils.shellSingleQuoteEscape(root.screenshotPath)}' - | wl-copy && ${cleanup}`]
break;
default:
console.warn("[Region Selector] Unknown snip action, skipping snip.");
root.dismiss();
@@ -432,6 +397,7 @@ PanelWindow {
delegate: TargetRegion {
z: 2
required property var modelData
clientDimensions: modelData
showIcon: true
targeted: !root.draggedAway &&
(root.targetedRegionX === modelData.at[0]
@@ -439,21 +405,9 @@ PanelWindow {
&& root.targetedRegionWidth === modelData.size[0]
&& root.targetedRegionHeight === modelData.size[1])
colBackground: root.genericContentColor
colForeground: root.genericContentForeground
opacity: root.draggedAway ? 0 : root.targetRegionOpacity
visible: opacity > 0
Behavior on opacity {
animation: Appearance.animation.elementMoveFast.numberAnimation.createObject(this)
}
x: modelData.at[0]
y: modelData.at[1]
width: modelData.size[0]
height: modelData.size[1]
borderColor: root.windowBorderColor
fillColor: targeted ? root.windowFillColor : "transparent"
border.width: targeted ? 4 : 2
text: `${modelData.class}`
radius: Appearance.rounding.windowRounding
}
@@ -467,27 +421,16 @@ PanelWindow {
delegate: TargetRegion {
z: 3
required property var modelData
clientDimensions: modelData
targeted: !root.draggedAway &&
(root.targetedRegionX === modelData.at[0]
&& root.targetedRegionY === modelData.at[1]
&& root.targetedRegionWidth === modelData.size[0]
&& root.targetedRegionHeight === modelData.size[1])
colBackground: root.genericContentColor
colForeground: root.genericContentForeground
opacity: root.draggedAway ? 0 : root.targetRegionOpacity
visible: opacity > 0
Behavior on opacity {
animation: Appearance.animation.elementMoveFast.numberAnimation.createObject(this)
}
x: modelData.at[0]
y: modelData.at[1]
width: modelData.size[0]
height: modelData.size[1]
borderColor: root.windowBorderColor
fillColor: targeted ? root.windowFillColor : "transparent"
border.width: targeted ? 4 : 2
text: `${modelData.namespace}`
radius: Appearance.rounding.windowRounding
}
@@ -501,33 +444,48 @@ PanelWindow {
delegate: TargetRegion {
z: 4
required property var modelData
clientDimensions: modelData
targeted: !root.draggedAway &&
(root.targetedRegionX === modelData.at[0]
&& root.targetedRegionY === modelData.at[1]
&& root.targetedRegionWidth === modelData.size[0]
&& root.targetedRegionHeight === modelData.size[1])
colBackground: root.genericContentColor
colForeground: root.genericContentForeground
opacity: root.draggedAway ? 0 : root.contentRegionOpacity
visible: opacity > 0
Behavior on opacity {
animation: Appearance.animation.elementMoveFast.numberAnimation.createObject(this)
}
x: modelData.at[0]
y: modelData.at[1]
width: modelData.size[0]
height: modelData.size[1]
borderColor: root.imageBorderColor
fillColor: targeted ? root.imageFillColor : "transparent"
border.width: targeted ? 4 : 2
text: Translation.tr("Content region")
}
}
// OCR text regions
// Repeater {
// model: ScriptModel {
// values: root.textRegions
// }
// delegate: Rectangle {
// id: textRegionItem
// z: 5
// required property var modelData
// property real padding: 4
// color: ColorUtils.transparentize(Appearance.colors.colTooltip, 0.3)
// radius: 6
// x: modelData.left - padding
// y: modelData.top - padding
// width: modelData.width + padding
// height: modelData.height + padding
// StyledText {
// font.pixelSize: Appearance.font.pixelSize.smallie
// anchors.centerIn: parent
// text: textRegionItem.modelData.text
// color: ColorUtils.transparentize(Appearance.colors.colOnTooltip, 0.2)
// }
// }
// }
// Options toolbar
Toolbar {
OptionsToolbar {
id: toolbar
z: 9999
anchors {
@@ -551,49 +509,11 @@ PanelWindow {
animation: Appearance.animation.elementMove.numberAnimation.createObject(this)
}
MaterialCookie {
Layout.fillHeight: true
Layout.leftMargin: 2
Layout.rightMargin: 2
implicitSize: 36 // Intentionally smaller because this one is brighter than others
sides: 10
amplitude: implicitSize / 44
color: Appearance.colors.colPrimary
MaterialSymbol {
anchors.centerIn: parent
iconSize: 22
color: Appearance.colors.colOnPrimary
animateChange: true
text: switch (root.action) {
case RegionSelection.SnipAction.Copy:
case RegionSelection.SnipAction.Edit:
return "content_cut";
case RegionSelection.SnipAction.Search:
return "image_search";
default:
return "";
}
}
Synchronizer on action {
property alias source: root.action
}
IconAndTextToolbarButton {
iconText: "activity_zone"
text: Translation.tr("Rect")
toggled: root.selectionMode === RegionSelection.SelectionMode.RectCorners
onClicked: root.selectionMode = RegionSelection.SelectionMode.RectCorners
}
IconAndTextToolbarButton {
iconText: "gesture"
text: Translation.tr("Circle")
toggled: root.selectionMode === RegionSelection.SelectionMode.Circle
onClicked: root.selectionMode = RegionSelection.SelectionMode.Circle
}
IconToolbarButton {
text: "close"
colBackground: Appearance.colors.colLayer3
onClicked: root.dismiss();
Synchronizer on selectionMode {
property alias source: root.selectionMode
}
}
}
@@ -56,6 +56,12 @@ Scope {
GlobalStates.regionSelectorOpen = true
}
// Switches the selector to character recognition with rectangle selection, then
// raises GlobalStates.regionSelectorOpen. The action and mode are assigned before
// the open flag is set.
function ocr() {
root.action = RegionSelection.SnipAction.CharRecognition
root.selectionMode = RegionSelection.SelectionMode.RectCorners
GlobalStates.regionSelectorOpen = true
}
IpcHandler {
target: "region"
@@ -65,6 +71,10 @@ Scope {
// IPC-callable wrapper: forwards to root.search().
function search() {
root.search()
}
// IPC-callable wrapper: forwards to root.ocr().
function ocr() {
root.ocr()
}
}
GlobalShortcut {
@@ -77,4 +87,9 @@ Scope {
description: "Searches the selected region"
onPressed: root.search()
}
// Global shortcut registered under the name "regionOcr"; pressing it calls root.ocr().
GlobalShortcut {
name: "regionOcr"
description: "Recognizes text in the selected region"
onPressed: root.ocr()
}
}
@@ -8,8 +8,10 @@ import Quickshell.Widgets
Rectangle {
id: root
required property color colBackground
required property color colForeground
required property var clientDimensions
property color colBackground: Qt.alpha("#88111111", 0.9)
property color colForeground: "#ddffffff"
property bool showLabel: Config.options.regionSelector.targetRegions.showLabel
property bool showIcon: false
property bool targeted: false
@@ -20,9 +22,18 @@ Rectangle {
z: 2
color: fillColor
border.color: borderColor
border.width: targeted ? 3 : 1
border.width: targeted ? 4 : 2
radius: 4
visible: opacity > 0
Behavior on opacity {
animation: Appearance.animation.elementMoveFast.numberAnimation.createObject(this)
}
x: clientDimensions.at[0]
y: clientDimensions.at[1]
width: clientDimensions.size[0]
height: clientDimensions.size[1]
Loader {
anchors {
top: parent.top