From 3bd699c9e6176b0298f6b91b3cacc91d049df9cf Mon Sep 17 00:00:00 2001 From: end-4 <97237370+end-4@users.noreply.github.com> Date: Fri, 24 Oct 2025 00:26:47 +0200 Subject: [PATCH] use quickshell region selector for ocr --- dots/.config/hypr/hyprland/keybinds.conf | 5 +- .../common/widgets/ConfigSelectionArray.qml | 2 +- .../modules/regionSelector/OptionsToolbar.qml | 70 ++++++ .../regionSelector/RegionFunctions.qml | 76 ++++++ .../regionSelector/RegionSelection.qml | 238 ++++++------------ .../modules/regionSelector/RegionSelector.qml | 15 ++ .../modules/regionSelector/TargetRegion.qml | 17 +- 7 files changed, 259 insertions(+), 164 deletions(-) create mode 100644 dots/.config/quickshell/ii/modules/regionSelector/OptionsToolbar.qml create mode 100644 dots/.config/quickshell/ii/modules/regionSelector/RegionFunctions.qml diff --git a/dots/.config/hypr/hyprland/keybinds.conf b/dots/.config/hypr/hyprland/keybinds.conf index 46d542399..50ec1e7f0 100644 --- a/dots/.config/hypr/hyprland/keybinds.conf +++ b/dots/.config/hypr/hyprland/keybinds.conf @@ -63,7 +63,10 @@ bind = Super+Shift, S, exec, qs -c $qsConfig ipc call TEST_ALIVE || pidof slurp bind = Super+Shift, A, global, quickshell:regionSearch # Google Lens bind = Super+Shift, A, exec, qs -c $qsConfig ipc call TEST_ALIVE || pidof slurp || ~/.config/hypr/hyprland/scripts/snip_to_search.sh # [hidden] Google Lens (fallback) # OCR -bind = Super+Shift, T,exec,grim -g "$(slurp $SLURP_ARGS)" "/tmp/ocr_image.png" && tesseract "/tmp/ocr_image.png" - | wl-copy && rm "/tmp/ocr_image.png" # [hidden] +bind = Super+Shift, X, global, quickshell:regionOcr # Character recognition >> clipboard +bind = Super+Shift, T, global, quickshell:regionOcr # [hidden] +bind = Super+Shift, X,exec, qs -c $qsConfig ipc call TEST_ALIVE || pidof slurp || grim -g "$(slurp $SLURP_ARGS)" "/tmp/ocr_image.png" && tesseract "/tmp/ocr_image.png" - | wl-copy && rm "/tmp/ocr_image.png" # [hidden] +bind = Super+Shift, T,exec, qs -c $qsConfig ipc call TEST_ALIVE || pidof slurp || grim -g "$(slurp $SLURP_ARGS)" "/tmp/ocr_image.png" && tesseract "/tmp/ocr_image.png" - | wl-copy && rm "/tmp/ocr_image.png" # [hidden] # Color picker bindd = Super+Shift, C, Color picker, exec, hyprpicker -a # Pick color (Hex) >> clipboard # Fullscreen screenshot diff --git a/dots/.config/quickshell/ii/modules/common/widgets/ConfigSelectionArray.qml b/dots/.config/quickshell/ii/modules/common/widgets/ConfigSelectionArray.qml index 943fadf2a..dd102070c 100644 --- a/dots/.config/quickshell/ii/modules/common/widgets/ConfigSelectionArray.qml +++ b/dots/.config/quickshell/ii/modules/common/widgets/ConfigSelectionArray.qml @@ -45,7 +45,7 @@ Flow { rightmost: index === root.options.length - 1 buttonIcon: modelData.icon || "" buttonText: modelData.displayName - toggled: root.currentValue === modelData.value + toggled: root.currentValue == modelData.value onClicked: { root.selected(modelData.value); } diff --git a/dots/.config/quickshell/ii/modules/regionSelector/OptionsToolbar.qml b/dots/.config/quickshell/ii/modules/regionSelector/OptionsToolbar.qml new file mode 100644 index 000000000..813c12013 --- /dev/null +++ b/dots/.config/quickshell/ii/modules/regionSelector/OptionsToolbar.qml @@ -0,0 +1,70 @@ +pragma ComponentBehavior: Bound +import qs +import qs.modules.common +import qs.modules.common.functions +import qs.modules.common.widgets +import qs.services +import QtQuick +import QtQuick.Controls +import QtQuick.Layouts +import Qt5Compat.GraphicalEffects +import Quickshell +import Quickshell.Io +import Quickshell.Wayland +import Quickshell.Hyprland + +// Options toolbar +Toolbar { + id: root + + // Use a synchronizer on these + property var action + property var selectionMode + + MaterialCookie { + Layout.fillHeight: true + Layout.leftMargin: 2 + Layout.rightMargin: 2 + implicitSize: 36 // Intentionally smaller because this one is brighter than others + sides: 10 + amplitude: implicitSize / 44 + color: Appearance.colors.colPrimary + MaterialSymbol { + anchors.centerIn: parent + iconSize: 22 + color: Appearance.colors.colOnPrimary + animateChange: true + text: switch (root.action) { + case RegionSelection.SnipAction.Copy: + case RegionSelection.SnipAction.Edit: + return "content_cut"; + case RegionSelection.SnipAction.Search: + return "image_search"; + case RegionSelection.SnipAction.CharRecognition: + return "document_scanner"; + default: + return ""; + } + } + } + + IconAndTextToolbarButton { + iconText: "activity_zone" + text: Translation.tr("Rect") + toggled: root.selectionMode === RegionSelection.SelectionMode.RectCorners + onClicked: root.selectionMode = RegionSelection.SelectionMode.RectCorners + } + + IconAndTextToolbarButton { + iconText: "gesture" + text: Translation.tr("Circle") + toggled: root.selectionMode === RegionSelection.SelectionMode.Circle + onClicked: root.selectionMode = RegionSelection.SelectionMode.Circle + } + + IconToolbarButton { + text: "close" + colBackground: Appearance.colors.colLayer3 + onClicked: root.dismiss(); + } +} diff --git a/dots/.config/quickshell/ii/modules/regionSelector/RegionFunctions.qml b/dots/.config/quickshell/ii/modules/regionSelector/RegionFunctions.qml new file mode 100644 index 000000000..ee1805d56 --- /dev/null +++ b/dots/.config/quickshell/ii/modules/regionSelector/RegionFunctions.qml @@ -0,0 +1,76 @@ +pragma Singleton +import Quickshell + +Singleton { + id: root + + function intersectionOverUnion(regionA, regionB) { + // region: { at: [x, y], size: [w, h] } + const ax1 = regionA.at[0], ay1 = regionA.at[1]; + const ax2 = ax1 + regionA.size[0], ay2 = ay1 + regionA.size[1]; + const bx1 = regionB.at[0], by1 = regionB.at[1]; + const bx2 = bx1 + regionB.size[0], by2 = by1 + regionB.size[1]; + + const interX1 = Math.max(ax1, bx1); + const interY1 = Math.max(ay1, by1); + const interX2 = Math.min(ax2, bx2); + const interY2 = Math.min(ay2, by2); + + const interArea = Math.max(0, interX2 - interX1) * Math.max(0, interY2 - interY1); + const areaA = (ax2 - ax1) * (ay2 - ay1); + const areaB = (bx2 - bx1) * (by2 - by1); + const unionArea = areaA + areaB - interArea; + + return unionArea > 0 ? interArea / unionArea : 0; + } + + function filterOverlappingImageRegions(regions) { + let keep = []; + let removed = new Set(); + for (let i = 0; i < regions.length; ++i) { + if (removed.has(i)) continue; + let regionA = regions[i]; + for (let j = i + 1; j < regions.length; ++j) { + if (removed.has(j)) continue; + let regionB = regions[j]; + if (intersectionOverUnion(regionA, regionB) > 0) { + // Compare areas + let areaA = regionA.size[0] * regionA.size[1]; + let areaB = regionB.size[0] * regionB.size[1]; + if (areaA <= areaB) { + removed.add(j); + } else { + removed.add(i); + } + } + } + } + for (let i = 0; i < regions.length; ++i) { + if (!removed.has(i)) keep.push(regions[i]); + } + return keep; + } + + function filterWindowRegionsByLayers(windowRegions, layerRegions) { + return windowRegions.filter(windowRegion => { + for (let i = 0; i < layerRegions.length; ++i) { + if (intersectionOverUnion(windowRegion, layerRegions[i]) > 0) + return false; + } + return true; + }); + } + + function filterImageRegions(regions, windowRegions, threshold = 0.1) { + // Remove image regions that overlap too much with any window region + let filtered = regions.filter(region => { + for (let i = 0; i < windowRegions.length; ++i) { + if (intersectionOverUnion(region, windowRegions[i]) > threshold) + return false; + } + return true; + }); + // Remove overlapping image regions, keep only the smaller one + return filterOverlappingImageRegions(filtered); + } +} diff --git a/dots/.config/quickshell/ii/modules/regionSelector/RegionSelection.qml b/dots/.config/quickshell/ii/modules/regionSelector/RegionSelection.qml index 00df73139..5995490a2 100644 --- a/dots/.config/quickshell/ii/modules/regionSelector/RegionSelection.qml +++ b/dots/.config/quickshell/ii/modules/regionSelector/RegionSelection.qml @@ -1,18 +1,15 @@ pragma ComponentBehavior: Bound -import qs import qs.modules.common import qs.modules.common.functions import qs.modules.common.widgets import qs.services import QtQuick import QtQuick.Controls -import QtQuick.Layouts -import Qt5Compat.GraphicalEffects import Quickshell import Quickshell.Io import Quickshell.Wayland -import Quickshell.Widgets import Quickshell.Hyprland +import Qt.labs.synchronizer PanelWindow { id: root @@ -29,7 +26,7 @@ PanelWindow { } // TODO: Ask: sidebar AI; Ocr: tesseract - enum SnipAction { Copy, Edit, Search } + enum SnipAction { Copy, Edit, Search, CharRecognition } enum SelectionMode { RectCorners, Circle } property var action: RegionSelection.SnipAction.Copy property var selectionMode: RegionSelection.SelectionMode.RectCorners @@ -39,8 +36,6 @@ PanelWindow { property string imageSearchEngineBaseUrl: Config.options.search.imageSearch.imageSearchEngineBaseUrl property string fileUploadApiEndpoint: "https://uguu.se/upload" property color overlayColor: "#88111111" - property color genericContentColor: Qt.alpha(root.overlayColor, 0.9) - property color genericContentForeground: "#ddffffff" property color brightText: Appearance.m3colors.darkmode ? Appearance.colors.colOnLayer0 : Appearance.colors.colLayer0 property color brightSecondary: Appearance.m3colors.darkmode ? Appearance.colors.colSecondary : Appearance.colors.colOnSecondary property color brightTertiary: Appearance.m3colors.darkmode ? Appearance.colors.colTertiary : Qt.lighter(Appearance.colors.colPrimary) @@ -76,7 +71,7 @@ PanelWindow { property list points: [] property var mouseButton: null property var imageRegions: [] - readonly property list windowRegions: filterWindowRegionsByLayers( + readonly property list windowRegions: RegionFunctions.filterWindowRegionsByLayers( root.windows.filter(w => w.workspace.id === root.activeWorkspaceId), root.layerRegions ).map(window => { @@ -109,6 +104,7 @@ PanelWindow { }); return offsetAdjustedLayers; } + property list textRegions: [] property bool isCircleSelection: (root.selectionMode === RegionSelection.SelectionMode.Circle) property bool enableWindowRegions: Config.options.regionSelector.targetRegions.windows && !isCircleSelection @@ -131,76 +127,6 @@ PanelWindow { root.regionHeight = root.targetedRegionHeight; } - function intersectionOverUnion(regionA, regionB) { - // region: { at: [x, y], size: [w, h] } - const ax1 = regionA.at[0], ay1 = regionA.at[1]; - const ax2 = ax1 + regionA.size[0], ay2 = ay1 + regionA.size[1]; - const bx1 = regionB.at[0], by1 = regionB.at[1]; - const bx2 = bx1 + regionB.size[0], by2 = by1 + regionB.size[1]; - - const interX1 = Math.max(ax1, bx1); - const interY1 = Math.max(ay1, by1); - const interX2 = Math.min(ax2, bx2); - const interY2 = Math.min(ay2, by2); - - const interArea = Math.max(0, interX2 - interX1) * Math.max(0, interY2 - interY1); - const areaA = (ax2 - ax1) * (ay2 - ay1); - const areaB = (bx2 - bx1) * (by2 - by1); - const unionArea = areaA + areaB - interArea; - - return unionArea > 0 ? interArea / unionArea : 0; - } - - function filterOverlappingImageRegions(regions) { - let keep = []; - let removed = new Set(); - for (let i = 0; i < regions.length; ++i) { - if (removed.has(i)) continue; - let regionA = regions[i]; - for (let j = i + 1; j < regions.length; ++j) { - if (removed.has(j)) continue; - let regionB = regions[j]; - if (intersectionOverUnion(regionA, regionB) > 0) { - // Compare areas - let areaA = regionA.size[0] * regionA.size[1]; - let areaB = regionB.size[0] * regionB.size[1]; - if (areaA <= areaB) { - removed.add(j); - } else { - removed.add(i); - } - } - } - } - for (let i = 0; i < regions.length; ++i) { - if (!removed.has(i)) keep.push(regions[i]); - } - return keep; - } - - function filterWindowRegionsByLayers(windowRegions, layerRegions) { - return windowRegions.filter(windowRegion => { - for (let i = 0; i < layerRegions.length; ++i) { - if (intersectionOverUnion(windowRegion, layerRegions[i]) > 0) - return false; - } - return true; - }); - } - - function filterImageRegions(regions, windowRegions, threshold = 0.1) { - // Remove image regions that overlap too much with any window region - let filtered = regions.filter(region => { - for (let i = 0; i < windowRegions.length; ++i) { - if (intersectionOverUnion(region, windowRegions[i]) > threshold) - return false; - } - return true; - }); - // Remove overlapping image regions, keep only the smaller one - return filterOverlappingImageRegions(filtered); - } - function updateTargetedRegion(x, y) { // Image regions const clickedRegion = root.imageRegions.find(region => { @@ -255,7 +181,8 @@ PanelWindow { command: ["bash", "-c", `mkdir -p '${StringUtils.shellSingleQuoteEscape(root.screenshotDir)}' && grim -o '${StringUtils.shellSingleQuoteEscape(root.screen.name)}' '${StringUtils.shellSingleQuoteEscape(root.screenshotPath)}'`] onExited: (exitCode, exitStatus) => { root.visible = true; - imageDetectionProcess.running = true; + if (root.enableContentRegions) imageDetectionProcess.running = true; + // if (root.action === RegionSelection.SnipAction.CharRecognition) ocrProc.running = true; } } @@ -269,7 +196,7 @@ PanelWindow { stdout: StdioCollector { id: imageDimensionCollector onStreamFinished: { - imageRegions = filterImageRegions( + imageRegions = RegionFunctions.filterImageRegions( JSON.parse(imageDimensionCollector.text), root.windowRegions ); @@ -277,6 +204,41 @@ PanelWindow { } } + Process { + id: ocrProc + command: ["bash", "-c", `tesseract '${StringUtils.shellSingleQuoteEscape(root.screenshotPath)}' stdout tsv 2>/dev/null`] + stdout: StdioCollector { + id: outputCollector + onStreamFinished: { + // level page_num block_num par_num line_num word_num left top width height conf text + const output = outputCollector.text + const lines = output.split("\n").slice(1) // Skip header + const filteredLines = lines.filter(line => (!line.trim().endsWith("-1"))) + let regions = filteredLines.map(line => { + const parts = line.split("\t") + return ({ + "block_num": parseInt(parts[2]), + "line_num": parseInt(parts[4]), + "word_num": parseInt(parts[5]), + "left": parseInt(parts[6]), + "top": parseInt(parts[7]), + "width": parseInt(parts[8]), + "height": parseInt(parts[9]), + "conf": parseInt(parts[10]), + "text": parts.slice(11).join("\t") + }) + }).filter(region => { + if (region === null) return false; + // if (region.text.length <= 3 && region.text.replace(/[^a-zA-Z0-9]/g, "").length < region.text.length / 2) return false; + // if (region.text.length < 2) return false; + return true; + }) + // print(`[Region Selector] OCR Regions: ${JSON.stringify(regions, null, 2)}`) + root.textRegions = regions; + } + } + } + function snip() { // Validity check if (root.regionWidth <= 0 || root.regionHeight <= 0) { @@ -314,6 +276,9 @@ PanelWindow { case RegionSelection.SnipAction.Search: snipProc.command = ["bash", "-c", `${cropInPlace} && xdg-open "${root.imageSearchEngineBaseUrl}$(${uploadAndGetUrl(root.screenshotPath)})" && ${cleanup}`] break; + case RegionSelection.SnipAction.CharRecognition: + snipProc.command = ["bash", "-c", `${cropInPlace} && tesseract '${StringUtils.shellSingleQuoteEscape(root.screenshotPath)}' - | wl-copy && ${cleanup}`] + break; default: console.warn("[Region Selector] Unknown snip action, skipping snip."); root.dismiss(); @@ -432,6 +397,7 @@ PanelWindow { delegate: TargetRegion { z: 2 required property var modelData + clientDimensions: modelData showIcon: true targeted: !root.draggedAway && (root.targetedRegionX === modelData.at[0] @@ -439,21 +405,9 @@ PanelWindow { && root.targetedRegionWidth === modelData.size[0] && root.targetedRegionHeight === modelData.size[1]) - colBackground: root.genericContentColor - colForeground: root.genericContentForeground opacity: root.draggedAway ? 0 : root.targetRegionOpacity - visible: opacity > 0 - Behavior on opacity { - animation: Appearance.animation.elementMoveFast.numberAnimation.createObject(this) - } - - x: modelData.at[0] - y: modelData.at[1] - width: modelData.size[0] - height: modelData.size[1] borderColor: root.windowBorderColor fillColor: targeted ? root.windowFillColor : "transparent" - border.width: targeted ? 4 : 2 text: `${modelData.class}` radius: Appearance.rounding.windowRounding } @@ -467,27 +421,16 @@ PanelWindow { delegate: TargetRegion { z: 3 required property var modelData + clientDimensions: modelData targeted: !root.draggedAway && (root.targetedRegionX === modelData.at[0] && root.targetedRegionY === modelData.at[1] && root.targetedRegionWidth === modelData.size[0] && root.targetedRegionHeight === modelData.size[1]) - colBackground: root.genericContentColor - colForeground: root.genericContentForeground opacity: root.draggedAway ? 0 : root.targetRegionOpacity - visible: opacity > 0 - Behavior on opacity { - animation: Appearance.animation.elementMoveFast.numberAnimation.createObject(this) - } - - x: modelData.at[0] - y: modelData.at[1] - width: modelData.size[0] - height: modelData.size[1] borderColor: root.windowBorderColor fillColor: targeted ? root.windowFillColor : "transparent" - border.width: targeted ? 4 : 2 text: `${modelData.namespace}` radius: Appearance.rounding.windowRounding } @@ -501,33 +444,48 @@ PanelWindow { delegate: TargetRegion { z: 4 required property var modelData + clientDimensions: modelData targeted: !root.draggedAway && (root.targetedRegionX === modelData.at[0] && root.targetedRegionY === modelData.at[1] && root.targetedRegionWidth === modelData.size[0] && root.targetedRegionHeight === modelData.size[1]) - colBackground: root.genericContentColor - colForeground: root.genericContentForeground opacity: root.draggedAway ? 0 : root.contentRegionOpacity - visible: opacity > 0 - Behavior on opacity { - animation: Appearance.animation.elementMoveFast.numberAnimation.createObject(this) - } - - x: modelData.at[0] - y: modelData.at[1] - width: modelData.size[0] - height: modelData.size[1] borderColor: root.imageBorderColor fillColor: targeted ? root.imageFillColor : "transparent" - border.width: targeted ? 4 : 2 text: Translation.tr("Content region") } } + // OCR text regions + // Repeater { + // model: ScriptModel { + // values: root.textRegions + // } + // delegate: Rectangle { + // id: textRegionItem + // z: 5 + // required property var modelData + // property real padding: 4 + // color: ColorUtils.transparentize(Appearance.colors.colTooltip, 0.3) + // radius: 6 + // x: modelData.left - padding + // y: modelData.top - padding + // width: modelData.width + padding + // height: modelData.height + padding + + // StyledText { + // font.pixelSize: Appearance.font.pixelSize.smallie + // anchors.centerIn: parent + // text: textRegionItem.modelData.text + // color: ColorUtils.transparentize(Appearance.colors.colOnTooltip, 0.2) + // } + // } + // } + // Options toolbar - Toolbar { + OptionsToolbar { id: toolbar z: 9999 anchors { @@ -551,49 +509,11 @@ PanelWindow { animation: Appearance.animation.elementMove.numberAnimation.createObject(this) } - MaterialCookie { - Layout.fillHeight: true - Layout.leftMargin: 2 - Layout.rightMargin: 2 - implicitSize: 36 // Intentionally smaller because this one is brighter than others - sides: 10 - amplitude: implicitSize / 44 - color: Appearance.colors.colPrimary - MaterialSymbol { - anchors.centerIn: parent - iconSize: 22 - color: Appearance.colors.colOnPrimary - animateChange: true - text: switch (root.action) { - case RegionSelection.SnipAction.Copy: - case RegionSelection.SnipAction.Edit: - return "content_cut"; - case RegionSelection.SnipAction.Search: - return "image_search"; - default: - return ""; - } - } + Synchronizer on action { + property alias source: root.action } - - IconAndTextToolbarButton { - iconText: "activity_zone" - text: Translation.tr("Rect") - toggled: root.selectionMode === RegionSelection.SelectionMode.RectCorners - onClicked: root.selectionMode = RegionSelection.SelectionMode.RectCorners - } - - IconAndTextToolbarButton { - iconText: "gesture" - text: Translation.tr("Circle") - toggled: root.selectionMode === RegionSelection.SelectionMode.Circle - onClicked: root.selectionMode = RegionSelection.SelectionMode.Circle - } - - IconToolbarButton { - text: "close" - colBackground: Appearance.colors.colLayer3 - onClicked: root.dismiss(); + Synchronizer on selectionMode { + property alias source: root.selectionMode } } } diff --git a/dots/.config/quickshell/ii/modules/regionSelector/RegionSelector.qml b/dots/.config/quickshell/ii/modules/regionSelector/RegionSelector.qml index 7cfd37349..0ccb26eb0 100644 --- a/dots/.config/quickshell/ii/modules/regionSelector/RegionSelector.qml +++ b/dots/.config/quickshell/ii/modules/regionSelector/RegionSelector.qml @@ -56,6 +56,12 @@ Scope { GlobalStates.regionSelectorOpen = true } + function ocr() { + root.action = RegionSelection.SnipAction.CharRecognition + root.selectionMode = RegionSelection.SelectionMode.RectCorners + GlobalStates.regionSelectorOpen = true + } + IpcHandler { target: "region" @@ -65,6 +71,10 @@ Scope { function search() { root.search() } + + function ocr() { + root.ocr() + } } GlobalShortcut { @@ -77,4 +87,9 @@ Scope { description: "Searches the selected region" onPressed: root.search() } + GlobalShortcut { + name: "regionOcr" + description: "Recognizes text in the selected region" + onPressed: root.ocr() + } } diff --git a/dots/.config/quickshell/ii/modules/regionSelector/TargetRegion.qml b/dots/.config/quickshell/ii/modules/regionSelector/TargetRegion.qml index f1043d13b..a1ecbcd0f 100644 --- a/dots/.config/quickshell/ii/modules/regionSelector/TargetRegion.qml +++ b/dots/.config/quickshell/ii/modules/regionSelector/TargetRegion.qml @@ -8,8 +8,10 @@ import Quickshell.Widgets Rectangle { id: root - required property color colBackground - required property color colForeground + required property var clientDimensions + + property color colBackground: Qt.alpha("#88111111", 0.9) + property color colForeground: "#ddffffff" property bool showLabel: Config.options.regionSelector.targetRegions.showLabel property bool showIcon: false property bool targeted: false @@ -20,9 +22,18 @@ Rectangle { z: 2 color: fillColor border.color: borderColor - border.width: targeted ? 3 : 1 + border.width: targeted ? 4 : 2 radius: 4 + visible: opacity > 0 + Behavior on opacity { + animation: Appearance.animation.elementMoveFast.numberAnimation.createObject(this) + } + x: clientDimensions.at[0] + y: clientDimensions.at[1] + width: clientDimensions.size[0] + height: clientDimensions.size[1] + Loader { anchors { top: parent.top