HyperAI

Interactive visualizations are invaluable for computer vision engineers who frequently need to iterate on image processing tasks and require immediate visual feedback. This article explores how to build a modern GUI application for computer vision using Python, focusing on OpenCV for basic interaction and CustomTkinter for a more advanced and visually appealing interface. Prerequisites To follow along, ensure your local environment is set up with the necessary packages: ```sh uv add numpy opencv-python pillow customtkinter ``` Goal The aim is to create an application that uses a webcam feed and allows users to apply various filters in real-time. The filtered images will be displayed in a window, complete with a user-friendly interface. Starting with OpenCV Basic GUI with Keyboard Input First, we'll implement a basic OpenCV window to display the webcam feed and add keyboard inputs to switch between different filters. ```python import cv2 cap = cv2.VideoCapture(0) filter_type = "normal" while True: ret, frame = cap.read() if not ret: break if filter_type == "grayscale": frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) elif filter_type == "normal": pass # Add a border and filter name border_height = 50 border_color = (0, 0, 0) frame = cv2.copyMakeBorder(frame, 0, border_height, 0, 0, cv2.BORDER_CONSTANT, value=border_color) cv2.putText(frame, filter_type, (frame.shape[1] // 2 - 50, frame.shape[0] - border_height // 2 + 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA) cv2.imshow("Video Feed", frame) key = cv2.waitKey(1) & 0xFF if key == ord('q'): break elif key == ord('1'): filter_type = "normal" elif key == ord('2'): filter_type = "grayscale" cap.release() cv2.destroyAllWindows() ``` This script fetches frames from the webcam, applies the selected filter (default is "normal"), adds a border with the filter name, and displays the result. Press '1' to switch to normal mode, '2' to switch to grayscale mode, and 'q' to quit. 
Adding Sliders OpenCV also offers basic UI elements like sliders (trackbars). We can use these to select different filters dynamically. ```python import cv2 import numpy as np cap = cv2.VideoCapture(0) cv2.namedWindow("Webcam Stream") filter_types = ["normal", "grayscale", "blur", "threshold", "canny", "sobel", "laplacian"] cv2.createTrackbar("Filter", "Webcam Stream", 0, len(filter_types) - 1, lambda _: None) def process_frame(frame, filter_type): if filter_type == "grayscale": frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) elif filter_type == "blur": frame = cv2.GaussianBlur(frame, ksize=(15, 15), sigmaX=0) elif filter_type == "threshold": gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) _, frame = cv2.threshold(gray, thresh=127, maxval=255, type=cv2.THRESH_BINARY) elif filter_type == "canny": frame = cv2.Canny(frame, threshold1=100, threshold2=200) elif filter_type == "sobel": frame = cv2.Sobel(frame, ddepth=cv2.CV_64F, dx=1, dy=0, ksize=5) elif filter_type == "laplacian": frame = cv2.Laplacian(frame, ddepth=cv2.CV_64F) elif filter_type == "normal": pass if len(frame.shape) == 2: # Convert grayscale to BGR frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR) if frame.dtype != np.uint8: # Scale the frame to uint8 if necessary cv2.normalize(frame, frame, 0, 255, cv2.NORM_MINMAX) frame = frame.astype(np.uint8) return frame while True: ret, frame = cap.read() if not ret: break filter_id = cv2.getTrackbarPos("Filter", "Webcam Stream") filter_type = filter_types[filter_id] frame = process_frame(frame, filter_type) # Add a border and filter name border_height = 50 border_color = (0, 0, 0) frame = cv2.copyMakeBorder(frame, 0, border_height, 0, 0, cv2.BORDER_CONSTANT, value=border_color) cv2.putText(frame, filter_type, (frame.shape[1] // 2 - 50, frame.shape[0] - border_height // 2 + 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA) cv2.imshow("Webcam Stream", frame) key = cv2.waitKey(1) & 0xFF if key == ord('q'): break cap.release() cv2.destroyAllWindows() ``` Modern GUI with 
CustomTkinter Basic Layout CustomTkinter provides a more modern and visually appealing interface compared to OpenCV. We start by creating a class for our application and setting up two frames: one for the filter selection and another for the image display. ```python import customtkinter class App(customtkinter.CTk): def __init__(self) -> None: super().__init__() self.title("Webcam Stream") self.geometry("800x600") self.filter_var = customtkinter.IntVar(value=0) # Frame for filters self.filters_frame = customtkinter.CTkFrame(self) self.filters_frame.pack(side="left", fill="both", expand=False, padx=10, pady=10) # Frame for image display self.image_frame = customtkinter.CTkFrame(self) self.image_frame.pack(side="right", fill="both", expand=True, padx=10, pady=10) self.image_display = customtkinter.CTkLabel(self.image_frame, text="Loading...") self.image_display.pack(fill="both", expand=True, padx=10, pady=10) app = App() app.mainloop() ``` Filter Radio Buttons Next, we add radio buttons to select the filter types. 
```python class App(customtkinter.CTk): def __init__(self) -> None: super().__init__() self.title("Webcam Stream") self.geometry("800x600") self.filter_var = customtkinter.IntVar(value=0) # Frame for filters self.filters_frame = customtkinter.CTkFrame(self) self.filters_frame.pack(side="left", fill="both", expand=False, padx=10, pady=10) filter_types = ["normal", "grayscale", "blur", "threshold", "canny", "sobel", "laplacian"] for filter_id, filter_type in enumerate(filter_types): rb_filter = customtkinter.CTkRadioButton( self.filters_frame, text=filter_type.capitalize(), variable=self.filter_var, value=filter_id, ) rb_filter.pack(padx=10, pady=10) if filter_id == 0: rb_filter.select() # Frame for image display self.image_frame = customtkinter.CTkFrame(self) self.image_frame.pack(side="right", fill="both", expand=True, padx=10, pady=10) self.image_display = customtkinter.CTkLabel(self.image_frame, text="Loading...") self.image_display.pack(fill="both", expand=True, padx=10, pady=10) ``` Image Display Component We create a custom CTkImageDisplay class to handle the display of OpenCV frames. 
```python class CTkImageDisplay(customtkinter.CTkLabel): def __init__(self, master) -> None: self._textvariable = customtkinter.StringVar(master, "Loading...") super().__init__(master, textvariable=self._textvariable, image=None) def set_frame(self, frame: npt.NDArray) -> None: target_width, target_height = frame.shape[1], frame.shape[0] frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_pil = Image.fromarray(frame_rgb, "RGB") ctk_image = customtkinter.CTkImage(light_image=frame_pil, dark_image=frame_pil, size=(target_width, target_height)) self.configure(image=ctk_image, text="") self._textvariable.set("") ``` Integrate this class into our main application: ```python class App(customtkinter.CTk): def __init__(self) -> None: super().__init__() self.title("Webcam Stream") self.geometry("800x600") self.filter_var = customtkinter.IntVar(value=0) # Frame for filters self.filters_frame = customtkinter.CTkFrame(self) self.filters_frame.pack(side="left", fill="both", expand=False, padx=10, pady=10) self.filter_types = ["normal", "grayscale", "blur", "threshold", "canny", "sobel", "laplacian"] for filter_id, filter_type in enumerate(self.filter_types): rb_filter = customtkinter.CTkRadioButton( self.filters_frame, text=filter_type.capitalize(), variable=self.filter_var, value=filter_id, ) rb_filter.pack(padx=10, pady=10) if filter_id == 0: rb_filter.select() # Frame for image display self.image_frame = customtkinter.CTkFrame(self) self.image_frame.pack(side="right", fill="both", expand=True, padx=10, pady=10) self.image_display = CTkImageDisplay(self.image_frame) self.image_display.pack(fill="both", expand=True, padx=10, pady=10) self.cap = cv2.VideoCapture(0) self.after(10, self.update_frame) def update_frame(self) -> None: ret, frame = self.cap.read() if not ret: return filter_id = self.filter_var.get() filter_type = self.filter_types[filter_id] if filter_type == "grayscale": frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) elif filter_type == "blur": frame = cv2.GaussianBlur(frame, ksize=(15, 
15), sigmaX=0) elif filter_type == "threshold": gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) _, frame = cv2.threshold(gray, thresh=127, maxval=255, type=cv2.THRESH_BINARY) elif filter_type == "canny": frame = cv2.Canny(frame, threshold1=100, threshold2=200) elif filter_type == "sobel": frame = cv2.Sobel(frame, ddepth=cv2.CV_64F, dx=1, dy=0, ksize=5) elif filter_type == "laplacian": frame = cv2.Laplacian(frame, ddepth=cv2.CV_64F) elif filter_type == "normal": pass if frame.dtype != np.uint8: cv2.normalize(frame, frame, 0, 255, cv2.NORM_MINMAX) frame = frame.astype(np.uint8) if len(frame.shape) == 2: # Convert grayscale to BGR frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR) self.image_display.set_frame(frame) self.after(10, self.update_frame) app = App() app.mainloop() ``` Multithreading and Synchronization Running everything in the main thread can lead to performance issues and an unresponsive UI, especially with heavier image processing tasks. We address this by separating the image processing into a dedicated thread and synchronizing it with the UI using a single-slot queue. 
```python import threading import queue import cv2 import customtkinter import numpy as np from PIL import Image import numpy.typing as npt class CTkImageDisplay(customtkinter.CTkLabel): def __init__(self, master) -> None: self._textvariable = customtkinter.StringVar(master, "Loading...") super().__init__(master, textvariable=self._textvariable, image=None) def set_frame(self, frame: npt.NDArray) -> None: target_width, target_height = frame.shape[1], frame.shape[0] frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) frame_pil = Image.fromarray(frame_rgb, "RGB") ctk_image = customtkinter.CTkImage(light_image=frame_pil, dark_image=frame_pil, size=(target_width, target_height)) self.configure(image=ctk_image, text="") self._textvariable.set("") class App(customtkinter.CTk): def __init__(self) -> None: super().__init__() self.title("Webcam Stream") self.geometry("800x600") self.filter_var = customtkinter.IntVar(value=0) self.queue = queue.Queue(maxsize=1) # Frame for filters self.filters_frame = customtkinter.CTkFrame(self) self.filters_frame.pack(side="left", fill="both", expand=False, padx=10, pady=10) self.filter_types = ["normal", "grayscale", "blur", "threshold", "canny", "sobel", "laplacian"] for filter_id, filter_type in enumerate(self.filter_types): rb_filter = customtkinter.CTkRadioButton( self.filters_frame, text=filter_type.capitalize(), variable=self.filter_var, value=filter_id, ) rb_filter.pack(padx=10, pady=10) if filter_id == 0: rb_filter.select() # Frame for image display self.image_frame = customtkinter.CTkFrame(self) self.image_frame.pack(side="right", fill="both", expand=True, padx=10, pady=10) self.image_display = CTkImageDisplay(self.image_frame) self.image_display.pack(fill="both", expand=True, padx=10, pady=10) self.webcam_thread = threading.Thread(target=self.run_webcam_loop, daemon=True) self.webcam_thread.start() self.frame_loop_dt_ms = 16 # ~60 FPS self.after(self.frame_loop_dt_ms, self._update_frame) def _update_frame(self) -> None: try: frame = self.queue.get_nowait() 
self.image_display.set_frame(frame) except queue.Empty: pass self.after(self.frame_loop_dt_ms, self._update_frame) def run_webcam_loop(self) -> None: self.cap = cv2.VideoCapture(0) if not self.cap.isOpened(): return while True: ret, frame = self.cap.read() if not ret: break filter_id = self.filter_var.get() filter_type = self.filter_types[filter_id] if filter_type == "grayscale": frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) elif filter_type == "blur": frame = cv2.GaussianBlur(frame, ksize=(15, 15), sigmaX=0) elif filter_type == "threshold": gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) _, frame = cv2.threshold(gray, thresh=127, maxval=255, type=cv2.THRESH_BINARY) elif filter_type == "canny": frame = cv2.Canny(frame, threshold1=100, threshold2=200) elif filter_type == "sobel": frame = cv2.Sobel(frame, ddepth=cv2.CV_64F, dx=1, dy=0, ksize=5) elif filter_type == "laplacian": frame = cv2.Laplacian(frame, ddepth=cv2.CV_64F) elif filter_type == "normal": pass if frame.dtype != np.uint8: cv2.normalize(frame, frame, 0, 255, cv2.NORM_MINMAX) frame = frame.astype(np.uint8) if len(frame.shape) == 2: # Convert grayscale to BGR frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR) self.queue.put(frame) app = App() app.mainloop() ``` This solution ensures that the UI remains responsive, as the image processing and GUI updates are handled in separate threads. The queue.Queue provides thread-safe communication between these threads. Conclusion By combining a modern UI framework like CustomTkinter with OpenCV, we can create sleek and interactive GUI applications for computer vision tasks. Running image processing in a separate thread and using a single-slot queue for synchronization enhances the user experience and avoids blocking the main GUI thread. For further exploration and a modular version of this demo, refer to the GitHub repository below. Happy coding! 
Check out the full source code in the GitHub repo: https://github.com/trflorian/ctk-opencv Industry experts praise this approach for its robust and responsive design, noting that it effectively addresses the common challenge of keeping the UI smooth while handling computationally intensive tasks. Scale.ai, a leader in data labeling for AI models, has also recognized the importance of interactive tools in AI development, highlighting the growing trend of integrating real-time feedback mechanisms into AI pipelines.

Building Modern Interactive GUIs for Computer Vision Projects in Python with CustomTkinter

Related Links