How can I detect a human body in a local video?
My idea: grab each frame of the local video, then convert the frame to a CGImage:
guard let cgImage = self.imageFromFrame(sampleBuffer) else { return }
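One thing I noticed while writing this: a CIContext is expensive to create, so it probably should be created once and reused for every frame, instead of being allocated per frame as in my imageFromFrame(_:) below. A minimal sketch of a reusable converter (FrameConverter is just an illustrative name):

import CoreImage
import CoreMedia

// A reusable converter: the CIContext is created once, not per frame.
final class FrameConverter {
    private let ciContext = CIContext(options: nil)

    func cgImage(from buffer: CMSampleBuffer) -> CGImage? {
        guard let imageBuffer = buffer.imageBuffer else { return nil }
        let ciImage = CIImage(cvPixelBuffer: imageBuffer)
        return ciContext.createCGImage(ciImage, from: ciImage.extent)
    }
}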
Then run humanBodyPoseRequest on the CGImage to detect the body:
let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage)
// Use Vision to find human body poses in the frame.
do { try imageRequestHandler.perform([self.humanBodyPoseRequest]) } catch {
    assertionFailure("Human Pose Request failed: \(error)")
}
let poses = Pose.fromObservations(self.humanBodyPoseRequest.results)
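If I can target iOS 14 or later, Vision can apparently also consume the CMSampleBuffer directly, so the CGImage conversion would only be needed for drawing, not for detection. A sketch of that variant:

import CoreMedia
import Vision

// Sketch (iOS 14+): hand the sample buffer straight to Vision, skipping
// the CIContext/CGImage round-trip for the detection pass.
func detectPoses(in sampleBuffer: CMSampleBuffer,
                 using request: VNDetectHumanBodyPoseRequest) -> [VNHumanBodyPoseObservation] {
    let handler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer, options: [:])
    do {
        try handler.perform([request])
    } catch {
        print("Human body pose request failed: \(error)")
        return []
    }
    return request.results ?? []
}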
Then I draw the detected poses onto the CGImage, convert the result to a UIImage, and set it on a UIImageView:
private func drawPoses(_ poses: [Pose]?, onto frame: CGImage) {
    print("+++++++++++++++++++++")
    // Create a default render format at a scale of 1:1.
    let renderFormat = UIGraphicsImageRendererFormat()
    renderFormat.scale = 1.0

    // Create a renderer with the same size as the frame.
    let frameSize = CGSize(width: frame.width, height: frame.height)
    let poseRenderer = UIGraphicsImageRenderer(size: frameSize,
                                               format: renderFormat)

    // Draw the frame first and then draw pose wireframes on top of it.
    let frameWithPosesRendering = poseRenderer.image { rendererContext in
        // The `UIGraphicsImageRenderer` instance flips the Y-axis, presuming
        // we're drawing with UIKit's coordinate system and orientation.
        let cgContext = rendererContext.cgContext

        // Get the inverse of the current transform matrix (CTM).
        let inverse = cgContext.ctm.inverted()

        // Restore the Y-axis by multiplying the CTM by its inverse to reset
        // the context's transform matrix to the identity.
        cgContext.concatenate(inverse)

        // Draw the camera image first as the background.
        let imageRectangle = CGRect(origin: .zero, size: frameSize)
        cgContext.draw(frame, in: imageRectangle)

        // Create a transform that converts the poses' normalized point
        // coordinates `[0.0, 1.0]` to properly fit the frame's size.
        let pointTransform = CGAffineTransform(scaleX: frameSize.width,
                                               y: frameSize.height)

        guard let poses = poses else { return }

        // Draw all the poses Vision found in the frame.
        for pose in poses {
            // Draw each pose as a wireframe at the scale of the image.
            pose.drawWireframeToContext(cgContext, applying: pointTransform)
        }
    }

    // Update the UI's full-screen image view on the main thread.
    print("??????????????????????????")
    // self.uiimages.append(frameWithPosesRendering)
    DispatchQueue.main.async { self.imageView!.image = frameWithPosesRendering }
}
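I realize this re-renders the entire frame through UIGraphicsImageRenderer on every update, which is a lot of per-frame CPU work. A cheaper pattern might be to draw only the wireframe into a transparent CAShapeLayer over the video view and leave the frame itself alone. A rough sketch, assuming the normalized landmark points can be extracted from my Pose values (the input shape here is hypothetical):

import UIKit

// Sketch: keep one transparent shape layer above the video/image view and
// update only its path each frame, instead of re-rendering whole frames.
final class PoseOverlay {
    private let shapeLayer = CAShapeLayer()

    func install(over view: UIView) {
        shapeLayer.frame = view.bounds
        shapeLayer.strokeColor = UIColor.systemGreen.cgColor
        shapeLayer.fillColor = nil
        shapeLayer.lineWidth = 2
        view.layer.addSublayer(shapeLayer)
    }

    // `points` are Vision's normalized landmark coordinates in [0, 1].
    func update(with points: [CGPoint], in size: CGSize) {
        let path = UIBezierPath()
        for point in points {
            // Vision uses a bottom-left origin; flip Y for UIKit.
            let p = CGPoint(x: point.x * size.width,
                            y: (1 - point.y) * size.height)
            path.move(to: p)
            path.addArc(withCenter: p, radius: 3,
                        startAngle: 0, endAngle: .pi * 2, clockwise: true)
        }
        // Layer updates must happen on the main thread.
        DispatchQueue.main.async { self.shapeLayer.path = path.cgPath }
    }
}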
The problem is that after drawing, the video plays back slowly; you can see the effect in this video: https://youtu.be/vYj6f4bKrCU. Below is all of my code:
import AVFoundation
import UIKit
import Vision

class VideoViewController: UIViewController {
    private var imageView: UIImageView?
    var videoUrl: URL? // use your own url
    var frames: [CGImage]?
    var uiimages: [UIImage] = []
    private var generator: AVAssetImageGenerator?
    private let humanBodyPoseRequest = VNDetectHumanBodyPoseRequest()

    private func drawPoses(_ poses: [Pose]?, onto frame: CGImage) {
        print("+++++++++++++++++++++")
        // Create a default render format at a scale of 1:1.
        let renderFormat = UIGraphicsImageRendererFormat()
        renderFormat.scale = 1.0

        // Create a renderer with the same size as the frame.
        let frameSize = CGSize(width: frame.width, height: frame.height)
        let poseRenderer = UIGraphicsImageRenderer(size: frameSize,
                                                   format: renderFormat)

        // Draw the frame first and then draw pose wireframes on top of it.
        let frameWithPosesRendering = poseRenderer.image { rendererContext in
            // The `UIGraphicsImageRenderer` instance flips the Y-axis, presuming
            // we're drawing with UIKit's coordinate system and orientation.
            let cgContext = rendererContext.cgContext

            // Get the inverse of the current transform matrix (CTM).
            let inverse = cgContext.ctm.inverted()

            // Restore the Y-axis by multiplying the CTM by its inverse to reset
            // the context's transform matrix to the identity.
            cgContext.concatenate(inverse)

            // Draw the camera image first as the background.
            let imageRectangle = CGRect(origin: .zero, size: frameSize)
            cgContext.draw(frame, in: imageRectangle)

            // Create a transform that converts the poses' normalized point
            // coordinates `[0.0, 1.0]` to properly fit the frame's size.
            let pointTransform = CGAffineTransform(scaleX: frameSize.width,
                                                   y: frameSize.height)

            guard let poses = poses else { return }

            // Draw all the poses Vision found in the frame.
            for pose in poses {
                // Draw each pose as a wireframe at the scale of the image.
                pose.drawWireframeToContext(cgContext, applying: pointTransform)
            }
        }

        // Update the UI's full-screen image view on the main thread.
        print("??????????????????????????")
        // self.uiimages.append(frameWithPosesRendering)
        DispatchQueue.main.async { self.imageView!.image = frameWithPosesRendering }
    }

    private func imageFromFrame(_ buffer: CMSampleBuffer) -> CGImage? {
        guard let imageBuffer = buffer.imageBuffer else {
            print("The frame doesn't have an underlying image buffer.")
            return nil
        }

        // Create a Core Image context.
        let ciContext = CIContext(options: nil)

        // Create a Core Image image from the sample buffer.
        let ciImage = CIImage(cvPixelBuffer: imageBuffer)

        // Generate a Core Graphics image from the Core Image image.
        guard let cgImage = ciContext.createCGImage(ciImage,
                                                    from: ciImage.extent) else {
            print("Unable to create an image from a frame.")
            return nil
        }
        return cgImage
    }

    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view.
        view.backgroundColor = .white
        imageView = UIImageView(frame: safeAreaFrame(self))
        imageView!.contentMode = .scaleAspectFit
        view.addSubview(imageView!)

        DispatchQueue.global().async {
            // Get the local video file's URL.
            guard let videoPath = Bundle.main.path(forResource: "jumpjack", ofType: "mp4") else {
                print("get video path failed")
                return
            }
            self.videoUrl = URL(fileURLWithPath: videoPath)
            let asset = AVAsset(url: self.videoUrl!)
            let reader = try! AVAssetReader(asset: asset)
            let videoTrack = asset.tracks(withMediaType: AVMediaType.video)[0]

            // Read video frames as BGRA.
            let trackReaderOutput = AVAssetReaderTrackOutput(
                track: videoTrack,
                outputSettings: [String(kCVPixelBufferPixelFormatTypeKey):
                                     NSNumber(value: kCVPixelFormatType_32BGRA)])
            trackReaderOutput.supportsRandomAccess = true
            reader.add(trackReaderOutput)
            reader.startReading()

            while let sampleBuffer = trackReaderOutput.copyNextSampleBuffer() {
                // CMSampleBuffer
                print("sample at time \(CMSampleBufferGetPresentationTimeStamp(sampleBuffer))")
                // CVImageBuffer
                if CMSampleBufferGetImageBuffer(sampleBuffer) != nil {
                    // Process each CVPixelBufferRef here; see CVPixelBufferGetWidth,
                    // CVPixelBufferLockBaseAddress, CVPixelBufferGetBaseAddress, etc.
                    guard let cgImage = self.imageFromFrame(sampleBuffer) else { return }
                    let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage)

                    // Use Vision to find human body poses in the frame.
                    do { try imageRequestHandler.perform([self.humanBodyPoseRequest]) } catch {
                        assertionFailure("Human Pose Request failed: \(error)")
                    }
                    let poses = Pose.fromObservations(self.humanBodyPoseRequest.results)

                    // Draw the frame with the detected poses, if any.
                    self.drawPoses(poses, onto: cgImage)
                }
            }
        }
    }
}
It seems that fetching each frame of the local video with
trackReaderOutput.copyNextSampleBuffer()
involves some time-consuming work, which is why the video plays back slowly. Or maybe my whole approach is wrong. Can anyone tell me how to solve this problem, or point me to a sample that achieves this goal?
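To confirm where the time actually goes, I could time each stage per frame; copyNextSampleBuffer() uses hardware-accelerated decoding and may not be the main cost, while the Vision request and the full-frame re-render seem more likely suspects. A small measurement helper sketch (usage names refer to the code above):

import Foundation

// Sketch: time one stage of the per-frame pipeline and print the result.
func timed<T>(_ label: String, _ work: () throws -> T) rethrows -> T {
    let start = CFAbsoluteTimeGetCurrent()
    let result = try work()
    let ms = (CFAbsoluteTimeGetCurrent() - start) * 1000
    print("\(label): \(String(format: "%.1f", ms)) ms")
    return result
}

// Example usage inside the reading loop:
// let cgImage = timed("decode + convert") { self.imageFromFrame(sampleBuffer) }
// try timed("vision") { try imageRequestHandler.perform([self.humanBodyPoseRequest]) }
// timed("render") { self.drawPoses(poses, onto: cgImage) }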