Класс DocumentSegmentationCommand
Обнаруживает области различных типов на изображении документа.
Ниже приведён код на C# и VB.NET, который демонстрирует, как сегментировать изображение документа и распознать текст на нём.
''' <summary>
''' Sample that segments document images with DocumentSegmentationCommand
''' and recognizes text in the detected regions using the Tesseract OCR engine.
''' </summary>
Class OcrEngineManagerExample

    ''' <summary>
    ''' Segments an image and recognizes text in the image.
    ''' The recognized text of every image is written to the console.
    ''' </summary>
    ''' <param name="filename">The name of file, which stores images with text.</param>
    ''' <param name="ocrLanguage">The language of recognizing text.</param>
    Public Shared Sub SegmentsAndOcrImages(filename As String, ocrLanguage As Vintasoft.Imaging.Ocr.OcrLanguage)
        ' create an image collection
        Using images As New Vintasoft.Imaging.ImageCollection()
            Try
                ' add images from file to the image collection
                images.Add(filename)

                System.Console.WriteLine("Create Tesseract OCR engine...")
                ' create the Tesseract OCR engine
                Using tesseractOcr As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                    ' create the OCR engine manager
                    Dim engineManager As New Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcr)
                    ' create the OCR engine settings
                    Dim settings As New Vintasoft.Imaging.Ocr.OcrEngineSettings(ocrLanguage)

                    ' for each image
                    For Each image As Vintasoft.Imaging.VintasoftImage In images
                        System.Console.WriteLine("Segmentation of the image...")
                        ' create the processing command for segmentation of document image
                        Dim segmentation As New Vintasoft.Imaging.ImageProcessing.Info.DocumentSegmentationCommand()
                        ' execute the processing command on document image;
                        ' the detected regions are read from segmentation.Regions below
                        segmentation.ExecuteInPlace(image)

                        System.Console.WriteLine("Recognize the image...")
                        ' recognize text only in the regions found by the segmentation command
                        Dim ocrResult As Vintasoft.Imaging.Ocr.Results.OcrPage = engineManager.Recognize(image, settings, segmentation.Regions)

                        ' output the recognized text
                        System.Console.WriteLine("Page Text:")
                        System.Console.WriteLine(ocrResult.GetText())
                        System.Console.WriteLine()
                    Next
                End Using
            Finally
                ' free images even if loading, segmentation or recognition throws;
                ' disposing the collection itself is not assumed to dispose its items
                images.ClearAndDisposeItems()
            End Try
        End Using
    End Sub

End Class
/// <summary>
/// Sample that segments document images with DocumentSegmentationCommand
/// and recognizes text in the detected regions using the Tesseract OCR engine.
/// </summary>
class OcrEngineManagerExample
{
    /// <summary>
    /// Segments an image and recognizes text in the image.
    /// The recognized text of every image is written to the console.
    /// </summary>
    /// <param name="filename">The name of file, which stores images with text.</param>
    /// <param name="ocrLanguage">The language of recognizing text.</param>
    public static void SegmentsAndOcrImages(string filename, Vintasoft.Imaging.Ocr.OcrLanguage ocrLanguage)
    {
        // create an image collection
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            try
            {
                // add images from file to the image collection
                images.Add(filename);

                System.Console.WriteLine("Create Tesseract OCR engine...");
                // create the Tesseract OCR engine
                using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr =
                    new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
                {
                    // create the OCR engine manager
                    Vintasoft.Imaging.Ocr.OcrEngineManager engineManager =
                        new Vintasoft.Imaging.Ocr.OcrEngineManager(tesseractOcr);
                    // create the OCR engine settings
                    Vintasoft.Imaging.Ocr.OcrEngineSettings settings =
                        new Vintasoft.Imaging.Ocr.OcrEngineSettings(ocrLanguage);

                    // for each image
                    foreach (Vintasoft.Imaging.VintasoftImage image in images)
                    {
                        System.Console.WriteLine("Segmentation of the image...");
                        // create the processing command for segmentation of document image
                        Vintasoft.Imaging.ImageProcessing.Info.DocumentSegmentationCommand segmentation =
                            new Vintasoft.Imaging.ImageProcessing.Info.DocumentSegmentationCommand();
                        // execute the processing command on document image;
                        // the detected regions are read from segmentation.Regions below
                        segmentation.ExecuteInPlace(image);

                        System.Console.WriteLine("Recognize the image...");
                        // recognize text only in the regions found by the segmentation command
                        Vintasoft.Imaging.Ocr.Results.OcrPage ocrResult = engineManager.Recognize(
                            image, settings, segmentation.Regions);

                        // output the recognized text
                        System.Console.WriteLine("Page Text:");
                        System.Console.WriteLine(ocrResult.GetText());
                        System.Console.WriteLine();
                    }
                }
            }
            finally
            {
                // free images even if loading, segmentation or recognition throws;
                // disposing the collection itself is not assumed to dispose its items
                images.ClearAndDisposeItems();
            }
        }
    }
}
System.Object
 Vintasoft.Imaging.ImageProcessing.ProcessingCommandBase
   Vintasoft.Imaging.ImageProcessing.Info.DocumentSegmentationCommand
Целевые платформы: .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5