VintaSoft Imaging .NET SDK 14.0: Документация для .NET разработчика
В этом разделе
    Определение и распознавание документа, содержащего заполненную форму с текстом
    В этом разделе
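    Если вы хотите определить и распознать документ, содержащий заполненную форму с текстом, вам необходимо выполнить следующие шаги:
    1. Инициализировать менеджер OCR-движков для шаблонов OCR-полей (свойство OcrFieldTemplate.OcrEngineManager), если он ещё не задан.
    2. Распознать изображение заполненной формы с помощью метода FormRecognitionManager.Recognize.
    3. Проверить, что результат сопоставления изображения с шаблоном является надёжным (свойство ImageImprintCompareResult.IsReliable).
    4. Перебрать поля распознанной страницы и для каждого OCR-поля получить имя, значение, уверенность распознавания и распознанные слова.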
    Ниже приведён код на C# и VB.NET, который демонстрирует, как идентифицировать и распознать заполненную форму, содержащую текст.
    /// <summary>
    /// Recognizes a filled form in an image and prints the recognized OCR fields,
    /// together with the words of each field, to the console.
    /// </summary>
    /// <param name="formRecognitionManager">The form recognition manager that performs the recognition.</param>
    /// <param name="image">The image that contains the filled form.</param>
    public static void RecognizeFormWithOcrFields(
        Vintasoft.Imaging.FormsProcessing.FormRecognitionManager formRecognitionManager,
        Vintasoft.Imaging.VintasoftImage image)
    {
        // the OCR engine manager of the OCR field templates is set only once;
        // this setup may also be done at application start-up
        if (Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager == null)
        {
            // engine for ordinary text
            Vintasoft.Imaging.Ocr.OcrEngine textEngine = GetOcrEngine();

            // engine for handwritten digits
            Vintasoft.Imaging.Ocr.OcrEngine digitsEngine =
                new Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrEngine();

            // combine both engines into the manager used by the OCR field templates
            Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager =
                new Vintasoft.Imaging.Ocr.OcrEngineManager(textEngine, digitsEngine);
        }

        // run the form recognition for the image
        Vintasoft.Imaging.FormsProcessing.FormRecognitionResult result =
            formRecognitionManager.Recognize(image);

        // an unreliable image comparison means that no template matches the image
        Vintasoft.Imaging.FormsProcessing.TemplateMatching.ImageImprintCompareResult compareResult =
            result.TemplateMatchingResult.ImageCompareResult;
        if (!compareResult.IsReliable)
        {
            System.Console.WriteLine("Matching template is not found.");
            return;
        }

        // the recognized page holds the recognized form fields
        Vintasoft.Imaging.FormsProcessing.FormRecognition.FormPage page = result.RecognizedPage;
        if (page.Items.Count == 0)
        {
            System.Console.WriteLine("No form fields were recognized.");
            return;
        }

        System.Console.WriteLine(string.Format(
            "Recognized form field count: {0}",
            page.Items.Count));

        foreach (Vintasoft.Imaging.FormsProcessing.FormRecognition.FormField field in page.Items)
        {
            // only OCR fields are reported, any other field type is skipped
            if (!(field is Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField))
            {
                continue;
            }

            Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField ocrField =
                (Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField)field;

            // field summary; the field confidence is multiplied by 100 before it is printed as percent
            System.Console.WriteLine(string.Format(
                "  OCR field: name: {0}; value: {1}; confidence: {2:F1}%",
                ocrField.Name,
                ocrField.Value,
                ocrField.Confidence * 100));

            // words of the OCR result of the field
            Vintasoft.Imaging.Ocr.Results.OcrPage fieldOcrPage = ocrField.OcrResult;
            Vintasoft.Imaging.Ocr.Results.OcrObject[] fieldWords = fieldOcrPage.GetWords(75, 75);
            foreach (Vintasoft.Imaging.Ocr.Results.OcrObject fieldWord in fieldWords)
            {
                // the word confidence is printed as is
                System.Console.WriteLine(string.Format(
                    "    OCR word: {0}; confidence: {1:F1}%",
                    fieldWord.ToString(),
                    fieldWord.Confidence));
            }
        }
    }
    
    /// <summary>
    /// Creates the Tesseract OCR engine that is used for recognition of OCR fields.
    /// </summary>
    /// <remarks>
    /// The project must reference Vintasoft.Imaging.Ocr.Tesseract.dll
    /// in order to create a Tesseract OCR engine.
    /// </remarks>
    /// <returns>A new Tesseract OCR engine.</returns>
    private static Vintasoft.Imaging.Ocr.OcrEngine GetOcrEngine()
    {
        // directory that contains the Tesseract5.Vintasoft.xXX.dll files
        // NOTE: replace this value with the actual path to the Tesseract OCR dll files
        const string tesseractBinariesPath = @"C:\Program Files\VintaSoft\VintaSoft Imaging .NET\Bin\TesseractOCR\";

        // Vintasoft.Imaging.Ocr.Tesseract.dll is required for the TesseractOcr class
        Vintasoft.Imaging.Ocr.OcrEngine engine =
            new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr(tesseractBinariesPath);
        return engine;
    }
    
    ''' <summary>
    ''' Recognizes a filled form in an image and prints the recognized OCR fields,
    ''' together with the words of each field, to the console.
    ''' </summary>
    ''' <param name="formRecognitionManager">The form recognition manager that performs the recognition.</param>
    ''' <param name="image">The image that contains the filled form.</param>
    Public Shared Sub RecognizeFormWithOcrFields(
            formRecognitionManager As Vintasoft.Imaging.FormsProcessing.FormRecognitionManager,
            image As Vintasoft.Imaging.VintasoftImage)
        ' the OCR engine manager of the OCR field templates is set only once;
        ' this setup may also be done at application start-up
        If Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager Is Nothing Then
            ' engine for ordinary text
            Dim textEngine As Vintasoft.Imaging.Ocr.OcrEngine = GetOcrEngine()

            ' engine for handwritten digits
            Dim digitsEngine As Vintasoft.Imaging.Ocr.OcrEngine =
                New Vintasoft.Imaging.Ocr.ML.HandwrittenDigits.HandwrittenDigitsOcrEngine()

            ' combine both engines into the manager used by the OCR field templates
            Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrFieldTemplate.OcrEngineManager =
                New Vintasoft.Imaging.Ocr.OcrEngineManager(textEngine, digitsEngine)
        End If

        ' run the form recognition for the image
        Dim result As Vintasoft.Imaging.FormsProcessing.FormRecognitionResult =
            formRecognitionManager.Recognize(image)

        ' an unreliable image comparison means that no template matches the image
        Dim compareResult As Vintasoft.Imaging.FormsProcessing.TemplateMatching.ImageImprintCompareResult =
            result.TemplateMatchingResult.ImageCompareResult
        If Not compareResult.IsReliable Then
            System.Console.WriteLine("Matching template is not found.")
            Return
        End If

        ' the recognized page holds the recognized form fields
        Dim page As Vintasoft.Imaging.FormsProcessing.FormRecognition.FormPage = result.RecognizedPage
        If page.Items.Count = 0 Then
            System.Console.WriteLine("No form fields were recognized.")
            Return
        End If

        System.Console.WriteLine(String.Format("Recognized form field count: {0}", page.Items.Count))

        For Each field As Vintasoft.Imaging.FormsProcessing.FormRecognition.FormField In page.Items
            ' only OCR fields are reported, any other field type is skipped
            If Not (TypeOf field Is Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField) Then
                Continue For
            End If

            Dim ocrField As Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField =
                DirectCast(field, Vintasoft.Imaging.FormsProcessing.FormRecognition.Ocr.OcrField)

            ' field summary; the field confidence is multiplied by 100 before it is printed as percent
            System.Console.WriteLine(String.Format(
                "  OCR field: name: {0}; value: {1}; confidence: {2:F1}%",
                ocrField.Name,
                ocrField.Value,
                ocrField.Confidence * 100))

            ' words of the OCR result of the field
            Dim fieldOcrPage As Vintasoft.Imaging.Ocr.Results.OcrPage = ocrField.OcrResult
            Dim fieldWords As Vintasoft.Imaging.Ocr.Results.OcrObject() = fieldOcrPage.GetWords(75, 75)
            For Each fieldWord As Vintasoft.Imaging.Ocr.Results.OcrObject In fieldWords
                ' the word confidence is printed as is
                System.Console.WriteLine(String.Format(
                    "    OCR word: {0}; confidence: {1:F1}%",
                    fieldWord.ToString(),
                    fieldWord.Confidence))
            Next
        Next
    End Sub
    
    ''' <summary>
    ''' Creates the Tesseract OCR engine that is used for recognition of OCR fields.
    ''' </summary>
    ''' <remarks>
    ''' The project must reference Vintasoft.Imaging.Ocr.Tesseract.dll
    ''' in order to create a Tesseract OCR engine.
    ''' </remarks>
    ''' <returns>A new Tesseract OCR engine.</returns>
    Private Shared Function GetOcrEngine() As Vintasoft.Imaging.Ocr.OcrEngine
        ' directory that contains the Tesseract5.Vintasoft.xXX.dll files
        ' NOTE: replace this value with the actual path to the Tesseract OCR dll files
        Const tesseractBinariesPath As String = "C:\Program Files\VintaSoft\VintaSoft Imaging .NET\Bin\TesseractOCR\"

        ' Vintasoft.Imaging.Ocr.Tesseract.dll is required for the TesseractOcr class
        Dim engine As Vintasoft.Imaging.Ocr.OcrEngine =
            New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr(tesseractBinariesPath)
        Return engine
    End Function