VintaSoft Imaging .NET SDK 14.0: Документация для .NET разработчика
Vintasoft.Imaging.Ocr.Results Namespace / OcrObject Class / Confidence Property
Синтаксис Пример Требования Смотрите также
В этом разделе
    Confidence Свойство (OcrObject)
    В этом разделе
    Возвращает достоверность (в процентах) этого объекта.
    Синтаксис
    'Declaration
    
    Public ReadOnly Property Confidence As Single
    
    
    public float Confidence { get; }
    
    
    public: __property float get_Confidence();
    
    
    
    public:
    property float Confidence { float get(); }
    Пример

    Ниже приведён код на C#/VB.NET, который демонстрирует, как отфильтровать результаты распознавания по достоверности слов.

    
    ''' <summary>
    ''' Recognizes text in all images of the specified file using the Tesseract OCR engine,
    ''' removes words with low confidence from recognized text and
    ''' returns the remaining recognized text.
    ''' </summary>
    ''' <param name="filename">The name of the file containing image to OCR.</param>
    ''' <returns>
    ''' The filtered text of all recognized images; the text of each image is followed by a line break.
    ''' </returns>
    Public Function RecognizeTextAndFilterRecognitionResult(filename As String) As String
        ' minimum confidence (in percents) a word must have to stay in the result
        Const MIN_CONFIDENCE As Single = 75F

        ' create image collection
        Using images As New Vintasoft.Imaging.ImageCollection()
            Try
                ' add images from file to image collection
                images.Add(filename)

                ' create tesseract OCR engine
                Using tesseractOcr As New Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr()
                    ' create tesseract OCR settings and initialize the engine
                    Dim settings As New Vintasoft.Imaging.Ocr.OcrEngineSettings(Vintasoft.Imaging.Ocr.OcrLanguage.English)
                    tesseractOcr.Init(settings)

                    ' create result builder
                    Dim result As New System.Text.StringBuilder()

                    ' for each image in image collection
                    For Each image As Vintasoft.Imaging.VintasoftImage In images
                        ' recognize the image
                        Dim page As Vintasoft.Imaging.Ocr.Results.OcrPage = tesseractOcr.Recognize(image)

                        ' get all words in recognized text
                        Dim ocrObjects As Vintasoft.Imaging.Ocr.Results.OcrObject() = page.GetObjects(Vintasoft.Imaging.Ocr.OcrObjectType.Word)

                        ' collect the words to remove; they are removed after the loop
                        ' in a single editor call
                        Dim removeObjects As New System.Collections.Generic.List(Of Vintasoft.Imaging.Ocr.Results.OcrObject)()
                        For Each word As Vintasoft.Imaging.Ocr.Results.OcrObject In ocrObjects
                            ' if word confidence is less than minimum confidence
                            If word.Confidence < MIN_CONFIDENCE Then
                                ' add word to a list of words to remove
                                removeObjects.Add(word)
                            End If
                        Next

                        ' remove words with low confidence and validate recognition results
                        Dim editor As New Vintasoft.Imaging.Ocr.Results.OcrResultsEditor(page)
                        editor.RemoveObjects(removeObjects.ToArray())
                        editor.ValidateResults()

                        ' add recognized text of the page, followed by a line break, to result
                        result.AppendLine(page.GetText())
                    Next

                    ' return result
                    Return result.ToString()
                End Using
            Finally
                ' dispose images and clear image collection;
                ' done in Finally so the images are released even if recognition fails
                images.ClearAndDisposeItems()
            End Try
        End Using
    End Function
    
    
    
    /// <summary>
    /// Recognizes text in all images of the specified file using the Tesseract OCR engine,
    /// removes words with low confidence from recognized text and
    /// returns the remaining recognized text.
    /// </summary>
    /// <param name="filename">The name of the file containing image to OCR.</param>
    /// <returns>
    /// The filtered text of all recognized images; the text of each image is followed by a line break.
    /// </returns>
    public string RecognizeTextAndFilterRecognitionResult(string filename)
    {
        // minimum confidence (in percents) a word must have to stay in the result
        const float MIN_CONFIDENCE = 75.0f;

        // create image collection
        using (Vintasoft.Imaging.ImageCollection images = new Vintasoft.Imaging.ImageCollection())
        {
            try
            {
                // add images from file to image collection
                images.Add(filename);

                // create tesseract OCR engine
                using (Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr tesseractOcr =
                    new Vintasoft.Imaging.Ocr.Tesseract.TesseractOcr())
                {
                    // create tesseract OCR settings and initialize the engine
                    Vintasoft.Imaging.Ocr.OcrEngineSettings settings =
                        new Vintasoft.Imaging.Ocr.OcrEngineSettings(Vintasoft.Imaging.Ocr.OcrLanguage.English);
                    tesseractOcr.Init(settings);

                    // create result builder
                    System.Text.StringBuilder result = new System.Text.StringBuilder();

                    // for each image in image collection
                    foreach (Vintasoft.Imaging.VintasoftImage image in images)
                    {
                        // recognize the image
                        Vintasoft.Imaging.Ocr.Results.OcrPage page = tesseractOcr.Recognize(image);

                        // get all words in recognized text
                        Vintasoft.Imaging.Ocr.Results.OcrObject[] ocrObjects = page.GetObjects(
                            Vintasoft.Imaging.Ocr.OcrObjectType.Word);

                        // collect the words to remove; they are removed after the loop
                        // in a single editor call
                        System.Collections.Generic.List<Vintasoft.Imaging.Ocr.Results.OcrObject> removeObjects =
                            new System.Collections.Generic.List<Vintasoft.Imaging.Ocr.Results.OcrObject>();
                        foreach (Vintasoft.Imaging.Ocr.Results.OcrObject word in ocrObjects)
                        {
                            // if word confidence is less than minimum confidence
                            if (word.Confidence < MIN_CONFIDENCE)
                            {
                                // add word to a list of words to remove
                                removeObjects.Add(word);
                            }
                        }

                        // remove words with low confidence and validate recognition results
                        Vintasoft.Imaging.Ocr.Results.OcrResultsEditor editor =
                            new Vintasoft.Imaging.Ocr.Results.OcrResultsEditor(page);
                        editor.RemoveObjects(removeObjects.ToArray());
                        editor.ValidateResults();

                        // add recognized text of the page, followed by a line break, to result
                        result.AppendLine(page.GetText());
                    }

                    // return result
                    return result.ToString();
                }
            }
            finally
            {
                // dispose images and clear image collection;
                // done in finally so the images are released even if recognition fails
                images.ClearAndDisposeItems();
            }
        }
    }
    
    

    Требования

    Целевые платформы: .NET 8; .NET 7; .NET 6; .NET Framework 4.8, 4.7, 4.6, 4.5, 4.0, 3.5

    Смотрите также