Python pytesseract 模块，image_to_string() 实例源码

我们从Python开源项目中，提取了以下49个代码示例，用于说明如何使用pytesseract.image_to_string()。

项目：F1-Telemetry 作者：MrPranz | 项目源码 | 文件源码

def velocity_ocr(image,coords,f1app):
    """Read the text inside a cropped region of a video frame via OCR.

    :param image: frame in a form accepted by ``Image.fromarray``.
    :param coords: crop box handed to ``Image.crop``.
    :param f1app: truthy selects the filter chain for video from the F1 app,
        falsy selects the chain for the onboard video graphic.
    :return: whatever ``pytesseract.image_to_string`` returns, or ``-1`` when
        the OCR call raises ``UnicodeDecodeError``.
    """
    # Crop, convert to greyscale, then double both dimensions.
    region = Image.fromarray(image).crop(coords).convert('L')
    img = region.resize([region.width*2, region.height*2])

    # Each entry is (enhancer class, factor); they are applied in order.
    if f1app:
        pipeline = (
            (ImageEnhance.Brightness, 3.0),
            (ImageEnhance.Contrast, 2.0),
        )
    else:
        pipeline = (
            (ImageEnhance.Brightness, 0.1),
            (ImageEnhance.Contrast, 2.0),
            (ImageEnhance.Contrast, 4.0),
            (ImageEnhance.Brightness, 0.2),
            (ImageEnhance.Contrast, 16.0),
        )
    for enhancer, factor in pipeline:
        img = enhancer(img).enhance(factor)

    try:
        return pytesseract.image_to_string(img)
    except UnicodeDecodeError:
        # Sentinel kept for callers that check for -1.
        return -1

项目：base_function 作者：Rockyzsu | 项目源码 | 文件源码

def image_recognize():
    """OCR ``data/0.jpg``, print the text and write it to ``Verification.txt``.

    Takes no arguments and returns nothing; both file names are fixed.
    """
    import pytesseract
    from PIL import Image

    class GetImageDate(object):
        """Small helper bundling the OCR step and the save step."""

        def m(self):
            """Return the text pytesseract reads from ``data/0.jpg``."""
            image = Image.open("data/0.jpg")
            text = pytesseract.image_to_string(image)
            return text

        def SaveResultToDocument(self):
            """Print the OCR text and store it in ``Verification.txt``."""
            text = self.m()
            # The context manager closes the file even if the write fails.
            with open(u"Verification.txt", "w") as f:
                # Parenthesised single-argument print behaves the same on
                # Python 2 and Python 3.
                print(text)
                f.write(str(text))

    g = GetImageDate()
    g.SaveResultToDocument()

项目：electsys-splinter 作者：fztfztfztfzt | 项目源码 | 文件源码

def recognize(self):
    """Download the jAccount captcha using the browser's cookies and OCR it.

    The image is saved as ``captcha.png`` in the working directory.

    :return: the OCR text reduced to ASCII letters and digits, or ``'aaaa'``
        when no such character was recognised.
    """
    def format_captcha(captcha):
        # Keep only 0-9, A-Z and a-z; substitute the placeholder 'aaaa'
        # when nothing is left.
        kept = ''.join(
            ch for ch in captcha
            if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'
        )
        return kept or 'aaaa'

    cookie = self.browser.cookies.all()
    opener = urllib2.build_opener()
    opener.addheaders.append(('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'))
    opener.addheaders.append(('Host','jaccount.sjtu.edu.cn'))
    opener.addheaders.append(('Referer',self.browser.url))
    # Forward the browser session's cookies with the captcha request.
    opener.addheaders.append(('Cookie',  "; ".join('%s=%s' % (k,v) for k,v in cookie.items())))

    response = opener.open("https://jaccount.sjtu.edu.cn/jaccount/captcha?1488154642719")
    try:
        data = response.read()
    finally:
        # The response handle was previously left open.
        response.close()

    # ``open`` replaces the Python-2-only ``file`` builtin.
    with open('captcha.png','wb') as out:
        out.write(data)

    img = Image.open("captcha.png").convert('L')
    return format_captcha(pytesseract.image_to_string(img,lang="eng"))

项目：OdooQuant 作者：haogefeifei | 项目源码 | 文件源码

def detect_gf_result(image_path):
    """Clean up a captcha image and return the OCR text without spaces.

    :param image_path: path of the image to open.
    :return: text from ``pytesseract.image_to_string`` with every ``' '``
        removed.
    """
    from PIL import ImageFilter, Image
    import pytesseract
    img = Image.open(image_path)
    for x in range(img.width):
        for y in range(img.height):
            # NOTE(review): tuple comparison is lexicographic, so this is
            # decided mostly by the first channel -- confirm that is intended.
            # Assumes the image yields 3-tuples (RGB) -- TODO confirm.
            if img.getpixel((x, y)) < (100, 100, 100):
                # 255 is the largest valid 8-bit channel value (was 256).
                img.putpixel((x, y), (255, 255, 255))
    gray = img.convert('L')
    # Grey levels strictly between 68 and 90 become black, the rest white.
    two = gray.point(lambda x: 0 if 68 < x < 90 else 255)
    min_res = two.filter(ImageFilter.MinFilter)
    # The median filter is applied three times in total.
    med_res = min_res.filter(ImageFilter.MedianFilter)
    for _ in range(2):
        med_res = med_res.filter(ImageFilter.MedianFilter)
    res = pytesseract.image_to_string(med_res, config='-psm 6')
    return res.replace(' ', '')

项目：MyPython 作者：fupinglee | 项目源码 | 文件源码

def readCaptcha(self):
    """Download the captcha image for ``self.captchaId`` and OCR it.

    The image is written to ``i:/img/<captchaId>.png`` and the recognised
    text is stored in ``self.captcha``. When the request fails with a
    connection error a message is printed and the method returns without
    changing ``self.captcha``.
    """
    headers = {
        'Accept-Language':'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0',
        'Referer':'http://******/login.jsp',
        'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'X-Forwarded-For':self.captchaId
    }
    url = 'http://******/common/captcha.jhtml?captchaId='+self.captchaId
    try:
        res = requests.get(url,headers=headers)
    except requests.exceptions.ConnectionError:
        print('??????')
        # Stop here: falling through used to hit an unbound ``res``.
        return

    path = "i:/img/"+self.captchaId+".png"
    # The context manager closes the file even if the write fails.
    with open(path,'wb') as fp:
        fp.write(res.content)
    image = Image.open(path)
    self.captcha = pytesseract.image_to_string(image)

项目：Mac-Python-3.X 作者：L1nwatch | 项目源码 | 文件源码

def test(image_name):
    """Denoise an image and print what the first available pyocr tool reads.

    :param image_name: path of the image to open.
    :return: None. Exits the process with status 1 when pyocr reports no tool.
    """
    with Image.open(image_name) as image:
        # Convert to greyscale ("L" mode), then run the noise-removal helper.
        cleaned = cut_noise(image.convert("L"))

        tools = pyocr.get_available_tools()
        if not tools:
            print("No OCR tool found")
            sys.exit(1)
        ocr_tool = tools[0]
        cleaned.save("test.jpg")

        plain_builder = pyocr.builders.TextBuilder()
        txt = ocr_tool.image_to_string(cleaned, lang="eng", builder=plain_builder)
        # Digits - Only Tesseract
        digit_builder = pyocr.tesseract.DigitBuilder()
        digits = ocr_tool.image_to_string(cleaned, lang="eng", builder=digit_builder)
        print(txt)
        print(digits)

项目：brutepwdbyflow 作者：spoock1024 | 项目源码 | 文件源码

def __post_data_with_captcha(self,postdata,captchaurl):
    """Fetch and OCR a captcha, add it to the form data and submit the form.

    :param postdata: dict of form fields; updated in place with the captcha
        text (when OCR succeeds) and with ``self.formdata.extradata``.
    :param captchaurl: URL the captcha image is downloaded from.
    :return: length of the response body text.
    """
    # One session is used for both the captcha download and the form post.
    session = requests.session()
    imagedata = session.get(captchaurl).content
    # The context manager closes the file even if the write fails.
    with open('image.jpg','wb') as f:
        f.write(imagedata)
    #decode the captcha
    try:
        imgstr = image_to_string(Image.open('image.jpg'))
        print(imgstr)
        postdata[self.formdata.maindata['captcha']] = imgstr
    except UnicodeDecodeError:
        # Best effort: post without the captcha field when the OCR output
        # cannot be decoded.
        pass
    postdata.update(self.formdata.extradata)
    postresponse = session.post(self.formdata.post_url,data=postdata,headers=self.formdata.headers)
    return len(postresponse.text)

项目：brutepwdbyflow 作者：spoock1024 | 项目源码 | 文件源码

def __post_data_with_captcha(self,postdata,captchaurl):
    """Fetch and OCR a captcha, add it to the form data and submit the form.

    :param postdata: dict of form fields; updated in place with the captcha
        text (when OCR succeeds) and with ``self.formdata.extradata``.
    :param captchaurl: URL the captcha image is downloaded from.
    :return: length of the response body text.
    """
    # One session is used for both the captcha download and the form post.
    session = requests.session()
    imagedata = session.get(captchaurl).content
    # The context manager closes the file even if the write fails.
    with open('image.jpg','wb') as f:
        f.write(imagedata)
    #decode the captcha
    try:
        imgstr = image_to_string(Image.open('image.jpg'))
        print(imgstr)
        postdata[self.formdata.maindata['captcha']] = imgstr
    except UnicodeDecodeError:
        # Best effort: post without the captcha field when the OCR output
        # cannot be decoded.
        pass
    postdata.update(self.formdata.extradata)
    postresponse = session.post(self.formdata.post_url,data=postdata,headers=self.formdata.headers)
    return len(postresponse.text)

项目：pytesseractID 作者：iChenwin | 项目源码 | 文件源码

def main():
    """OCR an image twice: on its Canny edge map and on the inverted edge map.

    Expects exactly one command-line argument, the image path. The inverted
    edge map is shown in a window and the function waits for a key press
    before returning. Exits with status 1 on wrong usage or when the image
    cannot be read.
    """
    # parse command line options
    if len(sys.argv) != 2:
        print('Usage: python input_name output_name')
        sys.exit(1)
    filePath = sys.argv[1]

    print("<----- processing %s ----->" % filePath)

    # Flag 0 loads the image as greyscale.
    img = cv2.imread(filePath, 0)
    if img is None:
        # cv2.imread signals an unreadable file by returning None; without
        # this check cv2.resize fails with an unrelated-looking error.
        print("Could not read image: %s" % filePath)
        sys.exit(1)
    img = cv2.resize(img, (1200, 900))

    canny = cv2.Canny(img, 60, 300)
    inverted = cv2.bitwise_not(canny)
    cv2.imshow('Canny', inverted)

    test1 = Image.fromarray(canny)
    test2 = Image.fromarray(inverted)

    # First pass: edge map, restricted to digits and 'X'.
    result = pytesseract.image_to_string(test1, lang="eng", config="-c tessedit_char_whitelist=0123456789X")
    print(result)
    print("-------")
    # Second pass: inverted edge map, no character restriction.
    result = pytesseract.image_to_string(test2, lang="eng")
    print(result)

    # Keep the preview window open until a key is pressed.
    cv2.waitKey(0)

项目：vxTrader 作者：vex1023 | 项目源码 | 文件源码

def vcode(self):
    """Fetch the verification-code image and return the OCR result.

    :return: the recognised code.
    :raises VerifyCodeError: when ``self.code_rule.findall`` finds nothing in
        the OCR text.
    """
    response = self._session.get(
        'https://jy.yongjinbao.com.cn/winner_gj/gjzq/user/extraCode.jsp',
        params={'randomStamp': random.random()}
    )
    response.raise_for_status()

    # Decode the image straight from memory and release both handles
    # once the text has been read.
    buf = BytesIO(response.content)
    picture = Image.open(buf)
    code = pytesseract.image_to_string(picture)
    picture.close()
    buf.close()

    matches = self.code_rule.findall(code)
    if matches == []:
        raise VerifyCodeError('Wrong verify code: %s' % code)

    logger.debug('Verify Code is: %s' % code)
    return code

项目：zhihu_spider 作者：wzqnls | 项目源码 | 文件源码

def gg(name):
    """Binarise the image at ``name`` in place, OCR it and print the text.

    The file is overwritten first with the greyscale image and then with the
    1-bit image produced from the module-level ``table``. The OCR text is
    stripped, upper-cased and passed through the ``rep`` replacements before
    being printed.

    :param name: path of the image file.
    """
    # Greyscale pass, written back to the same path.
    grey = Image.open(name).convert('L')
    grey.save(name)

    # Map through the lookup table into a 1-bit image and save it again.
    out = grey.point(table, '1')
    out.save(name)

    text = pytesseract.image_to_string(out).strip().upper()
    # Apply each replacement in turn.
    for wrong in rep:
        text = text.replace(wrong, rep[wrong])
    print(text)

# gg(CAPTCHA_PATH)

项目：brutepwdbyhtml 作者：spoock1024 | 项目源码 | 文件源码

def __post_data_with_captcha(self,postdata,captchaurl):
    """Fetch and OCR a captcha, add it to the form data and submit the form.

    :param postdata: dict of form fields; updated in place with the captcha
        text (when OCR succeeds) and with ``self.formdata.extradata``.
    :param captchaurl: URL the captcha image is downloaded from.
    :return: length of the response body text.
    """
    # One session is used for both the captcha download and the form post.
    session = requests.session()
    imagedata = session.get(captchaurl).content
    # The context manager closes the file even if the write fails.
    with open('image.jpg','wb') as f:
        f.write(imagedata)
    #decode the captcha
    try:
        imgstr = image_to_string(Image.open('image.jpg'))
        print(imgstr)
        postdata[self.formdata.maindata['captcha']] = imgstr
    except UnicodeDecodeError:
        # Best effort: post without the captcha field when the OCR output
        # cannot be decoded.
        pass
    postdata.update(self.formdata.extradata)
    postresponse = session.post(self.formdata.post_url,data=postdata,headers=self.formdata.headers)
    return len(postresponse.text)

项目：sia-cog 作者：deepakkumar1984 | 项目源码 | 文件源码

def extracttext(imgpath, preprocess):
    """OCR an image given by path or URL, with optional preprocessing.

    :param imgpath: local path, or a URL starting with ``http://``,
        ``https://`` or ``ftp://`` (fetched through ``url_to_image``).
    :param preprocess: ``"thresh"`` applies Otsu binarisation, ``"blur"`` a
        3x3 median blur; any other value leaves the grey image untouched.
    :return: ``{"text": <recognised text>}``.
    """
    if imgpath.startswith(('http://', 'https://', 'ftp://')):
        image = url_to_image(imgpath)
    else:
        image = cv2.imread(imgpath)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if preprocess == "thresh":
        gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)

    # The grey image goes through a scratch file named after the process id.
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)
    try:
        scratch = Image.open(filename)
        try:
            text = pytesseract.image_to_string(scratch)
        finally:
            # Release the handle before the file is deleted.
            scratch.close()
    finally:
        # Remove the scratch file even when OCR raises.
        os.remove(filename)
    return {"text": text}

项目：douban_robot 作者：zhangzheng88 | 项目源码 | 文件源码

def recognize_url(url):
    """Download an image, clean it with the project helpers and OCR it.

    The download is stored as ``imgs/tmp-img.jpg`` and the processed image as
    ``imgs/rebuild.jpg``; the width and height are printed along the way.

    :param url: address of the image to fetch.
    :return: text recognised in the processed image.
    """
    import urllib
    downloaded = "imgs/tmp-img.jpg"
    rebuilt = "imgs/rebuild.jpg"

    urllib.urlretrieve(url, downloaded)
    img = Image.open(downloaded).convert('RGBA')
    w, h = img.size
    print("%s %s" % (w, h))

    # Build the point list, dump it, denoise it in place, dump it again.
    point_list = gen_white_black_points(img)
    print_char_pic(w, h, point_list)
    reduce_noisy(w, h, point_list)
    print_char_pic(w, h, point_list)

    img.putdata(point_list)
    img.save(rebuilt)

    return pytesseract.image_to_string(Image.open(rebuilt))

项目：WowSign 作者：shark526 | 项目源码 | 文件源码

def rec_img(imgPath):
    """Split a captcha into pieces, OCR each piece and return the joined text.

    Prints the result and "success" when it has exactly four characters;
    otherwise prints "error ..." and saves the processed image under
    ``temp/error_<result>.png``.

    :param imgPath: path of the captcha image.
    :return: concatenation of the per-piece OCR results.
    """
    img = Image.open(imgPath).convert("L")
    # In-place clean-up by the project helpers: binarise, then two
    # de-noising passes.
    binarizing(img,170)
    depoint(img)
    depoint(img,True)

    pieces = split_image(img,save_temp=True)
    recdString = "".join(
        pytesseract.image_to_string(piece,config='-psm 10 outputbase digits')
        for piece in pieces
    )

    print(recdString)
    if len(recdString)==4:
        print("success")
    else:
        print("error ...")
        img.save('temp/error_%s.png' % recdString)
    return recdString

项目：RunescapeBots 作者：lukegarbutt | 项目源码 | 文件源码

def tesser_money_image(image):
    """OCR an amount from an image and return it as an integer.

    The image is doubled in size before OCR. Selected characters in the OCR
    text are then replaced (look-alike letters by digits, ``k``/``K`` by
    ``000``, ``m``/``M`` by ``000000``, ``W`` by ``40``) and the result is
    passed to ``int``.

    :param image: array accepted by ``cv2.resize``.
    :return: the parsed integer.
    :raises ValueError: from ``int`` when the substituted text is not a
        valid integer literal.
    """
    substitutions = {
        'o': '0', 'O': '0',
        'l': '1', 'I': '1', 'i': '1',
        'M': '000000', 'm': '000000',
        'K': '000', 'k': '000',
        's': '5', 'S': '5',
        'W': '40',
    }
    enlarged = cv2.resize(image, (0,0), fx=2, fy=2)
    picture = PIL.Image.fromarray(enlarged)
    txt = pytesseract.image_to_string(picture, config='-psm 7')
    # Characters without an entry are kept as they are.
    digits = ''.join(substitutions.get(ch, ch) for ch in txt)
    return int(digits)

项目：RunescapeBots 作者：lukegarbutt | 项目源码 | 文件源码

def tesser(image):
    """OCR ``image`` with ``-psm 7``, print the text and return it."""
    recognised = pytesseract.image_to_string(image, config='-psm 7')
    print(recognised)
    return recognised

项目：1ibrary-gzhu 作者：1ibrary | 项目源码 | 文件源码

def get_capture(self):
    """Download the captcha GIF, OCR it and return the stripped text.

    The GIF is cached under ``./img_cache/<PHPSESSID>.gif`` and deleted
    again before returning, including when pasting or OCR raises.

    :return: recognised text with surrounding whitespace removed.
    """
    data = requests.get(self.capture_url, cookies=self.cookies)
    cache_path = "./img_cache/" + self.cookies['PHPSESSID'] + ".gif"
    with open(cache_path, "wb+") as f:
        f.write(data.content)

    try:
        gif = Image.open(cache_path)
        try:
            # Paste the GIF frame onto an RGB canvas before OCR.
            png = Image.new("RGB", gif.size)
            png.paste(gif)
        finally:
            # Release the file handle so the cache file can be removed.
            gif.close()
        # Local renamed from ``str`` so the builtin is not shadowed.
        text = image_to_string(png).strip()
    finally:
        remove(cache_path)

    return text

项目：bib-tagger 作者：KateRita | 项目源码 | 文件源码

def getOcr(filename):
    """OCR the image at ``filename`` using the ``-psm 8 digits`` config.

    Earlier variants used the default config, ``-psm 6`` and ``-psm 7``;
    ``-psm 10`` was noted as the option for single-digit recognition.

    :param filename: path of the image file.
    :return: the recognised text.
    """
    picture = Image.open(filename)
    return image_to_string(picture, config="-psm 8 digits")

项目：Nightchord 作者：theriley106 | 项目源码 | 文件源码

def genNC(image=None, listofwords=None, artist=None, song=None):
    """OCR a sequence of image files and dump the results as JSON.

    The output file is named ``<text of first image>Transcript.json`` and
    holds the per-image OCR text under ``"GuessedWords"`` (keys start at 1)
    and ``listofwords`` under ``"Real_Lyrics"``.

    :param image: iterable of image paths; must contain at least one entry.
    :param listofwords: reference words to store; defaults to an empty list.
    :param artist: unused here.
    :param song: unused here.
    :raises ValueError: when ``image`` is ``None`` or empty (previously an
        opaque ``TypeError``/``KeyError``).
    """
    if not image:
        raise ValueError("genNC needs at least one image path")
    # A fresh list per call instead of a shared mutable default.
    if listofwords is None:
        listofwords = []

    Words = {}
    # Separate loop names so the ``image`` parameter is not overwritten.
    for position, path in enumerate(image, 1):
        Words[position] = pytesseract.image_to_string(Image.open(path))

    Information = {'GuessedWords': Words, 'Real_Lyrics': listofwords}
    with open('{}Transcript.json'.format(Words[1]), 'w') as f:
        json.dump(Information, f)

项目：Nightchord 作者：theriley106 | 项目源码 | 文件源码

def ocrList(image):
    """OCR the image at path ``image`` and split longer results into words.

    :param image: path of the image file.
    :return: the UTF-8 encoded OCR text unchanged when it is at most five
        characters long; otherwise a list obtained by turning newlines into
        spaces, collapsing double spaces once and splitting on single spaces.
    """
    text = pytesseract.image_to_string(Image.open(image)).encode('utf-8','replace')
    if len(text) <= 5:
        return text
    flattened = text.replace('\n', ' ').replace('  ', ' ')
    return flattened.split(' ')

项目：Nightchord 作者：theriley106 | 项目源码 | 文件源码

def calcSpaces(image):
    """Print the OCR text of an image and count its space-separated pieces.

    :param image: path of the image file.
    :return: number of pieces after dropping newlines and splitting on ``' '``.
    """
    raw = pytesseract.image_to_string(Image.open(image)).encode('utf-8','replace')
    print(raw)
    pieces = raw.replace('\n', '').split(' ')
    return len(pieces)

项目：Nightchord 作者：theriley106 | 项目源码 | 文件源码

def genNC(image=None, listofwords=[], artist=None, song=None):
    """Run text extraction and word matching over images in worker threads.

    First runs ``extractText`` over slices of ``image`` in threads, then
    starts one ``doCommand`` thread per image, and finally writes
    ``Information`` to ``<Words[1]>Transcript.json``.

    :param image: sequence of image paths (``len`` and slicing are used).
    :param listofwords: overwritten by ``GrabSongLyrics(artist, song)``.
    :param artist: passed to ``GrabSongLyrics``.
    :param song: passed to ``GrabSongLyrics``.
    """
    threads = []
    # NOTE(review): nothing in this function stores into Words, so the
    # Words[1] lookup at the end looks like it raises KeyError -- confirm.
    Words = {}

    def batchExtract(listofimages):
        # Extract text from each image; failures are printed and skipped.
        for image in listofimages:
            try:
                extractText(image)
            except Exception as exp:
                print(exp)
                pass

    def doCommand(image, listofwords):
        # Print the closest match for every OCR token longer than 3 chars.
        a = pytesseract.image_to_string(Image.open(image)).encode('utf-8','replace').split(' ')
        for a in a:
            if len(a) > 3:
                # NOTE(review): [0] raises IndexError when there is no close
                # match; this is also a Python 2 print statement.
                print difflib.get_close_matches(str(a), listofwords)[0]

    Information = {}
    listofwords = GrabSongLyrics(artist, song)
    d = []

    # NOTE(review): the slice i*5 : i*5+4 holds four items per step of five,
    # and images past the last full group of five are not visited -- confirm
    # whether batches of five were intended.
    for i in range(len(image) / 5):
        t = threading.Thread(target=batchExtract, args=([image[i*5:(i*5) + 4]]))
        d.append(t)
        t.start()

    # Wait for all extraction batches before starting the matching threads.
    for t in d:
        t.join()

    # NOTE(review): doCommand's second parameter is used as the candidate
    # list for get_close_matches, but the loop index i is passed here --
    # confirm which was intended.
    for i, image in enumerate(image):
        t = threading.Thread(target=doCommand, args=(image, i))
        threads.append(t)
        t.start()

    for t in threads:
        t.join()
    Information["GuessedWords"] = Words
    Information["Real_Lyrics"] = listofwords
    with open('{}Transcript.json'.format(Words[1]), 'w') as f:
        json.dump(Information, f)

项目：WebScraping 作者：liinnux | 项目源码 | 文件源码

def ocr(img):
    """OCR a captcha image and return only its ASCII letters, lower-cased.

    :param img: image object offering ``convert`` (a PIL image).
    :return: lower-case string made of the ASCII letters found in the OCR text.
    """
    # threshold the image to ignore background and keep text
    gray = img.convert('L')
    # Only pixels with grey level 0 stay black; everything else turns white.
    bw = gray.point(lambda x: 0 if x < 1 else 255, '1')
    word = pytesseract.image_to_string(bw)
    # ``string.ascii_letters`` is fixed and exists on Python 2 and 3, unlike
    # the locale-dependent, Python-2-only ``string.letters``.
    return ''.join(c for c in word if c in string.ascii_letters).lower()

项目：beryl 作者：DanielJDufour | 项目源码 | 文件源码

def is_text_on_screen(target, notify=True):
    """Report whether ``target`` appears in any OCR'd region of a screenshot.

    :param target: text to look for; a ``str`` is decoded from UTF-8 first.
    :param notify: when true, ``_notify`` is called at the start.
    :return: ``True`` as soon as a region's text contains ``target``,
        otherwise ``False``.
    """
    if notify:
        _notify("starting is_text_on_screen")

    if isinstance(target, str):
        target = target.decode('utf-8')

    # Take the screenshot, then pause for a second before reading it.
    path_to_screenshot = take_a_screenshot()
    sleep(1)

    # Upscale 2x, binarise, and collect contours as candidate text regions.
    im = cv2.resize(cv2.imread(path_to_screenshot), (0,0), fx=2, fy=2)
    imgray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
    _ret, thresh = cv2.threshold(imgray,127,255,cv2.THRESH_BINARY)
    contours, _hierarchy = find_contours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        b = Box(cv2.boundingRect(contour))
        # Skip boxes that are not both wider than 10 and taller than 6.
        if not (b.width > 10 and b.height > 6):
            continue
        cropped = im[b.ymin:b.ymax, b.xmin:b.xmax]
        text = image_to_string(Image.fromarray(cropped))
        print("text:", text)
        if target in text.decode("utf-8"):
            return True
    return False

项目：TalosLibraryMate 作者：jim-hart | 项目源码 | 文件源码

def process_ocr(force=False):
    """OCR every screenshot, convert the text and print it.

    :param force: If True, every file is scanned even when the scan log says
                  it was processed before, and nothing is written to the log.
    """
    path = TesseractConfig()
    scan_logs = ScanLogs()

    tessdata = '--tessdata-dir "{}"'.format(path.tessdata)
    skipped_files = 0
    for filename in os.listdir(path.screenshots):
        # Already-scanned files are only counted, unless a rescan is forced.
        if not force and scan_logs.check_if_scanned(filename):
            skipped_files += 1
            continue

        image = Image.open('{}\\{}'.format(path.screenshots, filename))
        raw_text = pytesseract.image_to_string(image, config=tessdata)
        converted_text = TextConversion(raw_text)

        print(converted_text)
        if not force:
            scan_logs.logs = {filename: str(converted_text)}

    print("\n----------Scan Complete-----------")
    if skipped_files:
        print_delay("{} Files Skipped (Already Scanned)".format(skipped_files))

项目：doc2text 作者：jlsutherland | 项目源码 | 文件源码

def extract_text(self):
    """OCR ``self.image`` and store and return the text.

    The image is written to the scratch file ``text_temp.png``, read back
    for OCR with language ``self.lang``, and the scratch file is removed
    afterwards, including when OCR raises.

    :return: the recognised text (also stored in ``self.text``).
    """
    temp_path = 'text_temp.png'
    cv2.imwrite(temp_path, self.image)
    try:
        scratch = Image.open(temp_path)
        try:
            self.text = pytesseract.image_to_string(scratch, lang=self.lang)
        finally:
            # Release the handle before the file is deleted.
            scratch.close()
    finally:
        os.remove(temp_path)
    return self.text

项目：idmatch 作者：maddevsio | 项目源码 | 文件源码

def recognize_card(original_image, country='kg', preview=False):
    """Crop an ID-card image, locate text regions and OCR each of them.

    :param original_image: image path handed to ``process_image``.
    :param country: not used inside this function.
    :param preview: when true, the annotated card image is saved and its
        location is returned alongside the result.
    :return: ``regionskir(result)``, or ``(location, regionskir(result))``
        when ``preview`` is true.
    """
    from processing.border_removal import resize
    from processing.crop import process_image
    result = []
    # Fixed scratch file name; presumably process_image writes the cropped
    # card there -- verify against processing.crop.
    cropped_image = "croped-image.jpg"
    process_image(original_image, cropped_image)
    # NOTE(review): cv2.COLOR_BGR2GRAY is a cvtColor conversion code, not an
    # imread flag -- confirm the intended read mode. The cvtColor call in the
    # loop below expects a colour image.
    idcard = cv2.imread(cropped_image, cv2.COLOR_BGR2GRAY)
    idcard = resize(idcard, width=720)

    # NOTE(review): on Python 3 these `/` divisions yield floats, including
    # the size passed to cv2.resize -- confirm the target interpreter.
    scale_down = (8 * 170 / detect_dpi(idcard))
    if scale_down <= 4:
        rows, cols = idcard.shape[:2]
        idcard = cv2.resize(idcard, (scale_down * cols / 8, scale_down * rows / 8))

    contours, hierarchy = recognize_text(idcard)
    for index, contour in enumerate(contours):
        [x, y, w, h] = cv2.boundingRect(contour)
        # Recomputed on every iteration: cv2.rectangle below draws onto
        # idcard, so later iterations see the earlier rectangles.
        gray = cv2.cvtColor(idcard, cv2.COLOR_RGB2GRAY)
        roi = gray[y:y + h, x:x + w]
        # NOTE(review): `/ h * w` evaluates as (count / h) * w; a fill ratio
        # would be count / (h * w) -- confirm intent before changing.
        if cv2.countNonZero(roi) / h * w > 0.55:
            if h > 16 and w > 16:
                # Each region is written to "<index>.jpg" and read back for
                # OCR; these files are not removed here.
                filename = '%s.jpg' % index
                cv2.imwrite(filename, roi)
                text = pytesseract.image_to_string(
                    Image.open(filename), lang="kir+eng", config="-psm 7"
                )                
                item = {'x': x, 'y': y, 'w': w, 'h': h, 'text': text}
                result.append(item)
                # Mark the accepted region on the card image.
                cv2.rectangle(idcard, (x, y), (x + w, y + h), (255, 0, 255), 2)
    if preview:
        # Keep only the file name part of the path for the preview image.
        original_image = original_image.split('/')[-1]
        location = save_image('regions' + original_image, idcard)
        return location, regionskir(result)
    return regionskir(result)

项目：pyqt5 作者：yurisnm | 项目源码 | 文件源码

def init_ui(self):
        """Build the widget tree and show the OCR text of ``TEXT.png``.

        Creates a vertical layout holding a scaled preview label and a row
        with an "Open" button and a line edit, connects the button to
        ``self.openFilePressed`` and fills the line edit with the text
        recognised in ``TEXT.png``.
        """
        self._fileDialog = QFileDialog(self)
        self._v_layout = QVBoxLayout()
        self._v_layout.setSpacing(2)
        self.setLayout(self._v_layout)
        # Default image, loaded from the working directory.
        self._path = "TEXT.png"
        self._pixmap = QPixmap(self._path)
        self._btnFile = QPushButton("Open")
        # Container widget for the button + line edit row.
        self._hWidget = QWidget()
        self._hLayout = QHBoxLayout()
        self._hWidget.setLayout(self._hLayout)

        self._image = Image.open(self._path)
        self._line = QLineEdit()

        self._hLayout.addWidget(self._btnFile)
        self._hLayout.addWidget(self._line)
        # The preview is scaled to 160x90 with smooth transformation.
        size = QSize(160, 90)
        pix = self._pixmap.scaled(size, transformMode=Qt.SmoothTransformation)

        self._lbl = QLabel()
        self._lbl.setPixmap(pix)
        # The preview label goes above the button/line-edit row.
        self._v_layout.addWidget(self._lbl)
        self._v_layout.addWidget(self._hWidget)
        self._btnFile.clicked.connect(self.openFilePressed)

        # The file is opened a second time here rather than reusing
        # self._image.
        self._line.setText(pytesseract.image_to_string(Image.open('TEXT.png')))

项目：pyqt5 作者：yurisnm | 项目源码 | 文件源码

def openFilePressed(self):
    """Let the user pick an image, preview it and show its OCR text.

    Nothing changes when the dialog returns an empty file name.
    ``self._path`` keeps the value returned by ``getOpenFileName``, whose
    first element is the chosen path.
    """
    # Argument order is (parent, caption, directory, filter). The filter
    # string used to sit in the caption position, so it was shown as the
    # window title and no filter was applied.
    self._path = self._fileDialog.getOpenFileName(
        self, "", "", "Image Files (*.png *.jpg)")
    if self._path[0] != "":
        self._pixmap = QPixmap(self._path[0])
        size = QSize(160, 90)
        pix = self._pixmap.scaled(size,
                                  transformMode=Qt.SmoothTransformation)
        self._lbl.setPixmap(pix)
        self._image = Image.open(self._path[0])
        text = pytesseract.image_to_string(self._image)
        self._line.setText(text)

项目：pyqt5 作者：yurisnm | 项目源码 | 文件源码

def updateText(self):
    """Reload ``TEXT.png``, refresh the preview and publish its OCR text.

    The recognised text is put into the line edit and emitted through
    ``self.signal_send_text``.
    """
    source = 'TEXT.png'

    # Refresh the 160x90 preview.
    self._pixmap = QPixmap(source)
    preview = self._pixmap.scaled(QSize(160, 90),
                                  transformMode=Qt.SmoothTransformation)
    self._lbl.setPixmap(preview)

    # OCR the file with the English model and the '-psm 8' config.
    self._image = Image.open(source)
    text = pytesseract.image_to_string(self._image, lang='eng', config='-psm 8')
    self._line.setText(text)
    self.signal_send_text.emit(text)

项目：captcha_project 作者：zhanghe06 | 项目源码 | 文件源码

def img_to_string(self):
    """Crop, optimise and OCR the image, then post-process the text.

    The OCR result is stored in ``self.img_text`` and printed once before
    and once after ``self.optimize_text()`` runs.

    :return: None
    """
    # Prepare the image.
    self.crop_img()
    self.optimize_img()

    # Recognise and report the raw text.
    self.img_text = pytesseract.image_to_string(self.img_fp)
    print('??????%s' % self.img_text)

    # Clean the text up and report it again.
    self.optimize_text()
    print('??????%s' % self.img_text)

项目：Mac-Python-3.X 作者：L1nwatch | 项目源码 | 文件源码

def test2(image_name):
    """Threshold an image channel by channel and print its OCR text.

    :param image_name: path of the image to open.
    :return: None
    """
    black = (0, 0, 0, 255)
    white = (255, 255, 255, 255)

    with Image.open(image_name) as image:
        rgba = image.convert("RGBA")
        pixdata = rgba.load()
        width, height = rgba.size

        # Make the letters bolder for easier recognition. Each pixel only
        # depends on itself, so the three checks run back to back per pixel,
        # in the same order as separate passes would apply them.
        for y in range(height):
            for x in range(width):
                if pixdata[x, y][0] < 90:
                    pixdata[x, y] = black
                if pixdata[x, y][1] < 136:
                    pixdata[x, y] = black
                if pixdata[x, y][2] > 0:
                    pixdata[x, y] = white

        print(pytesseract.image_to_string(rgba))

项目：Mac-Python-3.X 作者：L1nwatch | 项目源码 | 文件源码

def image_to_string(image):
    """Recognise digits by comparing glyph pixel tables with ``font``.

    The image is converted to black and white and cut into pieces; each
    piece contributes the first digit 0-9 whose ``font`` entry equals its
    pixel table, and pieces without a match contribute nothing.

    :param image: image to recognise.
    :return: string of the matched digits.
    """
    global font
    glyphs = cut(convert_black_white(image))
    digits = []
    for glyph in glyphs:
        matched = next(
            (num for num in range(10) if create_pix_tables(glyph) == font[num]),
            None,
        )
        if matched is not None:
            digits.append(str(matched))
    return "".join(digits)

项目：Mac-Python-3.X 作者：L1nwatch | 项目源码 | 文件源码

def solve():
    """Print the sum of ``i * value`` over ``1.bmp`` to ``9999.bmp``.

    ``value`` is the integer recognised in each bitmap by
    ``image_to_string``. The module-level ``font`` is (re)loaded from
    ``get_font()`` first.

    :return: None
    """
    global font
    font = get_font()
    path = "/Users/L1n/Desktop/bmp"
    total = 0
    for i in range(1, 10000):
        bmp_path = os.sep.join((path, str(i) + ".bmp"))
        with Image.open(bmp_path) as image:
            total += i * int(image_to_string(image))
    print("Sum: {}".format(total))

项目：pytesseractID 作者：iChenwin | 项目源码 | 文件源码

def recognizeImage(results, cvimage ,rect, language, charWhiteList=None):
    """OCR one region and append the outcome to ``results``.

    :param results: list that receives an ``ImageRecognizerItem``.
    :param cvimage: array accepted by ``Image.fromarray``.
    :param rect: stored in the item together with the text.
    :param language: tesseract language passed as ``lang``.
    :param charWhiteList: optional characters to restrict recognition to.
    """
    options = ["-psm 7"]   # single line mode
    if charWhiteList is not None:
        options.append("-c tessedit_char_whitelist=" + charWhiteList)
    config = " ".join(options)

    picture = Image.fromarray(cvimage)
    text = pytesseract.image_to_string(picture, lang=language, config=config)
    results.append(ImageRecognizerItem(text, rect))

# ??ImageRecognizerItem

项目：Spider 作者：poluo | 项目源码 | 文件源码

def recognize_url(url):
    """Download an image, clean it with the project helpers and OCR it.

    The download is stored as ``./img.jpg``; the processed image is written
    to a fixed absolute path and read back for OCR.

    :param url: address of the image to fetch.
    :return: text recognised in the processed image.
    """
    import urllib.request
    processed_path = 'C:\\Users\\poluo\\PycharmProjects\\douban\\douban\\processed.jpg'

    urllib.request.urlretrieve(url, './img.jpg')
    img = Image.open('./img.jpg').convert('RGBA')
    w, h = img.size

    # Build the point list, dump it, denoise it in place, dump it again.
    point_list = gen_white_black_points(img)
    print_char_pic(w, h, point_list)
    reduce_noisy(w, h, point_list)
    print_char_pic(w, h, point_list)

    img.putdata(point_list)
    img.save(processed_path)
    return pytesseract.image_to_string(Image.open(processed_path))

项目：captcha-breaker 作者：Detry322 | 项目源码 | 文件源码

def solution_from_image(image):
    """Solve a four-piece captcha one piece at a time.

    :param image: captcha image handed to ``filter_split``.
    :return: ``'????'`` when the split does not yield exactly four pieces;
        otherwise the concatenated per-piece results, with ``'?'`` standing
        in for any piece that raised ``TesseractError`` or produced nothing.
    """
    pieces = filter_split(image)
    if len(pieces) != 4:
        return '????'

    guesses = []
    for piece in pieces:
        try:
            solved = pytesseract.image_to_string(piece, config='-psm 10 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz')
        except pytesseract.pytesseract.TesseractError:
            solved = None
        guesses.append(solved or '?')
    return ''.join(guesses)