Python pytesseract 模块,image_to_string() 实例源码

我们从Python开源项目中,提取了以下49个代码示例,用于说明如何使用pytesseract.image_to_string()

项目:F1-Telemetry    作者:MrPranz    | 项目源码 | 文件源码
def velocity_ocr(image,coords,f1app):
    """Read the text inside a region of a video frame via OCR.

    The frame is cropped to ``coords``, converted to greyscale, doubled in
    size and run through a brightness/contrast chain selected by ``f1app``
    before being handed to tesseract.

    :param image: frame array accepted by ``Image.fromarray``
    :param coords: crop box forwarded to ``Image.crop``
    :param f1app: truthy for F1-app footage, falsy for the onboard graphic
    :return: the raw OCR string, or ``-1`` when the OCR call raised
        ``UnicodeDecodeError``
    """
    cropped = Image.fromarray(image).crop(coords)
    img = cropped.convert('L')
    img = img.resize([img.width*2,img.height*2])

    if f1app:
        # filters for video from the f1 app
        pipeline = (
            (ImageEnhance.Brightness, 3.0),
            (ImageEnhance.Contrast, 2.0),
        )
    else:
        # filters for onboard video graphic
        pipeline = (
            (ImageEnhance.Brightness, 0.1),
            (ImageEnhance.Contrast, 2.0),
            (ImageEnhance.Contrast, 4.0),
            (ImageEnhance.Brightness, 0.2),
            (ImageEnhance.Contrast, 16.0),
        )
    for enhancer, factor in pipeline:
        img = enhancer(img).enhance(factor)

    try:
        return pytesseract.image_to_string(img)
    except UnicodeDecodeError:
        return -1
项目:base_function    作者:Rockyzsu    | 项目源码 | 文件源码
def image_recognize():
    """OCR ``data/0.jpg``, print the text and save it to ``Verification.txt``."""
    import pytesseract
    from PIL import Image

    class GetImageDate(object):
        """Helper that recognises a fixed image and persists the result."""

        def m(self):
            """Return the text tesseract recognises in ``data/0.jpg``."""
            image = Image.open("data/0.jpg")
            try:
                return pytesseract.image_to_string(image)
            finally:
                # Release the image file handle even if OCR fails.
                image.close()

        def SaveResultToDocument(self):
            """Print the recognised text and write it to ``Verification.txt``."""
            text = self.m()
            # The context manager closes the file even if the write raises.
            with open(u"Verification.txt", "w") as f:
                print(text)
                f.write(str(text))

    g = GetImageDate()
    g.SaveResultToDocument()
项目:electsys-splinter    作者:fztfztfztfzt    | 项目源码 | 文件源码
def recognize(self):
        """Download the jaccount captcha with the browser's cookies and OCR it.

        The image is fetched with ``urllib2`` using the current browser
        cookies and referer, saved as ``captcha.png``, converted to greyscale
        and passed to tesseract.

        :return: the ASCII letters and digits found in the OCR output, or
            ``'aaaa'`` when none were found
        """
        def format_captcha(captcha):
            # Keep only ASCII digits and letters; fall back to a dummy guess.
            kept = ''.join(
                ch for ch in captcha
                if 48 <= ord(ch) <= 57 or 65 <= ord(ch) <= 90 or 97 <= ord(ch) <= 122
            )
            return 'aaaa' if kept == '' else kept

        cookie = self.browser.cookies.all()
        opener = urllib2.build_opener()
        opener.addheaders.extend([
            ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'),
            ('Host','jaccount.sjtu.edu.cn'),
            ('Referer',self.browser.url),
            ('Cookie',  "; ".join('%s=%s' % (k,v) for k,v in cookie.items())),
        ])
        response = opener.open("https://jaccount.sjtu.edu.cn/jaccount/captcha?1488154642719")
        data = response.read()
        with file('captcha.png','wb') as out:
            out.write(data)
        img = Image.open("captcha.png").convert('L')
        raw_text = pytesseract.image_to_string(img,lang="eng")
        return format_captcha(raw_text)
项目:OdooQuant    作者:haogefeifei    | 项目源码 | 文件源码
def detect_gf_result(image_path):
    """Clean up a captcha image and return the text tesseract reads from it.

    Dark pixels are whitened, the image is converted to greyscale and
    thresholded, then passed through one min filter and three median filters
    before OCR.

    :param image_path: path (or file object) accepted by ``Image.open``
    :return: the OCR result with all spaces removed
    """
    from PIL import ImageFilter, Image
    import pytesseract
    img = Image.open(image_path)
    # 255 is the maximum 8-bit channel value; the previous 256 was out of range.
    white = (255, 255, 255)
    for x in range(img.width):
        for y in range(img.height):
            # NOTE(review): tuple comparison is lexicographic, so this is
            # decided mostly by the first channel; assumes a 3-channel image
            # -- confirm that is the intended "dark pixel" test.
            if img.getpixel((x, y)) < (100, 100, 100):
                img.putpixel((x, y), white)
    gray = img.convert('L')
    # Grey levels strictly between 68 and 90 become black, everything else white.
    two = gray.point(lambda x: 0 if 68 < x < 90 else 255)
    min_res = two.filter(ImageFilter.MinFilter)
    med_res = min_res.filter(ImageFilter.MedianFilter)
    for _ in range(2):
        med_res = med_res.filter(ImageFilter.MedianFilter)
    res = pytesseract.image_to_string(med_res, config='-psm 6')
    return res.replace(' ', '')
项目:MyPython    作者:fupinglee    | 项目源码 | 文件源码
def readCaptcha(self):
        """Download the captcha for ``self.captchaId`` and OCR it.

        The image is saved to ``i:/img/<captchaId>.png`` and the recognised
        text is stored in ``self.captcha``. If the request fails with a
        connection error, a message is printed and the method returns without
        touching ``self.captcha``.
        """
        headers = {
            'Accept-Language':'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0',
            'Referer':'http://******/login.jsp',
            'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
            'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'X-Forwarded-For':self.captchaId
        }
        url = 'http://******/common/captcha.jhtml?captchaId='+self.captchaId
        try:
            res = requests.get(url,headers=headers)
        except requests.exceptions.ConnectionError:
            print('??????')
            # Without a response there is nothing to save or recognise;
            # falling through would fail on the unbound ``res``.
            return

        path = "i:/img/"+self.captchaId+".png"
        # The context manager closes the file even if the write raises.
        with open(path,'wb') as fp:
            fp.write(res.content)
        image = Image.open(path)
        code = pytesseract.image_to_string(image)
        self.captcha = code
项目:Mac-Python-3.X    作者:L1nwatch    | 项目源码 | 文件源码
def test(image_name):
    """
    Run OCR on an image with the first available pyocr tool and print the results.

    The image is converted to greyscale, cleaned by ``cut_noise`` and saved
    as ``test.jpg`` before recognition. Exits with status 1 when pyocr
    reports no tools.
    :param image_name: path (or file object) accepted by ``Image.open``
    :return: None
    """
    with Image.open(image_name) as image:
        # Greyscale first, then hand the result to cut_noise() for cleanup.
        image = cut_noise(image.convert("L"))

        tools = pyocr.get_available_tools()
        if not tools:
            print("No OCR tool found")
            sys.exit(1)
        engine = tools[0]
        image.save("test.jpg")

        text_builder = pyocr.builders.TextBuilder()
        txt = engine.image_to_string(image, lang="eng", builder=text_builder)
        # Digits - Only Tesseract
        digit_builder = pyocr.tesseract.DigitBuilder()
        digits = engine.image_to_string(image, lang="eng", builder=digit_builder)
        print(txt)
        print(digits)
项目:brutepwdbyflow    作者:spoock1024    | 项目源码 | 文件源码
def __post_data_with_captcha(self,postdata,captchaurl):
        """Fetch the captcha, OCR it, submit the form and return the page length.

        :param postdata: dict of form fields; updated in place with the
            captcha guess (when OCR succeeds) and ``self.formdata.extradata``
        :param captchaurl: URL the captcha image is downloaded from
        :return: length of the response body text
        """
        session = requests.session()
        response = session.get(captchaurl)
        imagedata = response.content
        # The context manager closes the file even if the write raises.
        with open('image.jpg','wb') as f:
            f.write(imagedata)
        #decode the captcha
        try:
            imgstr =image_to_string(Image.open('image.jpg'))
            print(imgstr)
            postdata[self.formdata.maindata['captcha']] = imgstr
        except UnicodeDecodeError:
            # Deliberate best effort: when the OCR output cannot be decoded
            # the form is submitted without a captcha value.
            pass
        postdata.update(self.formdata.extradata)
        postresponse = session.post(self.formdata.post_url,data=postdata,headers=self.formdata.headers)
        responseHtml = postresponse.text
        return len(responseHtml)
项目:brutepwdbyflow    作者:spoock1024    | 项目源码 | 文件源码
def __post_data_with_captcha(self,postdata,captchaurl):
        """Fetch the captcha, OCR it, submit the form and return the page length.

        :param postdata: dict of form fields; updated in place with the
            captcha guess (when OCR succeeds) and ``self.formdata.extradata``
        :param captchaurl: URL the captcha image is downloaded from
        :return: length of the response body text
        """
        session = requests.session()
        response = session.get(captchaurl)
        imagedata = response.content
        # The context manager closes the file even if the write raises.
        with open('image.jpg','wb') as f:
            f.write(imagedata)
        #decode the captcha
        try:
            imgstr =image_to_string(Image.open('image.jpg'))
            print(imgstr)
            postdata[self.formdata.maindata['captcha']] = imgstr
        except UnicodeDecodeError:
            # Deliberate best effort: when the OCR output cannot be decoded
            # the form is submitted without a captcha value.
            pass
        postdata.update(self.formdata.extradata)
        postresponse = session.post(self.formdata.post_url,data=postdata,headers=self.formdata.headers)
        responseHtml = postresponse.text
        return len(responseHtml)
项目:pytesseractID    作者:iChenwin    | 项目源码 | 文件源码
def main():
    """Run OCR on the Canny edge map of the image named on the command line.

    Expects exactly one argument (the image path). Prints the OCR result for
    the edge image (restricted to ``0123456789X``) and for its inverse, shows
    the inverted edges in a window and waits for a key press.
    """
    # parse command line options
    if len(sys.argv) != 2:
        print('Usage: python input_name output_name')
        exit(1)
    source_path = sys.argv[1]

    print("<----- processing %s ----->" % source_path)

    # Load as greyscale (imread flag 0) and resize to 1200x900.
    img = cv2.resize(cv2.imread(source_path, 0), (1200, 900))

    # Disabled experiment, kept for reference:
    # imgArr = np.array(img)
    # imgMean = np.mean(img)
    # imgcopy = imgArr - imgMean
    # imgcopy = imgcopy * 2 + imgMean * 3
    # imgcopy = imgcopy / 255

    edges = cv2.Canny(img, 60, 300)
    inverted = cv2.bitwise_not(edges)
    cv2.imshow('Canny', inverted)

    edges_image = Image.fromarray(edges)
    inverted_image = Image.fromarray(inverted)

    whitelist_config = "-c tessedit_char_whitelist=0123456789X"
    result = pytesseract.image_to_string(edges_image, lang="eng", config=whitelist_config)
    print(result)
    print("-------")
    result = pytesseract.image_to_string(inverted_image, lang="eng")
    print(result)

    cv2.waitKey(0)
项目:vxTrader    作者:vex1023    | 项目源码 | 文件源码
def vcode(self):
        """Fetch the verification-code image and return the OCR'd code.

        :return: the recognised code when ``self.code_rule`` finds a match
        :raises VerifyCodeError: when ``self.code_rule`` matches nothing in
            the OCR output
        """
        response = self._session.get(
            'https://jy.yongjinbao.com.cn/winner_gj/gjzq/user/extraCode.jsp',
            params={'randomStamp': random.random()}
        )
        response.raise_for_status()

        # Decode the image straight from the response bytes, then OCR it.
        img_buffer = BytesIO(response.content)
        img = Image.open(img_buffer)
        code = pytesseract.image_to_string(img)
        img.close()
        img_buffer.close()

        matches = self.code_rule.findall(code)
        if matches != []:
            logger.debug('Verify Code is: %s' % code)
            return code
        raise VerifyCodeError('Wrong verify code: %s' % code)
项目:zhihu_spider    作者:wzqnls    | 项目源码 | 文件源码
def gg(name):
    """Binarise the image at ``name`` in place, OCR it and print the cleaned text.

    The file is overwritten twice: first with the greyscale version, then
    with the 1-bit image produced from the module-level ``table``. The OCR
    output is stripped, upper-cased and corrected using the ``rep`` mapping.
    """
    grey = Image.open(name).convert('L')
    grey.save(name)
    # Apply the lookup table to get a 1-bit image.
    binary = grey.point(table, '1')
    binary.save(name)
    text = pytesseract.image_to_string(binary).strip().upper()
    # Substitute each key of ``rep`` with its replacement value.
    for wrong in rep:
        text = text.replace(wrong, rep[wrong])
    print(text)

# gg(CAPTCHA_PATH)
项目:brutepwdbyhtml    作者:spoock1024    | 项目源码 | 文件源码
def __post_data_with_captcha(self,postdata,captchaurl):
        """Fetch the captcha, OCR it, submit the form and return the page length.

        :param postdata: dict of form fields; updated in place with the
            captcha guess (when OCR succeeds) and ``self.formdata.extradata``
        :param captchaurl: URL the captcha image is downloaded from
        :return: length of the response body text
        """
        session = requests.session()
        response = session.get(captchaurl)
        imagedata = response.content
        # The context manager closes the file even if the write raises.
        with open('image.jpg','wb') as f:
            f.write(imagedata)
        #decode the captcha
        try:
            imgstr =image_to_string(Image.open('image.jpg'))
            print(imgstr)
            postdata[self.formdata.maindata['captcha']] = imgstr
        except UnicodeDecodeError:
            # Deliberate best effort: when the OCR output cannot be decoded
            # the form is submitted without a captcha value.
            pass
        postdata.update(self.formdata.extradata)
        postresponse = session.post(self.formdata.post_url,data=postdata,headers=self.formdata.headers)
        responseHtml = postresponse.text
        return len(responseHtml)
项目:sia-cog    作者:deepakkumar1984    | 项目源码 | 文件源码
def extracttext(imgpath, preprocess):
    """OCR an image given by local path or URL and return the recognised text.

    The image is converted to greyscale, optionally thresholded or blurred,
    written to a scratch PNG named after the current process id and passed
    to tesseract. The scratch file is removed afterwards, also when OCR
    fails.

    :param imgpath: local path, or an ``http://``/``https://``/``ftp://`` URL
    :param preprocess: ``"thresh"`` for Otsu thresholding, ``"blur"`` for a
        3x3 median blur; any other value leaves the greyscale image as is
    :return: ``{"text": <recognised text>}``
    """
    if imgpath.startswith(('http://', 'https://', 'ftp://')):
        image = url_to_image(imgpath)
    else:
        image = cv2.imread(imgpath)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if preprocess == "thresh":
        gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)

    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)
    try:
        # Close the image before deleting the file it was read from.
        with Image.open(filename) as scratch:
            text = pytesseract.image_to_string(scratch)
    finally:
        # Clean up even when OCR raised; the existence check keeps a missing
        # scratch file from masking the original error.
        if os.path.exists(filename):
            os.remove(filename)
    return {"text": text}
项目:douban_robot    作者:zhangzheng88    | 项目源码 | 文件源码
def recognize_url(url):
    """Download a captcha image, denoise it and return the OCR result.

    The image is saved to ``imgs/tmp-img.jpg``, reduced to a point list by
    ``gen_white_black_points``, cleaned in place by ``reduce_noisy`` and
    written to ``imgs/rebuild.jpg``, which is then passed to tesseract. The
    character picture is printed before and after noise reduction.

    :param url: address of the image to download
    :return: text recognised in the rebuilt image
    """
    import urllib
    download_path = "imgs/tmp-img.jpg"
    rebuilt_path = "imgs/rebuild.jpg"

    urllib.urlretrieve(url, download_path)
    img = Image.open(download_path).convert('RGBA')
    w, h = img.size
    print("%s %s" % (w, h))

    point_list = gen_white_black_points(img)
    print_char_pic(w, h, point_list)
    reduce_noisy(w, h, point_list)
    print_char_pic(w, h, point_list)

    img.putdata(point_list)
    img.save(rebuilt_path)

    rebuilt = Image.open(rebuilt_path)
    return pytesseract.image_to_string(rebuilt)
项目:WowSign    作者:shark526    | 项目源码 | 文件源码
def rec_img(imgPath):
    """Recognise the digits in a captcha image, one split piece at a time.

    The image is converted to greyscale, binarised with threshold 170,
    de-speckled twice and split by ``split_image``; each piece is OCR'd as a
    single character and the results are concatenated. When the result is
    not four characters long, the processed image is saved under
    ``temp/error_<result>.png``.

    :param imgPath: path (or file object) accepted by ``Image.open``
    :return: the concatenated OCR result
    """
    img = Image.open(imgPath).convert("L")
    binarizing(img,170)
    depoint(img)
    depoint(img,True)
    pieces = split_image(img,save_temp=True)
    recdString = "".join(
        pytesseract.image_to_string(piece,config='-psm 10 outputbase digits')
        for piece in pieces
    )

    print(recdString)
    if len(recdString) != 4:
        print("error ...")
        img.save('temp/error_%s.png' % recdString)
    else:
        print("success")
    return recdString
项目:RunescapeBots    作者:lukegarbutt    | 项目源码 | 文件源码
def tesser_money_image(image):
    """OCR an amount from an image array and return it as an integer.

    The image is enlarged 2x, read as a single text line and common OCR
    confusions are replaced character by character (letters that look like
    digits, ``K``/``M`` suffixes expanded to zeros) before converting to
    ``int``.

    :param image: image array accepted by ``cv2.resize``
    :return: the parsed integer
    :raises ValueError: if the substituted text is not a valid integer
    """
    enlarged = cv2.resize(image, (0,0), fx=2, fy=2)
    pil_image = PIL.Image.fromarray(enlarged)
    txt = pytesseract.image_to_string(pil_image, config='-psm 7')
    # Per-character replacements; anything not listed is kept unchanged.
    substitutions = {
        'o': '0', 'O': '0',
        'l': '1', 'I': '1', 'i': '1',
        'M': '000000', 'm': '000000',
        'K': '000', 'k': '000',
        's': '5', 'S': '5',
        'W': '40',
    }
    normalised = ''.join(substitutions.get(ch, ch) for ch in txt)
    return int(normalised)
项目:RunescapeBots    作者:lukegarbutt    | 项目源码 | 文件源码
def tesser(image):
    """OCR ``image`` with tesseract config ``-psm 7``, print and return the text."""
    recognised = pytesseract.image_to_string(image, config='-psm 7')
    print(recognised)
    return recognised
项目:1ibrary-gzhu    作者:1ibrary    | 项目源码 | 文件源码
def get_capture(self):
        """Download the captcha GIF for this session and return its OCR text.

        The GIF is cached under ``./img_cache/<PHPSESSID>.gif``, pasted onto
        a fresh RGB image for recognition, and the cache file is removed
        afterwards.

        :return: recognised text with surrounding whitespace stripped
        """
        response = requests.get(self.capture_url, cookies=self.cookies)
        cache_path = "./img_cache/" + self.cookies['PHPSESSID'] + ".gif"
        with open(cache_path, "wb+") as cache_file:
            cache_file.write(response.content)

        gif = Image.open(cache_path)

        # Paste the GIF onto a new RGB image before running OCR.
        canvas = Image.new("RGB", gif.size)
        canvas.paste(gif)

        text = image_to_string(canvas).strip()
        remove(cache_path)

        return text
项目:bib-tagger    作者:KateRita    | 项目源码 | 文件源码
def getOcr(filename):
    """Return the tesseract output for the image at ``filename``.

    Uses the config string ``-psm 8 digits``.
    """
    # Alternatives tried earlier (kept for reference):
    #   config='-psm 10'   -- single digit recognition
    #   config="-psm 6"    #5
    #   no config          #3
    #   config="-psm 7"    #5
    picture = Image.open(filename)
    return image_to_string(picture,config="-psm 8 digits") #4
项目:Nightchord    作者:theriley106    | 项目源码 | 文件源码
def genNC(image=None, listofwords=[], artist=None, song=None):
    """OCR a sequence of image paths and dump the results to a JSON transcript.

    The OCR text of each image is stored under its 1-based position. The
    output file is named ``<text of first image>Transcript.json`` and holds
    the guessed words together with ``listofwords``.

    :param image: iterable of image paths
    :param listofwords: reference lyrics stored alongside the OCR output
    :param artist: unused here
    :param song: unused here
    """
    Words = {
        position: pytesseract.image_to_string(Image.open(path))
        for position, path in enumerate(image, 1)
    }
    Information = {'GuessedWords': Words, "Real_Lyrics": listofwords}
    transcript_name = '{}Transcript.json'.format(Words[1])
    with open(transcript_name, 'w') as f:
        json.dump(Information, f)
项目:Nightchord    作者:theriley106    | 项目源码 | 文件源码
def ocrList(image):
    """OCR the image at ``image`` and split the text into words.

    :return: the UTF-8 encoded OCR output as is when it is at most five
        characters long, otherwise a list obtained by turning newlines into
        spaces, collapsing double spaces once and splitting on single spaces
    """
    raw = pytesseract.image_to_string(Image.open(image))
    response = raw.encode('utf-8','replace')
    if len(response) <= 5:
        return response
    flattened = response.replace('\n', ' ').replace('  ', ' ')
    return flattened.split(' ')
项目:Nightchord    作者:theriley106    | 项目源码 | 文件源码
def calcSpaces(image):
    """Print the OCR text of ``image`` and return its space-separated piece count.

    Newlines are removed (not replaced) before splitting on single spaces.
    """
    ocr_text = pytesseract.image_to_string(Image.open(image)).encode('utf-8','replace')
    print(ocr_text)
    pieces = ocr_text.replace('\n', '').split(' ')
    return len(pieces)
项目:Nightchord    作者:theriley106    | 项目源码 | 文件源码
def genNC(image=None, listofwords=[], artist=None, song=None):
    """Run text extraction and OCR over ``image`` in threads, then dump a transcript.

    Lyrics are fetched with ``GrabSongLyrics(artist, song)`` (the
    ``listofwords`` argument is overwritten), ``extractText`` is run over
    the images in batches, each image is OCR'd in its own thread, and a JSON
    file named after ``Words[1]`` is written.

    NOTE(review): nothing in this function ever stores into ``Words``, so
    ``Words[1]`` below looks like it always raises ``KeyError`` -- confirm.
    """
    threads = []
    Words = {}

    def batchExtract(listofimages):
        # Run extractText on every image; failures are printed and skipped.
        for image in listofimages:
            try:
                extractText(image)
            except Exception as exp:
                print(exp)
                pass

    def doCommand(image, listofwords):
        # OCR one image and print the closest lyric match for each word
        # longer than three characters.
        # NOTE(review): [0] raises IndexError when get_close_matches finds
        # nothing; inside a thread that only kills this worker.
        a = pytesseract.image_to_string(Image.open(image)).encode('utf-8','replace').split(' ')
        for a in a:
            if len(a) > 3:
                print difflib.get_close_matches(str(a), listofwords)[0]

    Information = {}
    listofwords = GrabSongLyrics(artist, song)
    d = []

    # NOTE(review): the slice [i*5:(i*5) + 4] takes four items out of every
    # group of five, and range(len(image) / 5) ignores a trailing partial
    # group (and relies on Python 2 integer division) -- confirm intent.
    for i in range(len(image) / 5):
        t = threading.Thread(target=batchExtract, args=([image[i*5:(i*5) + 4]]))
        d.append(t)
        t.start()

    # Wait for every extraction batch before starting OCR.
    for t in d:
        t.join()

    # NOTE(review): the second positional argument here is the index ``i``,
    # but doCommand treats it as ``listofwords`` -- presumably the lyrics
    # list was meant; confirm.
    for i, image in enumerate(image):
        t = threading.Thread(target=doCommand, args=(image, i))
        threads.append(t)
        t.start()

    for t in threads:
        t.join()
    Information["GuessedWords"] = Words
    Information["Real_Lyrics"] = listofwords
    with open('{}Transcript.json'.format(Words[1]), 'w') as f:
        json.dump(Information, f)
项目:WebScraping    作者:liinnux    | 项目源码 | 文件源码
def ocr(img):
    """Threshold a captcha image and return the lower-cased letters read from it.

    :param img: image object exposing ``convert``
    :return: the OCR output restricted to characters in ``string.letters``,
        lower-cased
    """
    # threshold the image to ignore background and keep text:
    # only grey level 0 stays black, everything else becomes white
    gray = img.convert('L')
    bw = gray.point(lambda level: 0 if level < 1 else 255, '1')
    word = pytesseract.image_to_string(bw)
    letters_only = [c for c in word if c in string.letters]
    return ''.join(letters_only).lower()
项目:beryl    作者:DanielJDufour    | 项目源码 | 文件源码
def is_text_on_screen(target, notify=True):
    """Return True if ``target`` is found by OCR in a fresh screenshot.

    The screenshot is enlarged 2x and thresholded, and every contour whose
    bounding box is wider than 10 and taller than 6 pixels is cropped from
    the enlarged image and OCR'd.

    :param target: text to look for; byte strings are decoded as UTF-8
    :param notify: when truthy, announce the start through ``_notify``
    :return: True on the first crop whose text contains ``target``,
        otherwise False
    """
    if notify:
        _notify("starting is_text_on_screen")

    if isinstance(target, str):
        target = target.decode('utf-8')

    #GET SCREENSHOT
    path_to_screenshot = take_a_screenshot()
    sleep(1)

    #FIND TEXTS
    im = cv2.resize(cv2.imread(path_to_screenshot), (0,0), fx=2, fy=2)
    imgray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
    _ret, thresh = cv2.threshold(imgray,127,255,cv2.THRESH_BINARY)
    contours, _hierarchy = find_contours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        b = Box(cv2.boundingRect(contour))
        # Skip boxes too small to be worth OCR-ing.
        if not (b.width > 10 and b.height > 6):
            continue
        cropped = im[b.ymin:b.ymax, b.xmin:b.xmax]
        text = image_to_string(Image.fromarray(cropped))
        print("text:", text)
        if target in text.decode("utf-8"):
            return True
    return False
项目:TalosLibraryMate    作者:jim-hart    | 项目源码 | 文件源码
def process_ocr(force=False):
    """OCR every screenshot and print the converted text.

    Each file in the configured screenshots directory is opened, passed to
    tesseract with the configured tessdata directory, wrapped in
    ``TextConversion`` and printed.

    :param force: If True, every image is scanned even if the scan log says
                  it was processed before, and the scan log is not updated.
                  If False, already-scanned files are skipped and counted.

    """
    path = TesseractConfig()
    scan_logs = ScanLogs()

    tessdata = '--tessdata-dir "{}"'.format(path.tessdata)
    skipped_files = 0
    for file in os.listdir(path.screenshots):
        if not force and scan_logs.check_if_scanned(file):
            skipped_files += 1
            continue

        image = Image.open('{}\\{}'.format(path.screenshots, file))
        ocr_output = pytesseract.image_to_string(image, config=tessdata)
        converted_text = TextConversion(ocr_output)

        print(converted_text)
        if not force:
            scan_logs.logs = {file: str(converted_text)}

    print("\n----------Scan Complete-----------")
    if skipped_files:
        print_delay("{} Files Skipped (Already Scanned)".format(skipped_files))
项目:doc2text    作者:jlsutherland    | 项目源码 | 文件源码
def extract_text(self):
        """OCR ``self.image`` and store and return the recognised text.

        The image is written to the scratch file ``text_temp.png``, read by
        tesseract using ``self.lang``, and the scratch file is removed
        afterwards, also when OCR fails.

        :return: the recognised text (also stored in ``self.text``)
        """
        temp_path = 'text_temp.png'
        cv2.imwrite(temp_path, self.image)
        try:
            # Close the image before deleting the file it was read from.
            with Image.open(temp_path) as scratch:
                self.text = pytesseract.image_to_string(scratch, lang=self.lang)
        finally:
            # The existence check keeps a missing scratch file from masking
            # the original error.
            if os.path.exists(temp_path):
                os.remove(temp_path)
        return self.text
项目:idmatch    作者:maddevsio    | 项目源码 | 文件源码
def recognize_card(original_image, country='kg', preview=False):
    """Crop an ID-card photo, locate text regions and OCR each of them.

    :param original_image: path of the source image, passed to ``process_image``
    :param country: not used in this function
    :param preview: when truthy, also save the annotated card image and
        return its location
    :return: ``regionskir(result)``, or ``(location, regionskir(result))``
        when ``preview`` is truthy
    """
    from processing.border_removal import resize
    from processing.crop import process_image
    result = []
    cropped_image = "croped-image.jpg"
    process_image(original_image, cropped_image)
    # NOTE(review): cv2.COLOR_BGR2GRAY is a cvtColor conversion code but is
    # passed here as the imread flags argument -- confirm the intended flag.
    idcard = cv2.imread(cropped_image, cv2.COLOR_BGR2GRAY)
    idcard = resize(idcard, width=720)

    # Shrink the card only when the computed scale is at most 4/8.
    scale_down = (8 * 170 / detect_dpi(idcard))
    if scale_down <= 4:
        rows, cols = idcard.shape[:2]
        idcard = cv2.resize(idcard, (scale_down * cols / 8, scale_down * rows / 8))

    contours, hierarchy = recognize_text(idcard)
    for index, contour in enumerate(contours):
        [x, y, w, h] = cv2.boundingRect(contour)
        # gray is recomputed on every iteration from idcard, which already
        # carries the rectangles drawn by earlier iterations.
        gray = cv2.cvtColor(idcard, cv2.COLOR_RGB2GRAY)
        roi = gray[y:y + h, x:x + w]
        # NOTE(review): "/ h * w" divides by h and then multiplies by w; a
        # fill ratio would need "/ (h * w)" -- confirm which was intended.
        if cv2.countNonZero(roi) / h * w > 0.55:
            if h > 16 and w > 16:
                # Each region is written to "<index>.jpg" and OCR'd from
                # that file; the files are not removed here.
                filename = '%s.jpg' % index
                cv2.imwrite(filename, roi)
                text = pytesseract.image_to_string(
                    Image.open(filename), lang="kir+eng", config="-psm 7"
                )                
                item = {'x': x, 'y': y, 'w': w, 'h': h, 'text': text}
                result.append(item)
                cv2.rectangle(idcard, (x, y), (x + w, y + h), (255, 0, 255), 2)
    if preview:
        original_image = original_image.split('/')[-1]
        location = save_image('regions' + original_image, idcard)
        return location, regionskir(result)
    return regionskir(result)
项目:pyqt5    作者:yurisnm    | 项目源码 | 文件源码
def init_ui(self):
        """Build the widget: image preview on top, "Open" button and text field below.

        The preview shows ``TEXT.png`` scaled to 160x90 and the text field is
        filled with the OCR result for that file.
        """
        self._fileDialog = QFileDialog(self)

        # Outer vertical layout holding the preview and the control row.
        outer = QVBoxLayout()
        outer.setSpacing(2)
        self._v_layout = outer
        self.setLayout(outer)

        self._path = "TEXT.png"
        self._pixmap = QPixmap(self._path)

        # Control row: button and line edit side by side.
        self._btnFile = QPushButton("Open")
        self._hWidget = QWidget()
        self._hLayout = QHBoxLayout()
        self._hWidget.setLayout(self._hLayout)

        self._image = Image.open(self._path)
        self._line = QLineEdit()
        for widget in (self._btnFile, self._line):
            self._hLayout.addWidget(widget)

        preview = self._pixmap.scaled(QSize(160, 90),
                                      transformMode=Qt.SmoothTransformation)
        self._lbl = QLabel()
        self._lbl.setPixmap(preview)
        for widget in (self._lbl, self._hWidget):
            self._v_layout.addWidget(widget)
        self._btnFile.clicked.connect(self.openFilePressed)

        initial_text = pytesseract.image_to_string(Image.open('TEXT.png'))
        self._line.setText(initial_text)
项目:pyqt5    作者:yurisnm    | 项目源码 | 文件源码
def openFilePressed(self):
        """Let the user pick an image, preview it and show its OCR text.

        Nothing is updated when the first element of the dialog result is an
        empty string.
        """
        self._path = self._fileDialog.getOpenFileName(
            self, "Image Files (*.png *.jpg)")
        if self._path[0] == "":
            return

        chosen = self._path[0]
        self._pixmap = QPixmap(chosen)
        preview = self._pixmap.scaled(QSize(160, 90),
                                      transformMode=Qt.SmoothTransformation)
        self._lbl.setPixmap(preview)
        self._image = Image.open(self._path[0])
        self._line.setText(pytesseract.image_to_string(self._image))
项目:pyqt5    作者:yurisnm    | 项目源码 | 文件源码
def updateText(self):
        """Reload ``TEXT.png``, refresh the preview and emit the new OCR text.

        The text is recognised with ``lang='eng'`` and ``config='-psm 8'``,
        shown in the line edit and sent through ``signal_send_text``.
        """
        source = 'TEXT.png'

        self._pixmap = QPixmap(source)
        preview = self._pixmap.scaled(QSize(160, 90),
                                      transformMode=Qt.SmoothTransformation)
        self._lbl.setPixmap(preview)

        self._image = Image.open(source)
        text = pytesseract.image_to_string(self._image, lang='eng', config='-psm 8')
        self._line.setText(text)
        self.signal_send_text.emit(text)
项目:captcha_project    作者:zhanghe06    | 项目源码 | 文件源码
def img_to_string(self):
        """
        Crop and clean the image, OCR it, then post-process the text.

        The recognised text is stored in ``self.img_text`` and printed both
        before and after ``optimize_text`` runs.
        :return: None
        """
        # Prepare the image: crop first, then optimise.
        self.crop_img()
        self.optimize_img()
        # Recognise the text from self.img_fp.
        self.img_text = pytesseract.image_to_string(self.img_fp)
        # Report the text before and after clean-up.
        before_report = '??????%s' % self.img_text
        print(before_report)
        self.optimize_text()
        after_report = '??????%s' % self.img_text
        print(after_report)
项目:Mac-Python-3.X    作者:L1nwatch    | 项目源码 | 文件源码
def test2(image_name):
    """
    Recolour an image to pure black and white by channel thresholds, then print its OCR text.

    :param image_name: path (or file object) accepted by ``Image.open``
    :return: None
    """
    black = (0, 0, 0, 255)
    white = (255, 255, 255, 255)

    with Image.open(image_name) as image:
        image = image.convert("RGBA")
        pixdata = image.load()
        width, height = image.size

        # Make the letters bolder for easier recognition. Each pixel only
        # depends on itself, so the three threshold checks run back to back
        # per pixel: low red -> black, low green -> black, any blue left ->
        # white.
        for y in range(height):
            for x in range(width):
                if pixdata[x, y][0] < 90:
                    pixdata[x, y] = black
                if pixdata[x, y][1] < 136:
                    pixdata[x, y] = black
                if pixdata[x, y][2] > 0:
                    pixdata[x, y] = white

        # image.save("input-black.gif", "GIF")
        print(pytesseract.image_to_string(image))
项目:Mac-Python-3.X    作者:L1nwatch    | 项目源码 | 文件源码
def image_to_string(image):
    """
    Recognise the digits in an image by matching each cut piece against ``font``.

    The image is converted by ``convert_black_white`` and split by ``cut``;
    for each piece the first digit 0-9 whose ``font`` entry equals the
    piece's pixel table is appended. Pieces without a match add nothing.
    :param image: image to recognise
    :return: the matched digits as a string
    """
    global font
    black_white = convert_black_white(image)
    matched = []
    for glyph in cut(black_white):
        for digit in range(10):
            if create_pix_tables(glyph) == font[digit]:
                matched.append(str(digit))
                break
    return "".join(matched)
项目:Mac-Python-3.X    作者:L1nwatch    | 项目源码 | 文件源码
def solve():
    """
    Sum ``i * <number recognised in i.bmp>`` for i = 1..9999 and print the total.

    Loads the global ``font`` via ``get_font()`` before recognising.
    :return: None
    """
    global font
    font = get_font()
    path = "/Users/L1n/Desktop/bmp"
    total = 0
    for index in range(1, 10000):
        bmp_path = os.sep.join((path, str(index) + ".bmp"))
        with Image.open(bmp_path) as image:
            total += index * int(image_to_string(image))
    print("Sum: {}".format(total))
项目:pytesseractID    作者:iChenwin    | 项目源码 | 文件源码
def recognizeImage(results, cvimage ,rect, language, charWhiteList=None):
    """OCR one image region and append the result to ``results``.

    :param results: list that receives an ``ImageRecognizerItem``
    :param cvimage: image array accepted by ``Image.fromarray``
    :param rect: region descriptor stored with the recognised text
    :param language: tesseract language code
    :param charWhiteList: optional string of characters to restrict OCR to
    """
    options = ["-psm 7"]   # single line mode
    if charWhiteList is not None:
        options.append("-c tessedit_char_whitelist=" + charWhiteList)
    config = " ".join(options)

    pil_image = Image.fromarray(cvimage)
    text = pytesseract.image_to_string(pil_image, lang=language, config=config)

    results.append(ImageRecognizerItem(text, rect))

# ??ImageRecognizerItem
项目:Spider    作者:poluo    | 项目源码 | 文件源码
def recognize_url(url):
    """Download a captcha image, denoise it and return the OCR result.

    The image is saved to ``./img.jpg``, reduced to a point list by
    ``gen_white_black_points``, cleaned in place by ``reduce_noisy`` and
    written to a fixed ``processed.jpg`` path, which is then passed to
    tesseract. The character picture is printed before and after noise
    reduction.

    :param url: address of the image to download
    :return: text recognised in the processed image
    """
    import urllib.request
    download_path = './img.jpg'
    processed_path = "C:\\Users\\poluo\\PycharmProjects\\douban\\douban\\processed.jpg"

    urllib.request.urlretrieve(url, download_path)
    img = Image.open(download_path).convert('RGBA')
    w, h = img.size

    point_list = gen_white_black_points(img)
    print_char_pic(w, h, point_list)
    reduce_noisy(w, h, point_list)
    print_char_pic(w, h, point_list)

    img.putdata(point_list)
    img.save(processed_path)
    processed = Image.open(processed_path)
    return pytesseract.image_to_string(processed)
项目:captcha-breaker    作者:Detry322    | 项目源码 | 文件源码
def solution_from_image(image):
    """Solve a four-character captcha by OCR-ing each split piece.

    :param image: captcha image passed to ``filter_split``
    :return: ``'????'`` when the image does not split into exactly four
        pieces; otherwise the concatenated per-piece results, with ``'?'``
        for any piece that raised ``TesseractError`` or produced no text
    """
    pieces = filter_split(image)
    if len(pieces) != 4:
        return '????'

    ocr_config = '-psm 10 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz'
    guesses = []
    for piece in pieces:
        try:
            guess = pytesseract.image_to_string(piece, config=ocr_config)
        except pytesseract.pytesseract.TesseractError:
            guess = None
        guesses.append(guess if guess else '?')
    return ''.join(guesses)
项目:amazon    作者:parul1931    | 项目源码 | 文件源码
def parse_captcha(self, link, status):
        print "\n\n status in captcha : ", status
        print "\n link in captcha : ", link
        try:
            if status == 0:
                #proxies = ['http://43.242.104.43', 'http://115.113.43.215', 'http://115.113.43.215']
                #proxy = random.choice(proxies)
                proxy = urllib2.ProxyHandler({'http': 'http://14.142.4.33'})
                opener = urllib2.build_opener(proxy)
                header = ua.random
                print "\n header : ", header
                print "\n link : ", link
                opener.addheaders = [('User-agent', header)]
                data = opener.open(link).read()

                soup = BeautifulSoup(data, 'html.parser')
                div1 = soup.find("div", {"class": "a-row a-text-center"})
                if div1 is not None:
                    img = div1.find("img")
                    image = img["src"]
                    print "\n captcha.."
                    print "image : ", image
                    image = Image.open(StringIO(requests.get(image).content))
                    image.filter(ImageFilter.SHARPEN)
                    captcha = pytesseract.image_to_string(image)
                    print "captcha : ", captcha
                    values = {'field-keywords' : captcha}
                    data = urllib.urlencode(values)
                    req = urllib2.Request(link, data, {'User-agent': header})
                    resp = urllib2.urlopen(req)
                    the_page = resp.read()
                    self.parse_captcha(link, status)
                else:
                    status = 1
                    return
        except Exception as e:
            print "\n Exception : ", e
项目:amazon    作者:parul1931    | 项目源码 | 文件源码
def parse_captcha(self, link, status):
        print "\n\n status in captcha : ", status
        print "\n link in captcha : ", link
        try:
            if status == 0:
                #proxies = ['http://43.242.104.43', 'http://115.113.43.215', 'http://115.113.43.215']
                #proxy = random.choice(proxies)
                proxy = urllib2.ProxyHandler({'http': 'http://14.142.4.33'})
                opener = urllib2.build_opener(proxy)
                header = ua.random
                print "\n header : ", header
                print "\n link : ", link
                opener.addheaders = [('User-agent', header)]
                data = opener.open(link).read()

                soup = BeautifulSoup(data, 'html.parser')
                div1 = soup.find("div", {"class": "a-row a-text-center"})
                if div1 is not None:
                    img = div1.find("img")
                    image = img["src"]
                    print "\n captcha.."
                    print "image : ", image
                    image = Image.open(StringIO(requests.get(image).content))
                    image.filter(ImageFilter.SHARPEN)
                    captcha = pytesseract.image_to_string(image)
                    print "captcha : ", captcha
                    values = {'field-keywords' : captcha}
                    data = urllib.urlencode(values)
                    req = urllib2.Request(link, data, {'User-agent': header})
                    resp = urllib2.urlopen(req)
                    the_page = resp.read()
                    self.parse_captcha(link, status)
                else:
                    status = 1
                    return
        except Exception as e:
            print "\n Exception : ", e
项目:amazon    作者:parul1931    | 项目源码 | 文件源码
def parse_captcha(self, link, status):
        try:
            if status == 0:
                #proxies = ['http://43.242.104.43', 'http://115.113.43.215', 'http://115.113.43.215']
                #proxy = random.choice(proxies)
                proxy = urllib2.ProxyHandler({'http': 'http://14.142.4.33'})
                opener = urllib2.build_opener()
                header = ua.random
                print "\n header : ", header
                print "\n link : ", link
                opener.addheaders = [('User-agent', header)]
                data = opener.open(link).read()

                soup = BeautifulSoup(data, 'html.parser')
                div1 = soup.find("div", {"class": "a-row a-text-center"})
                if div1 is not None:
                    print "\n\n status in captcha : ", status
                    print "\n link in captcha : ", link
                    img = div1.find("img")
                    image = img["src"]
                    print "\n captcha.."
                    print "image : ", image
                    image = Image.open(StringIO(requests.get(image).content))
                    image.filter(ImageFilter.SHARPEN)
                    captcha = pytesseract.image_to_string(image)
                    print "captcha : ", captcha
                    values = {'field-keywords' : captcha}
                    data = urllib.urlencode(values)
                    req = urllib2.Request(link, data, {'User-agent': header})
                    resp = urllib2.urlopen(req)
                    the_page = resp.read()
                    self.parse_captcha(link, status)
                else:
                    status = 1
                    return
        except Exception as e:
            print "\n Exception : ", e
项目:amazon    作者:parul1931    | 项目源码 | 文件源码
def parse_captcha(self, link, status):
        try:
            if status == 0:
                #proxies = ['http://43.242.104.43', 'http://115.113.43.215', 'http://115.113.43.215']
                #proxy = random.choice(proxies)
                proxy = urllib2.ProxyHandler({'http': 'http://14.142.4.33'})
                opener = urllib2.build_opener()
                header = ua.random
                print "\n header : ", header
                print "\n link : ", link
                opener.addheaders = [('User-agent', header)]
                data = opener.open(link).read()

                soup = BeautifulSoup(data, 'html.parser')
                div1 = soup.find("div", {"class": "a-row a-text-center"})
                if div1 is not None:
                    print "\n\n status in captcha : ", status
                    print "\n link in captcha : ", link
                    img = div1.find("img")
                    image = img["src"]
                    print "\n captcha.."
                    print "image : ", image
                    image = Image.open(StringIO(requests.get(image).content))
                    image.filter(ImageFilter.SHARPEN)
                    captcha = pytesseract.image_to_string(image)
                    print "captcha : ", captcha
                    values = {'field-keywords' : captcha}
                    data = urllib.urlencode(values)
                    req = urllib2.Request(link, data, {'User-agent': header})
                    resp = urllib2.urlopen(req)
                    the_page = resp.read()
                    self.parse_captcha(link, status)
                else:
                    status = 1
                    return
        except Exception as e:
            print "\n Exception : ", e
项目:amazon    作者:parul1931    | 项目源码 | 文件源码
def parse_captcha(self, link, status):
        print "\n\n status in captcha : ", status
        print "\n link in captcha : ", link
        try:
            if status == 0:
                #proxies = ['http://43.242.104.43', 'http://115.113.43.215', 'http://115.113.43.215']
                #proxy = random.choice(proxies)
                proxy = urllib2.ProxyHandler({'http': 'http://14.142.4.33'})
                opener = urllib2.build_opener()
                header = ua.random
                print "\n header : ", header
                print "\n link : ", link
                opener.addheaders = [('User-agent', header)]
                data = opener.open(link).read()

                soup = BeautifulSoup(data, 'html.parser')
                div1 = soup.find("div", {"class": "a-row a-text-center"})
                if div1 is not None:
                    img = div1.find("img")
                    image = img["src"]
                    print "\n captcha.."
                    print "image : ", image
                    image = Image.open(StringIO(requests.get(image).content))
                    image.filter(ImageFilter.SHARPEN)
                    captcha = pytesseract.image_to_string(image)
                    print "captcha : ", captcha
                    values = {'field-keywords' : captcha}
                    data = urllib.urlencode(values)
                    req = urllib2.Request(link, data, {'User-agent': header})
                    resp = urllib2.urlopen(req)
                    the_page = resp.read()
                    self.parse_captcha(link, status)
                else:
                    status = 1
                    return
        except Exception as e:
            print "\n Exception : ", e
项目:amazon    作者:parul1931    | 项目源码 | 文件源码
def parse_captcha(self, link, status):
        try:
            if status == 0:
                #proxies = ['http://43.242.104.43', 'http://115.113.43.215', 'http://115.113.43.215']
                #proxy = random.choice(proxies)
                proxy = urllib2.ProxyHandler({'http': 'http://14.142.4.33'})
                opener = urllib2.build_opener()
                header = ua.random
                print "\n header : ", header
                print "\n link : ", link
                opener.addheaders = [('User-agent', header)]
                data = opener.open(link).read()

                soup = BeautifulSoup(data, 'html.parser')
                div1 = soup.find("div", {"class": "a-row a-text-center"})
                if div1 is not None:
                    print "\n\n status in captcha : ", status
                    print "\n link in captcha : ", link
                    img = div1.find("img")
                    image = img["src"]
                    print "\n captcha.."
                    print "image : ", image
                    image = Image.open(StringIO(requests.get(image).content))
                    image.filter(ImageFilter.SHARPEN)
                    captcha = pytesseract.image_to_string(image)
                    print "captcha : ", captcha
                    values = {'field-keywords' : captcha}
                    data = urllib.urlencode(values)
                    req = urllib2.Request(link, data, {'User-agent': header})
                    resp = urllib2.urlopen(req)
                    the_page = resp.read()
                    self.parse_captcha(link, status)
                else:
                    status = 1
                    return
        except Exception as e:
            print "\n Exception : ", e
项目:amazon    作者:parul1931    | 项目源码 | 文件源码
def parse_captcha(self, link, status):
        print "\n\n status in captcha : ", status
        print "\n link in captcha : ", link
        try:
            if status == 0:
                #proxies = ['http://43.242.104.43', 'http://115.113.43.215', 'http://115.113.43.215']
                #proxy = random.choice(proxies)
                #proxy = urllib2.ProxyHandler({'http': 'http://115.113.43.215'})
                opener = urllib2.build_opener()
                header = ua.random
                print "\n header : ", header
                print "\n link : ", link
                opener.addheaders = [('User-agent', header)]
                data = opener.open(link).read()

                soup = BeautifulSoup(data, 'html.parser')
                div1 = soup.find("div", {"class": "a-row a-text-center"})
                if div1 is not None:
                    img = div1.find("img")
                    image = img["src"]
                    print "\n captcha.."
                    print "image : ", image
                    image = Image.open(StringIO(requests.get(image).content))
                    image.filter(ImageFilter.SHARPEN)
                    captcha = pytesseract.image_to_string(image)
                    print "captcha : ", captcha
                    values = {'field-keywords' : captcha}
                    data = urllib.urlencode(values)
                    req = urllib2.Request(link, data, {'User-agent': header})
                    resp = urllib2.urlopen(req)
                    the_page = resp.read()
                    self.parse_captcha(link, status)
                else:
                    status = 1
                    return
        except Exception as e:
            print "\n Exception : ", e
项目:wswp    作者:kjam    | 项目源码 | 文件源码
def ocr(img):
    """Return the lowercase ASCII letters pytesseract reads from ``img``.

    The image is first passed through ``img_to_bw``; the OCR text is
    lowercased and every character outside ``string.ascii_lowercase`` is
    dropped.  When anything was dropped, the set of dropped characters is
    printed.
    """
    bw = img_to_bw(img)
    captcha = pytesseract.image_to_string(bw)
    lowered = captcha.lower()
    cleaned = ''.join(c for c in lowered if c in string.ascii_lowercase)
    # Compare against the lowercased text: uppercase letters are kept (as
    # lowercase), so they must not be reported as removed.
    removed = set(lowered) - set(cleaned)
    if removed:
        print('removed bad characters: {}'.format(removed))
    return cleaned
项目:Epsilon    作者:Capuno    | 项目源码 | 文件源码
async def cmd_info(message, parameters, recursion=0):
    """OCR the most recent image attachment in the channel and post the text.

    Walks up to 25 log entries of ``message.channel``.  For the first entry
    with attachments, the first attachment is downloaded, sharpened and read
    with pytesseract; the recognised text (or a fallback line when nothing
    was recognised) is sent back as an embed and the command ends.  Errors
    while handling that attachment are reported as an embed as well.  If no
    entry has an attachment, a "not found" embed is sent.

    ``parameters`` and ``recursion`` are accepted but not used here.

    Declared ``async`` because the body uses ``async for`` and ``await``,
    which are a SyntaxError inside a plain ``def``.
    """
    async def _reply(colour, text):
        # Send one embed with the given colour and description to the channel.
        e = discord.Embed(colour=colour)
        e.description = text
        await client.send_message(message.channel, embed=e)

    async for msg in client.logs_from(message.channel, limit=25):
        try:
            if msg.attachments:
                image = Image.open(BytesIO(requests.get(msg.attachments[0]['url']).content)).filter(ImageFilter.SHARPEN)
                text = pytesseract.image_to_string(image)
                if not text:
                    await _reply(0xB5434E, "I just forgot how to read...")
                else:
                    await _reply(0x43B581, text)
                return

        except OSError:
            await _reply(0xB5434E, "Image way big, are you trying to kill me?")
            return
        except TypeError:
            await _reply(0xB5434E, "Latest attachment is not a static image, try again.")
            return
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt/SystemExit
            # and other BaseException-only signals are not swallowed.
            await _reply(0xB5434E, "Error ocurred, not related to OSError or TypeError I guess.")
            return
    await _reply(0xB5434E, "I can't find an image in the last 25 posts, that or I'm retarded.")
项目:Spider_index    作者:DarkSand    | 项目源码 | 文件源码
def get_vcode(path):
    """OCR the image file at ``path`` and return the text without ',' or newlines.

    The pytesseract call uses ``lang='numfont'`` and runs while the
    module-level ``mutex`` is held.
    """
    with Image.open(path) as image:
        mutex.acquire(1)
        try:
            vcode = pytesseract.image_to_string(image, lang='numfont')
        finally:
            # Release even if OCR raises; otherwise the mutex stays held and
            # later callers would presumably block on acquire().
            mutex.release()
        return vcode.replace(',', '').replace('\n', '')
项目:Spider_index    作者:DarkSand    | 项目源码 | 文件源码
def get_vcode_by_img_0(img):
    """OCR ``img`` with the 'numfont' language, retrying once on an empty result.

    If the first pass returns an empty string, the image is passed through
    ``merge_thumb_0`` and read again; a retry result of ``'00'`` becomes
    ``'0'``, any other retry result has leading and trailing ``'0'``
    characters stripped.  Commas and newlines are removed from the value
    that is returned.  All OCR work runs while the module-level ``mutex``
    is held.
    """
    mutex.acquire(1)
    try:
        vcode = pytesseract.image_to_string(img, lang='numfont')
        if vcode == '':
            img = merge_thumb_0(img)
            vcode = pytesseract.image_to_string(img, lang='numfont')
            if vcode == '00':
                vcode = '0'
            else:
                vcode = vcode.strip('0')
    finally:
        # Release on every path, including when OCR or merge_thumb_0 raises,
        # so a failure cannot leave the mutex held.
        mutex.release()
    return vcode.replace(',', '').replace('\n', '')