我们从 Python 开源项目中提取了以下 49 个代码示例,用于说明如何使用 pytesseract.image_to_string()。
def velocity_ocr(image,coords,f1app):
    """Crop a region out of ``image``, enhance it and run tesseract on it.

    The crop is converted to greyscale, doubled in size, passed through a
    chain of brightness/contrast enhancers and then OCR'd.

    :param image: array accepted by ``Image.fromarray``.
    :param coords: crop box handed straight to ``Image.crop``.
    :param f1app: truthy selects the filter chain for video from the F1 app,
        falsy the chain for the onboard video graphic.
    :return: the raw OCR text, or ``-1`` when tesseract's output raised
        ``UnicodeDecodeError``.
    """
    # NOTE(review): the extent of the ``else`` branch was reconstructed from
    # whitespace-mangled source -- confirm all five filters belong to it.
    if f1app:
        # filters for video from the f1 app
        pipeline = (
            (ImageEnhance.Brightness, 3.0),
            (ImageEnhance.Contrast, 2.0),
        )
    else:
        # filters for onboard video graphic
        pipeline = (
            (ImageEnhance.Brightness, 0.1),
            (ImageEnhance.Contrast, 2.0),
            (ImageEnhance.Contrast, 4.0),
            (ImageEnhance.Brightness, 0.2),
            (ImageEnhance.Contrast, 16.0),
        )

    # crop and convert image to greyscale, then upscale 2x
    frame = Image.fromarray(image)
    frame = frame.crop(coords)
    frame = frame.convert('L')
    frame = frame.resize([frame.width*2, frame.height*2])

    for enhancer, factor in pipeline:
        frame = enhancer(frame).enhance(factor)

    try:
        # config='digits' was tried here and left disabled by the author.
        return pytesseract.image_to_string(frame)
    except UnicodeDecodeError:
        return -1
def image_recognize():
    """OCR ``data/0.jpg``, print the text and save it to ``Verification.txt``."""
    import pytesseract
    from PIL import Image

    class GetImageDate(object):
        """Reads the fixed sample image and persists its OCR result."""

        def m(self):
            """Return the text tesseract recognises in ``data/0.jpg``."""
            image = Image.open("data/0.jpg")
            return pytesseract.image_to_string(image)

        def SaveResultToDocument(self):
            """Print the OCR text and write it to ``Verification.txt``."""
            text = self.m()
            # ``with`` closes the file even if print/str()/write() raises.
            with open(u"Verification.txt", "w") as f:
                print(text)
                f.write(str(text))

    g = GetImageDate()
    g.SaveResultToDocument()
def recognize(self):
    """Download the jAccount captcha image and return its OCR'd text.

    The request carries the cookies and current URL of ``self.browser``. The
    image is saved to ``captcha.png``, converted to greyscale and passed to
    tesseract.

    :return: the recognised text reduced to ASCII letters and digits, or
        ``'aaaa'`` when nothing usable was recognised.
    """
    def format_captcha(captcha):
        """Keep only ASCII digits/letters; fall back to 'aaaa' if none remain."""
        kept = ''.join(
            ch for ch in captcha
            if 48 <= ord(ch) <= 57 or 65 <= ord(ch) <= 90 or 97 <= ord(ch) <= 122
        )
        return kept or 'aaaa'

    cookie = self.browser.cookies.all()
    opener = urllib2.build_opener()
    opener.addheaders.append(('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'))
    opener.addheaders.append(('Host','jaccount.sjtu.edu.cn'))
    opener.addheaders.append(('Referer',self.browser.url))
    opener.addheaders.append(('Cookie', "; ".join('%s=%s' % (k,v) for k,v in cookie.items())))

    response = opener.open("https://jaccount.sjtu.edu.cn/jaccount/captcha?1488154642719")
    try:
        data = response.read()
    finally:
        # The original never closed the HTTP response.
        response.close()

    # open() replaces the Python-2-only file() builtin.
    with open('captcha.png','wb') as out:
        out.write(data)

    img = Image.open("captcha.png").convert('L')
    return format_captcha(pytesseract.image_to_string(img,lang="eng"))
def detect_gf_result(image_path):
    """Clean up the image at ``image_path`` and return its OCR text without spaces.

    Steps: whiten pixels that compare below ``(100, 100, 100)``, convert to
    greyscale, map grey levels strictly between 68 and 90 to black and all
    others to white, apply a min filter and three median filters, then run
    tesseract with ``-psm 6``.

    :param image_path: path of the image to read.
    :return: recognised text with every space removed.
    """
    from PIL import ImageFilter, Image
    import pytesseract

    white = 255  # largest valid 8-bit channel value (the original wrote 256)

    img = Image.open(image_path)
    # NOTE(review): tuple ``<`` is lexicographic, so this mostly tests the
    # first channel -- confirm that is the intended darkness test.
    for x in range(img.width):
        for y in range(img.height):
            if img.getpixel((x, y)) < (100, 100, 100):
                img.putpixel((x, y), (white, white, white))

    gray = img.convert('L')
    two = gray.point(lambda level: 0 if 68 < level < 90 else white)
    res_img = two.filter(ImageFilter.MinFilter)
    for _ in range(3):
        res_img = res_img.filter(ImageFilter.MedianFilter)

    res = pytesseract.image_to_string(res_img, config='-psm 6')
    return res.replace(' ', '')
def readCaptcha(self):
    """Download the captcha for ``self.captchaId``, OCR it and store the text.

    The image is saved as ``i:/img/<captchaId>.png`` and the recognised text
    is assigned to ``self.captcha``. If the download fails with a connection
    error, a message is printed and ``self.captcha`` is left untouched.
    """
    headers = {
        'Accept-Language':'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0',
        'Referer':'http://******/login.jsp',
        'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'X-Forwarded-For':self.captchaId
    }
    url = 'http://******/common/captcha.jhtml?captchaId='+self.captchaId
    try:
        res = requests.get(url,headers=headers)
    except requests.exceptions.ConnectionError:
        print('??????')
        # Without a response there is nothing to save or OCR; the original
        # fell through and crashed with NameError on ``res``.
        return

    path = "i:/img/"+self.captchaId+".png"
    with open(path,'wb') as fp:
        fp.write(res.content)

    image = Image.open(path)
    self.captcha = pytesseract.image_to_string(image)
def test(image_name):
    """Denoise an image and print what the first available pyocr tool reads.

    The image is converted to greyscale, cleaned with ``cut_noise``, saved as
    ``test.jpg`` and recognised twice: once as plain text and once with the
    digits-only builder. Exits with status 1 when pyocr finds no OCR tool.

    :param image_name: path of the image to open.
    :return: None
    """
    with Image.open(image_name) as source:
        # Mode "L" is 8-bit greyscale; cut_noise is the project's denoiser.
        cleaned = cut_noise(source.convert("L"))

        available = pyocr.get_available_tools()
        if not available:
            print("No OCR tool found")
            sys.exit(1)
        engine = available[0]

        cleaned.save("test.jpg")

        txt = engine.image_to_string(
            cleaned,
            lang="eng",
            builder=pyocr.builders.TextBuilder(),
        )
        # Digits - Only Tesseract
        digits = engine.image_to_string(
            cleaned,
            lang="eng",
            builder=pyocr.tesseract.DigitBuilder(),
        )
        print(txt)
        print(digits)
def __post_data_with_captcha(self,postdata,captchaurl):
    """Fetch a captcha, OCR it, submit the form and return the response length.

    The captcha image is saved to ``image.jpg``. Its recognised text is stored
    in ``postdata`` under the key named by
    ``self.formdata.maindata['captcha']``; ``postdata`` is also updated in
    place with ``self.formdata.extradata``.

    :param postdata: dict of form fields; mutated by this call.
    :param captchaurl: URL the captcha image is downloaded from.
    :return: length of the text of the POST response.
    """
    session = requests.session()
    response = session.get(captchaurl)
    with open('image.jpg','wb') as f:
        f.write(response.content)

    #decode the captcha
    try:
        imgstr = image_to_string(Image.open('image.jpg'))
        print(imgstr)
        postdata[self.formdata.maindata['captcha']] = imgstr
    except UnicodeDecodeError:
        # Best effort (kept from the original): post without a captcha value
        # when the OCR output cannot be decoded.
        pass

    postdata.update(self.formdata.extradata)
    postresponse = session.post(self.formdata.post_url,data=postdata,headers=self.formdata.headers)
    return len(postresponse.text)
def main():
    """OCR the Canny edge map of the image named on the command line.

    The image is read as greyscale, resized to 1200x900 and edge-detected.
    Both the edge map (digits and ``X`` only) and its inverse are passed to
    tesseract and the results printed. The inverted edge map is shown in a
    window until a key is pressed.
    """
    # parse command line options
    if len(sys.argv) != 2:
        # Exactly one argument is accepted; the old usage text named two.
        print('Usage: python %s <image_path>' % sys.argv[0])
        sys.exit(1)
    filePath = sys.argv[1]
    print("<----- processing %s ----->" % filePath)

    # Flag 0 loads the image as single-channel greyscale.
    img = cv2.imread(filePath, 0)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        print('Cannot read image: %s' % filePath)
        sys.exit(1)
    img = cv2.resize(img, (1200, 900))

    canny = cv2.Canny(img, 60, 300)
    inverted = cv2.bitwise_not(canny)
    cv2.imshow('Canny', inverted)

    test1 = Image.fromarray(canny)
    test2 = Image.fromarray(inverted)

    result = pytesseract.image_to_string(test1, lang="eng", config="-c tessedit_char_whitelist=0123456789X")
    print(result)
    print("-------")
    result = pytesseract.image_to_string(test2, lang="eng")
    print(result)

    cv2.waitKey(0)
def vcode(self):
    """Fetch the verify-code image, OCR it and return the recognised code.

    :return: the recognised text.
    :raises VerifyCodeError: when ``self.code_rule`` finds no match in the
        recognised text.
    """
    response = self._session.get(
        'https://jy.yongjinbao.com.cn/winner_gj/gjzq/user/extraCode.jsp',
        params={'randomStamp': random.random()}
    )
    response.raise_for_status()

    # Decode the image straight from memory.
    buf = BytesIO(response.content)
    picture = Image.open(buf)
    code = pytesseract.image_to_string(picture)
    picture.close()
    buf.close()

    if not self.code_rule.findall(code):
        raise VerifyCodeError('Wrong verify code: %s' % code)
    logger.debug('Verify Code is: %s' % code)
    return code
def gg(name):
    """Binarise the image at ``name`` in place, OCR it and print the text.

    The file is overwritten twice: first with its greyscale version, then with
    the 1-bit image produced by the module-level ``table`` lookup. The OCR
    result is stripped, upper-cased and passed through the ``rep``
    replacements before being printed.

    :param name: path of the image; the file is modified.
    """
    grey = Image.open(name).convert('L')
    grey.save(name)

    # ``table`` is the threshold lookup defined at module level.
    binary = grey.point(table, '1')
    binary.save(name)

    text = pytesseract.image_to_string(binary).strip().upper()
    for wrong, right in rep.items():
        text = text.replace(wrong, right)
    print(text)
# gg(CAPTCHA_PATH)
def extracttext(imgpath, preprocess):
    """OCR an image given by local path or URL.

    :param imgpath: file path, or an ``http://``, ``https://`` or ``ftp://``
        URL (fetched with ``url_to_image``).
    :param preprocess: ``"thresh"`` for Otsu binarisation, ``"blur"`` for a
        3x3 median blur; any other value leaves the greyscale image as is.
    :return: ``{"text": <recognised text>}``.
    """
    if imgpath.startswith(('http://', 'https://', 'ftp://')):
        image = url_to_image(imgpath)
    else:
        image = cv2.imread(imgpath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    if preprocess == "thresh":
        gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)

    # The image reaches tesseract through a temp file named after the PID.
    filename = "{}.png".format(os.getpid())
    cv2.imwrite(filename, gray)
    try:
        text = pytesseract.image_to_string(Image.open(filename))
    finally:
        # Remove the temp file even when OCR fails.
        os.remove(filename)
    return {"text": text}
def recognize_url(url):
    """Download an image, rebuild it as black/white points and OCR it.

    The download goes to ``imgs/tmp-img.jpg``; the denoised image is written
    to ``imgs/rebuild.jpg`` and that file is what tesseract reads.

    :param url: address of the image to download.
    :return: text recognised in the rebuilt image.
    """
    import urllib

    downloaded = "imgs/tmp-img.jpg"
    rebuilt = "imgs/rebuild.jpg"

    urllib.urlretrieve(url, downloaded)
    img = Image.open(downloaded).convert('RGBA')
    w, h = img.size
    print("%s %s" % (w, h))

    point_list = gen_white_black_points(img)
    # The character picture is printed before and after noise reduction.
    print_char_pic(w, h, point_list)
    reduce_noisy(w, h, point_list)
    print_char_pic(w, h, point_list)

    img.putdata(point_list)
    img.save(rebuilt)
    return pytesseract.image_to_string(Image.open(rebuilt))
def rec_img(imgPath):
    """Split a captcha image into pieces and OCR each piece as one digit.

    The image is converted to greyscale, binarised at 170 and de-speckled
    twice before splitting. When the combined result is not four characters
    long, the processed image is saved as ``temp/error_<result>.png``.

    :param imgPath: path of the captcha image.
    :return: concatenation of the per-piece OCR results.
    """
    img = Image.open(imgPath).convert("L")
    binarizing(img,170)
    depoint(img)
    depoint(img,True)

    pieces = split_image(img,save_temp=True)
    # NOTE(review): the print is placed after the loop; the mangled source
    # does not show whether it ran once per piece.
    recdString = "".join(
        pytesseract.image_to_string(piece,config='-psm 10 outputbase digits')
        for piece in pieces
    )
    print(recdString)

    if len(recdString) != 4:
        print("error ...")
        img.save('temp/error_%s.png' % recdString)
    else:
        print("success")
    return recdString
def tesser_money_image(image):
    """OCR an amount from ``image`` and return it as an integer.

    The image is upscaled 2x and read with ``-psm 7``. Characters in the
    substitution table are replaced before the result is parsed.

    :param image: array accepted by ``cv2.resize``.
    :return: the parsed amount.
    :raises ValueError: if the substituted text is not a valid integer.
    """
    # Replacement for each character, exactly as in the original elif chain.
    substitutions = {
        'o': '0', 'O': '0',
        'l': '1', 'I': '1', 'i': '1',
        'M': '000000', 'm': '000000',
        'K': '000', 'k': '000',
        's': '5', 'S': '5',
        'W': '40',
    }

    enlarged = cv2.resize(image, (0,0), fx=2, fy=2)
    picture = PIL.Image.fromarray(enlarged)
    raw = pytesseract.image_to_string(picture, config='-psm 7')

    digits = ''.join(substitutions.get(ch, ch) for ch in raw)
    return int(digits)
def tesser(image):
    """OCR ``image`` with ``-psm 7``, print the text and return it.

    :param image: image object accepted by ``pytesseract.image_to_string``.
    :return: the recognised text.
    """
    recognised = pytesseract.image_to_string(image, config='-psm 7')
    print(recognised)
    return recognised
def get_capture(self):
    """Download the captcha GIF for this session and return its OCR text.

    The GIF is cached as ``./img_cache/<PHPSESSID>.gif``, pasted onto a new
    RGB image for tesseract, and the cache file is removed afterwards.

    :return: the recognised text with surrounding whitespace stripped.
    """
    data = requests.get(self.capture_url, cookies=self.cookies)
    cache_path = "./img_cache/" + self.cookies['PHPSESSID'] + ".gif"
    with open(cache_path, "wb+") as f:
        f.write(data.content)

    try:
        gif = Image.open(cache_path)
        png = Image.new("RGB", gif.size)
        png.paste(gif)
        # The original bound this to ``str``, shadowing the builtin.
        text = image_to_string(png).strip()
    finally:
        # Remove the cached file even when decoding or OCR fails.
        remove(cache_path)
    return text
def getOcr(filename):
    """OCR the image file ``filename`` with ``-psm 8 digits``.

    :param filename: path of the image to open.
    :return: the recognised text.
    """
    # Alternatives the author tried and left disabled, with the number noted
    # beside each (meaning of the numbers is not recorded):
    #   config="-psm 6"        -> 5
    #   default configuration  -> 3
    #   config="-psm 7"        -> 5
    #   config="-psm 8 digits" -> 4 (the one in use)
    # config='-psm 10' is the option for single digit recognition.
    picture = Image.open(filename)
    return image_to_string(picture, config="-psm 8 digits")
def genNC(image=None, listofwords=None, artist=None, song=None):
    """OCR a sequence of images and dump the results to a JSON transcript.

    The output file is named ``<text of first image>Transcript.json`` and
    holds ``GuessedWords`` (1-based index -> OCR text) and ``Real_Lyrics``.

    :param image: sequence of image paths; despite the ``None`` default a
        non-empty sequence is required.
    :param listofwords: reference lyrics stored alongside the guesses;
        defaults to an empty list.
    :param artist: unused.
    :param song: unused.
    :raises KeyError: if ``image`` is empty (there is no first result to
        name the file after).
    """
    # None sentinel replaces the mutable ``[]`` default.
    if listofwords is None:
        listofwords = []

    Words = {}
    # A separate loop name avoids shadowing the ``image`` parameter.
    for index, path in enumerate(image, 1):
        Words[index] = pytesseract.image_to_string(Image.open(path))

    Information = {
        'GuessedWords': Words,
        "Real_Lyrics": listofwords,
    }
    with open('{}Transcript.json'.format(Words[1]), 'w') as f:
        json.dump(Information, f)
def ocrList(image):
    """OCR the image file and return its words.

    :param image: path of the image to open.
    :return: a list of space-separated tokens when the UTF-8 encoded text is
        longer than 5, otherwise the encoded text itself.
    """
    recognised = pytesseract.image_to_string(Image.open(image))
    response = recognised.encode('utf-8','replace')
    if len(response) <= 5:
        return response
    # NOTE(review): replace(' ', ' ') is a no-op as written; it may have been
    # meant to collapse double spaces -- confirm against the upstream source.
    flattened = response.replace('\n', ' ').replace(' ', ' ')
    return flattened.split(' ')
def calcSpaces(image):
    """OCR the image file, print the text and count its space-separated parts.

    :param image: path of the image to open.
    :return: number of pieces after removing newlines and splitting on ' '.
    """
    recognised = pytesseract.image_to_string(Image.open(image))
    response = recognised.encode('utf-8','replace')
    print(response)
    pieces = response.replace('\n', '').split(' ')
    return len(pieces)
def genNC(image=None, listofwords=None, artist=None, song=None):
    """OCR images in threads, match words to the lyrics and dump a transcript.

    Lyrics are fetched with ``GrabSongLyrics(artist, song)``, replacing any
    ``listofwords`` passed in. Images are first run through ``extractText``
    in batches of five, then each is OCR'd in its own thread. Every OCR word
    longer than three characters has its closest lyric match printed. The
    result is written to ``<text of first image>Transcript.json``.

    :param image: sequence of image paths; despite the ``None`` default a
        non-empty sequence is required.
    :param listofwords: ignored (overwritten by the fetched lyrics).
    :param artist: artist name passed to ``GrabSongLyrics``.
    :param song: song title passed to ``GrabSongLyrics``.
    :raises KeyError: if ``image`` is empty.
    """
    batch_size = 5
    Words = {}

    def batchExtract(listofimages):
        """Run extractText on each image, reporting failures and moving on."""
        for path in listofimages:
            try:
                extractText(path)
            except Exception as exp:
                # Best effort: one bad image must not stop the batch.
                print(exp)

    def doCommand(index, path, lyrics):
        """OCR one image, record its text and print close lyric matches."""
        text = pytesseract.image_to_string(Image.open(path))
        # Record the raw text under a 1-based index. The original never
        # filled Words, so the final Words[1] lookup always failed.
        Words[index] = text
        for word in text.encode('utf-8','replace').split(' '):
            if len(word) > 3:
                matches = difflib.get_close_matches(str(word), lyrics)
                # get_close_matches may return []; the original indexed [0]
                # unconditionally.
                if matches:
                    print(matches[0])

    Information = {}
    listofwords = GrabSongLyrics(artist, song)

    # Full batches of five plus the remainder; the original slice
    # [i*5:i*5+4] skipped every fifth image and dropped the tail.
    batch_threads = []
    for start in range(0, len(image), batch_size):
        t = threading.Thread(target=batchExtract, args=(image[start:start + batch_size],))
        batch_threads.append(t)
        t.start()
    for t in batch_threads:
        t.join()

    threads = []
    for index, path in enumerate(image, 1):
        # Pass the lyrics; the original passed the loop index in their place.
        t = threading.Thread(target=doCommand, args=(index, path, listofwords))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()

    Information["GuessedWords"] = Words
    Information["Real_Lyrics"] = listofwords
    with open('{}Transcript.json'.format(Words[1]), 'w') as f:
        json.dump(Information, f)
def ocr(img):
    """OCR a captcha image and return only its letters, lower-cased.

    :param img: PIL image.
    :return: the recognised letters (those in ``string.letters``) in
        lower case.
    """
    def to_black_or_white(level):
        # threshold the image to ignore background and keep text
        if level < 1:
            return 0
        return 255

    binarised = img.convert('L').point(to_black_or_white, '1')
    recognised = pytesseract.image_to_string(binarised)
    letters_only = [ch for ch in recognised if ch in string.letters]
    return ''.join(letters_only).lower()
def is_text_on_screen(target, notify=True):
    """Report whether ``target`` appears in any OCR'd region of a screenshot.

    A screenshot is taken, doubled in size and thresholded. Every contour
    whose bounding box is wider than 10 and taller than 6 is cropped from the
    enlarged screenshot and OCR'd.

    :param target: text to look for; byte strings are decoded as UTF-8.
    :param notify: when truthy, ``_notify`` is called before starting.
    :return: True as soon as a region's text contains ``target``, else False.
    """
    if notify:
        _notify("starting is_text_on_screen")
    if isinstance(target, str):
        target = target.decode('utf-8')

    # Grab the screen.
    path_to_screenshot = take_a_screenshot()
    sleep(1)

    # Locate candidate text regions.
    screenshot = cv2.imread(path_to_screenshot)
    screenshot = cv2.resize(screenshot, (0,0), fx=2, fy=2)
    grey = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(grey,127,255,cv2.THRESH_BINARY)
    contours, _hierarchy = find_contours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        box = Box(cv2.boundingRect(contour))
        if box.width <= 10 or box.height <= 6:
            continue
        cropped = screenshot[box.ymin:box.ymax, box.xmin:box.xmax]
        text = image_to_string(Image.fromarray(cropped))
        print("text:", text)
        if target in text.decode("utf-8"):
            return True
    return False
def process_ocr(force=False):
    """OCR every screenshot and print the converted text.

    Each file in the configured screenshots directory is passed to tesseract
    (using the configured tessdata directory). The result is wrapped in
    ``TextConversion`` and printed. Unless ``force`` is set, files already in
    the scan log are skipped and each new result is recorded in the log.

    :param force: If True, all images are scanned regardless of whether
        they've been processed previously, and nothing is written to the
        scan log.
    """
    path = TesseractConfig()
    scan_logs = ScanLogs()
    skipped_files = 0
    tessdata = '--tessdata-dir "{}"'.format(path.tessdata)

    for filename in os.listdir(path.screenshots):
        if not force and scan_logs.check_if_scanned(filename):
            skipped_files += 1
            continue

        # os.path.join replaces the hard-coded '\\' separator; the context
        # manager closes the image file the original left open.
        with Image.open(os.path.join(path.screenshots, filename)) as image:
            ocr_text = pytesseract.image_to_string(image, config=tessdata)
        converted_text = TextConversion(ocr_text)
        print(converted_text)

        if not force:
            scan_logs.logs = {filename: str(converted_text)}

    print("\n----------Scan Complete-----------")
    if skipped_files:
        print_delay("{} Files Skipped (Already Scanned)".format(skipped_files))
def extract_text(self):
    """OCR ``self.image`` in language ``self.lang`` and cache the result.

    The image is written to the temporary file ``text_temp.png`` for
    tesseract and the file is removed afterwards.

    :return: the recognised text (also stored in ``self.text``).
    """
    temp_path = 'text_temp.png'
    cv2.imwrite(temp_path, self.image)
    try:
        self.text = pytesseract.image_to_string(Image.open(temp_path), lang=self.lang)
    finally:
        # Remove the temp file even when OCR fails.
        os.remove(temp_path)
    return self.text
def recognize_card(original_image, country='kg', preview=False):
    """Crop an ID-card image, find text regions and OCR each of them.

    :param original_image: path of the source image, passed to
        ``process_image``.
    :param country: not used by this function.
    :param preview: when truthy, the annotated card image is saved with
        ``save_image`` and its location is returned as well.
    :return: ``regionskir(result)``, or ``(location, regionskir(result))``
        when ``preview`` is truthy. ``result`` is a list of dicts with the
        keys ``x``, ``y``, ``w``, ``h`` and ``text``.
    """
    # NOTE(review): block structure was reconstructed from whitespace-mangled
    # source; confirm the nesting of the statements inside the loop.
    from processing.border_removal import resize
    from processing.crop import process_image
    result = []
    cropped_image = "croped-image.jpg"
    process_image(original_image, cropped_image)
    # NOTE(review): cv2.COLOR_BGR2GRAY is a cvtColor conversion code, not an
    # imread flag. The cvtColor call below still expects a multi-channel
    # image, so confirm the intent before changing it.
    idcard = cv2.imread(cropped_image, cv2.COLOR_BGR2GRAY)
    idcard = resize(idcard, width=720)
    scale_down = (8 * 170 / detect_dpi(idcard))
    if scale_down <= 4:
        rows, cols = idcard.shape[:2]
        idcard = cv2.resize(idcard, (scale_down * cols / 8, scale_down * rows / 8))
    contours, hierarchy = recognize_text(idcard)
    for index, contour in enumerate(contours):
        [x, y, w, h] = cv2.boundingRect(contour)
        # Recomputed every iteration from idcard, which the rectangle drawn
        # below modifies in place.
        gray = cv2.cvtColor(idcard, cv2.COLOR_RGB2GRAY)
        roi = gray[y:y + h, x:x + w]
        # NOTE(review): evaluates as (count / h) * w, not count / (h * w) --
        # confirm whether a fill ratio was intended.
        if cv2.countNonZero(roi) / h * w > 0.55:
            if h > 16 and w > 16:
                # Each region is written to '<index>.jpg' for tesseract; the
                # files are not removed here.
                filename = '%s.jpg' % index
                cv2.imwrite(filename, roi)
                text = pytesseract.image_to_string(
                    Image.open(filename), lang="kir+eng", config="-psm 7"
                )
                item = {'x': x, 'y': y, 'w': w, 'h': h, 'text': text}
                result.append(item)
                cv2.rectangle(idcard, (x, y), (x + w, y + h), (255, 0, 255), 2)
    if preview:
        original_image = original_image.split('/')[-1]
        location = save_image('regions' + original_image, idcard)
        return location, regionskir(result)
    return regionskir(result)
def init_ui(self):
    """Build the widget: image preview on top, Open button and text below.

    ``TEXT.png`` is loaded as the initial image; a 160x90 preview is shown
    and the line edit is filled with the image's OCR text.
    """
    self._fileDialog = QFileDialog(self)

    self._v_layout = QVBoxLayout()
    self._v_layout.setSpacing(2)
    self.setLayout(self._v_layout)

    self._path = "TEXT.png"
    self._pixmap = QPixmap(self._path)

    self._btnFile = QPushButton("Open")
    self._hWidget = QWidget()
    self._hLayout = QHBoxLayout()
    self._hWidget.setLayout(self._hLayout)

    self._image = Image.open(self._path)
    self._line = QLineEdit()
    self._hLayout.addWidget(self._btnFile)
    self._hLayout.addWidget(self._line)

    size = QSize(160, 90)
    pix = self._pixmap.scaled(size, transformMode=Qt.SmoothTransformation)
    self._lbl = QLabel()
    self._lbl.setPixmap(pix)

    self._v_layout.addWidget(self._lbl)
    self._v_layout.addWidget(self._hWidget)

    self._btnFile.clicked.connect(self.openFilePressed)
    # OCR the image already loaded above rather than opening the same
    # hard-coded file a second time.
    self._line.setText(pytesseract.image_to_string(self._image))
def openFilePressed(self):
    """Let the user pick an image, show its preview and OCR text.

    Nothing changes on screen when the dialog is cancelled (empty path).
    ``self._path`` holds whatever ``getOpenFileName`` returned; its first
    item is the selected path.
    """
    # Argument order is (parent, caption, directory, filter). The original
    # passed the filter string as the caption, so no filter was applied.
    self._path = self._fileDialog.getOpenFileName(
        self, "Open Image", "", "Image Files (*.png *.jpg)")
    selected = self._path[0]
    if selected != "":
        self._pixmap = QPixmap(selected)
        size = QSize(160, 90)
        pix = self._pixmap.scaled(size, transformMode=Qt.SmoothTransformation)
        self._lbl.setPixmap(pix)
        self._image = Image.open(selected)
        text = pytesseract.image_to_string(self._image)
        self._line.setText(text)
def updateText(self):
    """Reload ``TEXT.png``, refresh the preview and publish its OCR text.

    The recognised text (English, ``-psm 8``) is put into the line edit and
    emitted through ``signal_send_text``.
    """
    source = 'TEXT.png'

    self._pixmap = QPixmap(source)
    preview = self._pixmap.scaled(
        QSize(160, 90),
        transformMode=Qt.SmoothTransformation,
    )
    self._lbl.setPixmap(preview)

    self._image = Image.open(source)
    recognised = pytesseract.image_to_string(
        self._image,
        lang='eng',
        config='-psm 8',
    )
    self._line.setText(recognised)
    self.signal_send_text.emit(recognised)
def img_to_string(self):
    """Crop and optimise the image, OCR it, then optimise the text.

    The OCR result is stored in ``self.img_text`` and printed both before and
    after ``optimize_text`` runs.

    :return: None
    """
    report = '??????%s'

    # Prepare the image.
    self.crop_img()
    self.optimize_img()

    # Recognise.
    self.img_text = pytesseract.image_to_string(self.img_fp)
    print(report % self.img_text)

    # Post-process the text.
    self.optimize_text()
    print(report % self.img_text)
def test2(image_name):
    """Recolour an image to pure black/white per pixel and print its OCR text.

    :param image_name: path of the image to open.
    :return: None
    """
    black = (0, 0, 0, 255)
    white = (255, 255, 255, 255)

    with Image.open(image_name) as source:
        image = source.convert("RGBA")
        pixels = image.load()
        width, height = image.size

        # Make the letters bolder for easier recognition. The three rules
        # look only at the pixel itself, so applying them in sequence per
        # pixel equals the original three full passes.
        for y in range(height):
            for x in range(width):
                if pixels[x, y][0] < 90:
                    pixels[x, y] = black
                if pixels[x, y][1] < 136:
                    pixels[x, y] = black
                if pixels[x, y][2] > 0:
                    pixels[x, y] = white

        print(pytesseract.image_to_string(image))
def image_to_string(image):
    """Recognise the digits in ``image`` by comparing against ``font``.

    The image is converted with ``convert_black_white`` and split with
    ``cut``. Each piece's pixel table is compared with ``font[0]`` ..
    ``font[9]``; the first equal entry gives the digit. Pieces that match
    nothing contribute no character.

    :param image: image to recognise.
    :return: the recognised digits as a string.
    """
    global font
    digits = []
    for glyph in cut(convert_black_white(image)):
        # next() stops at the first matching digit, like the original break.
        digits.append(next(
            (str(num) for num in range(10)
             if create_pix_tables(glyph) == font[num]),
            '',
        ))
    return str().join(digits)
def solve():
    """Print the sum of ``i * value`` over the images ``1.bmp`` .. ``9999.bmp``.

    ``value`` is the integer recognised by ``image_to_string`` in the i-th
    image of the hard-coded directory. The global ``font`` is (re)loaded with
    ``get_font`` first.

    :return: None
    """
    global font
    font = get_font()

    path = "/Users/L1n/Desktop/bmp"
    total = 0
    for i in range(1, 10000):
        bmp_path = "{}{}{}.bmp".format(path, os.sep, i)
        with Image.open(bmp_path) as image:
            total += i * int(image_to_string(image))
    print("Sum: {}".format(total))
def recognizeImage(results, cvimage ,rect, language, charWhiteList=None):
    """OCR one image region and append the result to ``results``.

    :param results: list that receives a new ``ImageRecognizerItem``.
    :param cvimage: array accepted by ``Image.fromarray``.
    :param rect: stored with the text in the ``ImageRecognizerItem``.
    :param language: tesseract language code.
    :param charWhiteList: optional string of allowed characters.
    :return: None
    """
    options = ["-psm 7"]  # single line mode
    if charWhiteList is not None:
        options.append("-c tessedit_char_whitelist=" + charWhiteList)
    config = " ".join(options)

    picture = Image.fromarray(cvimage)
    recognised = pytesseract.image_to_string(picture, lang=language, config=config)
    # Append as an ImageRecognizerItem.
    results.append(ImageRecognizerItem(recognised, rect))
def recognize_url(url):
    """Download an image, rebuild it as black/white points and OCR it.

    The download is stored as ``./img.jpg``; the denoised image is saved to a
    hard-coded ``processed.jpg`` path and that file is what tesseract reads.

    :param url: address of the image to download.
    :return: text recognised in the processed image.
    """
    import urllib.request

    downloaded = './img.jpg'
    processed = "C:\\Users\\poluo\\PycharmProjects\\douban\\douban\\processed.jpg"

    urllib.request.urlretrieve(url, downloaded)
    img = Image.open(downloaded).convert('RGBA')
    w, h = img.size

    point_list = gen_white_black_points(img)
    # The character picture is printed before and after noise reduction.
    print_char_pic(w, h, point_list)
    reduce_noisy(w, h, point_list)
    print_char_pic(w, h, point_list)

    img.putdata(point_list)
    img.save(processed)
    return pytesseract.image_to_string(Image.open(processed))
def solution_from_image(image):
    """Split a captcha into four pieces and OCR each as a single character.

    :param image: captcha image passed to ``filter_split``.
    :return: ``'????'`` when the split does not yield exactly four pieces;
        otherwise the per-piece results joined together, with ``'?'`` for any
        piece that produced no text or raised ``TesseractError``.
    """
    pieces = filter_split(image)
    if len(pieces) != 4:
        return '????'

    config = '-psm 10 -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz'
    characters = []
    for piece in pieces:
        try:
            guess = pytesseract.image_to_string(piece, config=config)
        except pytesseract.pytesseract.TesseractError:
            guess = None
        characters.append(guess or '?')
    return ''.join(characters)
def parse_captcha(self, link, status):
    """Answer captcha pages served for ``link`` until none is returned.

    When ``status`` is 0 the page is fetched through a fixed HTTP proxy. If
    it contains the captcha block, the captcha image is downloaded, sharpened,
    OCR'd and the guess is POSTed back to ``link``; the method then calls
    itself to check again. Any exception is printed and swallowed.

    :param link: URL of the page that may show a captcha.
    :param status: the page is only checked when this equals 0.
    :return: always None.
    """
    print("\n\n status in captcha :  %s" % (status,))
    print("\n link in captcha :  %s" % (link,))
    try:
        if status != 0:
            return

        proxy = urllib2.ProxyHandler({'http': 'http://14.142.4.33'})
        opener = urllib2.build_opener(proxy)
        header = ua.random
        print("\n header :  %s" % (header,))
        print("\n link :  %s" % (link,))
        opener.addheaders = [('User-agent', header)]
        data = opener.open(link).read()

        soup = BeautifulSoup(data, 'html.parser')
        div1 = soup.find("div", {"class": "a-row a-text-center"})
        if div1 is None:
            # No captcha block on the page: nothing left to do. (The original
            # also set the local ``status`` here, which had no effect.)
            return

        img = div1.find("img")
        image_url = img["src"]
        print("\n captcha..")
        print("image :  %s" % (image_url,))
        image = Image.open(StringIO(requests.get(image_url).content))
        # Image.filter returns a new image; the original discarded it, so the
        # sharpening never reached tesseract.
        image = image.filter(ImageFilter.SHARPEN)
        captcha = pytesseract.image_to_string(image)
        print("captcha :  %s" % (captcha,))

        values = {'field-keywords' : captcha}
        data = urllib.urlencode(values)
        req = urllib2.Request(link, data, {'User-agent': header})
        resp = urllib2.urlopen(req)
        try:
            resp.read()
        finally:
            resp.close()

        # NOTE(review): recursion is unbounded while the site keeps serving
        # captchas -- consider a retry limit.
        self.parse_captcha(link, status)
    except Exception as e:
        print("\n Exception :  %s" % (e,))
def parse_captcha(self, link, status):
    """Answer captcha pages served for ``link`` until none is returned.

    When ``status`` is 0 the page is fetched. If it contains the captcha
    block, the captcha image is downloaded, sharpened, OCR'd and the guess is
    POSTed back to ``link``; the method then calls itself to check again. Any
    exception is printed and swallowed.

    :param link: URL of the page that may show a captcha.
    :param status: the page is only checked when this equals 0.
    :return: always None.
    """
    try:
        if status != 0:
            return

        # The original built a ProxyHandler here but never passed it to the
        # opener, so requests go out directly; the unused object is dropped.
        opener = urllib2.build_opener()
        header = ua.random
        print("\n header :  %s" % (header,))
        print("\n link :  %s" % (link,))
        opener.addheaders = [('User-agent', header)]
        data = opener.open(link).read()

        soup = BeautifulSoup(data, 'html.parser')
        div1 = soup.find("div", {"class": "a-row a-text-center"})
        if div1 is None:
            # No captcha block on the page: nothing left to do. (The original
            # also set the local ``status`` here, which had no effect.)
            return

        print("\n\n status in captcha :  %s" % (status,))
        print("\n link in captcha :  %s" % (link,))
        img = div1.find("img")
        image_url = img["src"]
        print("\n captcha..")
        print("image :  %s" % (image_url,))
        image = Image.open(StringIO(requests.get(image_url).content))
        # Image.filter returns a new image; the original discarded it, so the
        # sharpening never reached tesseract.
        image = image.filter(ImageFilter.SHARPEN)
        captcha = pytesseract.image_to_string(image)
        print("captcha :  %s" % (captcha,))

        values = {'field-keywords' : captcha}
        data = urllib.urlencode(values)
        req = urllib2.Request(link, data, {'User-agent': header})
        resp = urllib2.urlopen(req)
        try:
            resp.read()
        finally:
            resp.close()

        # NOTE(review): recursion is unbounded while the site keeps serving
        # captchas -- consider a retry limit.
        self.parse_captcha(link, status)
    except Exception as e:
        print("\n Exception :  %s" % (e,))
def parse_captcha(self, link, status):
    """Answer captcha pages served for ``link`` until none is returned.

    When ``status`` is 0 the page is fetched. If it contains the captcha
    block, the captcha image is downloaded, sharpened, OCR'd and the guess is
    POSTed back to ``link``; the method then calls itself to check again. Any
    exception is printed and swallowed.

    :param link: URL of the page that may show a captcha.
    :param status: the page is only checked when this equals 0.
    :return: always None.
    """
    print("\n\n status in captcha :  %s" % (status,))
    print("\n link in captcha :  %s" % (link,))
    try:
        if status != 0:
            return

        # The original built a ProxyHandler here but never passed it to the
        # opener, so requests go out directly; the unused object is dropped.
        opener = urllib2.build_opener()
        header = ua.random
        print("\n header :  %s" % (header,))
        print("\n link :  %s" % (link,))
        opener.addheaders = [('User-agent', header)]
        data = opener.open(link).read()

        soup = BeautifulSoup(data, 'html.parser')
        div1 = soup.find("div", {"class": "a-row a-text-center"})
        if div1 is None:
            # No captcha block on the page: nothing left to do. (The original
            # also set the local ``status`` here, which had no effect.)
            return

        img = div1.find("img")
        image_url = img["src"]
        print("\n captcha..")
        print("image :  %s" % (image_url,))
        image = Image.open(StringIO(requests.get(image_url).content))
        # Image.filter returns a new image; the original discarded it, so the
        # sharpening never reached tesseract.
        image = image.filter(ImageFilter.SHARPEN)
        captcha = pytesseract.image_to_string(image)
        print("captcha :  %s" % (captcha,))

        values = {'field-keywords' : captcha}
        data = urllib.urlencode(values)
        req = urllib2.Request(link, data, {'User-agent': header})
        resp = urllib2.urlopen(req)
        try:
            resp.read()
        finally:
            resp.close()

        # NOTE(review): recursion is unbounded while the site keeps serving
        # captchas -- consider a retry limit.
        self.parse_captcha(link, status)
    except Exception as e:
        print("\n Exception :  %s" % (e,))
def parse_captcha(self, link, status):
    """Answer captcha pages served for ``link`` until none is returned.

    When ``status`` is 0 the page is fetched. If it contains the captcha
    block, the captcha image is downloaded, sharpened, OCR'd and the guess is
    POSTed back to ``link``; the method then calls itself to check again. Any
    exception is printed and swallowed.

    :param link: URL of the page that may show a captcha.
    :param status: the page is only checked when this equals 0.
    :return: always None.
    """
    print("\n\n status in captcha :  %s" % (status,))
    print("\n link in captcha :  %s" % (link,))
    try:
        if status != 0:
            return

        # Requests go out directly; proxy use was disabled by the author.
        opener = urllib2.build_opener()
        header = ua.random
        print("\n header :  %s" % (header,))
        print("\n link :  %s" % (link,))
        opener.addheaders = [('User-agent', header)]
        data = opener.open(link).read()

        soup = BeautifulSoup(data, 'html.parser')
        div1 = soup.find("div", {"class": "a-row a-text-center"})
        if div1 is None:
            # No captcha block on the page: nothing left to do. (The original
            # also set the local ``status`` here, which had no effect.)
            return

        img = div1.find("img")
        image_url = img["src"]
        print("\n captcha..")
        print("image :  %s" % (image_url,))
        image = Image.open(StringIO(requests.get(image_url).content))
        # Image.filter returns a new image; the original discarded it, so the
        # sharpening never reached tesseract.
        image = image.filter(ImageFilter.SHARPEN)
        captcha = pytesseract.image_to_string(image)
        print("captcha :  %s" % (captcha,))

        values = {'field-keywords' : captcha}
        data = urllib.urlencode(values)
        req = urllib2.Request(link, data, {'User-agent': header})
        resp = urllib2.urlopen(req)
        try:
            resp.read()
        finally:
            resp.close()

        # NOTE(review): recursion is unbounded while the site keeps serving
        # captchas -- consider a retry limit.
        self.parse_captcha(link, status)
    except Exception as e:
        print("\n Exception :  %s" % (e,))
def ocr(img):
    """OCR a captcha and return only its ASCII letters, lower-cased.

    Any character dropped by the clean-up is reported on stdout.

    :param img: image passed to ``img_to_bw`` before recognition.
    :return: the lower-cased ASCII letters of the recognised text.
    """
    bw = img_to_bw(img)
    captcha = pytesseract.image_to_string(bw)
    lowered = captcha.lower()
    cleaned = ''.join(c for c in lowered if c in string.ascii_lowercase)
    if len(cleaned) != len(lowered):
        # Compare against the lower-cased text. The original diffed the raw
        # text, so upper-case letters that were kept showed up as "removed".
        print('removed bad characters: {}'.format(set(lowered) - set(cleaned)))
    return cleaned
async def cmd_info(message, parameters, recursion=0):
    """Reply with the OCR text of the most recent attachment in the channel.

    The last 25 messages of ``message.channel`` are scanned; the first one
    with an attachment is downloaded, sharpened and OCR'd, and an embed with
    the text (or an error description) is sent back. If no message has an
    attachment, an embed saying so is sent.

    Declared ``async def`` because the body uses ``async for`` and ``await``.

    :param message: the triggering message; its channel is read and replied to.
    :param parameters: unused.
    :param recursion: unused.
    """
    error_colour = 0xB5434E
    ok_colour = 0x43B581

    async def reply(colour, description):
        """Send a one-line embed to the triggering channel."""
        e = discord.Embed(colour=colour)
        e.description = description
        await client.send_message(message.channel, embed=e)

    async for msg in client.logs_from(message.channel, limit=25):
        if not msg.attachments:
            continue
        # Only download and OCR are guarded, so a failure while sending the
        # reply is not misreported as an image problem.
        try:
            image = Image.open(BytesIO(requests.get(msg.attachments[0]['url']).content)).filter(ImageFilter.SHARPEN)
            text = pytesseract.image_to_string(image)
        except OSError:
            await reply(error_colour, "Image way big, are you trying to kill me?")
            return
        except TypeError:
            await reply(error_colour, "Latest attachment is not a static image, try again.")
            return
        except Exception:
            # Was a bare ``except:``, which also swallowed non-Exception
            # interrupts.
            await reply(error_colour, "Error ocurred, not related to OSError or TypeError I guess.")
            return

        if not text:
            await reply(error_colour, "I just forgot how to read...")
        else:
            await reply(ok_colour, text)
        return

    # Reworded: the original message ended with an ableist slur.
    await reply(error_colour, "I can't find an image in the last 25 posts.")
def get_vcode(path):
    """OCR the image at ``path`` with the ``numfont`` language data.

    The tesseract call is serialised through the module-level ``mutex``.

    :param path: path of the image to open.
    :return: the recognised text with commas and newlines removed.
    """
    with Image.open(path) as image:
        mutex.acquire(1)
        try:
            vcode = pytesseract.image_to_string(image, lang='numfont')
        finally:
            # Release even if OCR raises, so other callers are not blocked.
            mutex.release()
    return vcode.replace(',', '').replace('\n', '')
def get_vcode_by_img_0(img):
    """OCR ``img`` with ``numfont``, retrying via ``merge_thumb_0`` if empty.

    When the first pass returns nothing, the image is passed through
    ``merge_thumb_0`` and recognised again. A retry result of ``'00'``
    becomes ``'0'``; any other retry result has leading and trailing ``'0'``
    characters stripped. The whole sequence is serialised through the
    module-level ``mutex``.

    :param img: image object accepted by ``pytesseract.image_to_string``.
    :return: the recognised text with commas and newlines removed.
    """
    mutex.acquire(1)
    try:
        vcode = pytesseract.image_to_string(img, lang='numfont')
        if vcode == '':
            img = merge_thumb_0(img)
            vcode = pytesseract.image_to_string(img, lang='numfont')
            if vcode == '00':
                vcode = '0'
            else:
                vcode = vcode.strip('0')
    finally:
        # Release even if OCR or merging raises, so other callers are not
        # blocked.
        mutex.release()
    return vcode.replace(',', '').replace('\n', '')