mirror of
https://github.com/langbot-app/LangBot.git
synced 2026-06-27 07:54:19 +00:00
chore: 提交部分测试文件
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
|
||||
|
||||
user_agents = [
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0'
|
||||
]
|
||||
|
||||
r = requests.get(
|
||||
sys.argv[1],
|
||||
headers={
|
||||
"User-Agent": random.choice(user_agents)
|
||||
}
|
||||
)
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
# print(soup.get_text())
|
||||
|
||||
raw = soup.get_text()
|
||||
|
||||
import re
|
||||
|
||||
# strip每一行
|
||||
# raw = '\n'.join([line.strip() for line in raw.split('\n')])
|
||||
|
||||
# # 删除所有空行或只有空格的行
|
||||
# raw = re.sub(r'\n\s*\n', '\n', raw)
|
||||
|
||||
|
||||
print(raw)
|
||||
Reference in New Issue
Block a user