# ---- tests/bs_test/bs_test.py (new file) ----
# Fetch the URL given as the first command-line argument, sending a randomly
# chosen desktop-browser User-Agent, and print the text content of the page.

import requests
from bs4 import BeautifulSoup
import os
import random
import re
import sys


# Browser User-Agent strings; one is picked at random for the request.
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1.2 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Version/14.1 Safari/537.36',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0',
]

# Seconds to wait for the server. requests applies no timeout by default, so
# without this an unresponsive host would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Fail with a usage message instead of an IndexError traceback.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} URL")

r = requests.get(
    sys.argv[1],
    headers={"User-Agent": random.choice(user_agents)},
    timeout=REQUEST_TIMEOUT,
)
soup = BeautifulSoup(r.text, 'html.parser')
# print(soup.get_text())

raw = soup.get_text()

# Strip every line.
# raw = '\n'.join([line.strip() for line in raw.split('\n')])

# # Delete all lines that are empty or contain only whitespace.
# raw = re.sub(r'\n\s*\n', '\n', raw)


print(raw)

# ---- tests/ssh_client_test/ssh_client.py (new file) ----
import os
import sys
import paramiko
import time
import select


class sshClient:
    """Minimal interactive SSH shell client built on a paramiko Transport."""

    def __init__(self, host, port, user, password):
        """Connect to ``(host, port)``, log in with a password and open a shell.

        After authentication a session channel is opened, a pseudo-terminal is
        requested and an interactive shell is started on it, so commands can
        be written with sendCmd() and output read with recvResponse().
        """
        self.trans = paramiko.Transport((host, port))
        try:
            self.trans.start_client()
            self.trans.auth_password(username=user, password=password)
            self.channel = self.trans.open_session()
            self.channel.get_pty()
            self.channel.invoke_shell()
        except Exception:
            # Do not leak the transport when login or channel setup fails.
            self.trans.close()
            raise

    def sendCmd(self, cmd):
        """Send ``cmd`` to the remote shell.

        The text is written as-is; include the trailing newline so the shell
        actually executes it. Returns None.
        """
        self.channel.sendall(cmd)

    def recvResponse(self, timeout):
        """Collect shell output until the channel stays quiet for ``timeout`` s.

        The server may be slow to answer, so the channel is polled with
        select(); every chunk that arrives is appended to the result, and the
        loop ends once a poll times out with nothing readable or the remote
        side closes the channel. The collected text is echoed to stdout and
        returned as a str.
        """
        chunks = []
        try:
            while True:
                readable, _, _ = select.select([self.channel], [], [], timeout)
                if self.channel not in readable:
                    # Nothing arrived within the timeout: the reply is complete.
                    break
                chunk = self.channel.recv(1024)
                if not chunk:
                    # An empty read means the channel was closed; select()
                    # would keep reporting it readable, so stop here.
                    break
                chunks.append(chunk)
        except TimeoutError:
            # Presumably raised by recv() when a channel timeout is set;
            # return whatever was received so far.
            pass
        # Decode once over the joined bytes so a multi-byte character split
        # across two chunks is handled; undecodable bytes are replaced rather
        # than aborting the read.
        text = b''.join(chunks).decode(errors='replace')
        sys.stdout.write(text)
        sys.stdout.flush()
        return text

    def close(self):
        """Close the shell channel and then the underlying transport."""
        self.channel.close()
        self.trans.close()


host = 'host'
port = 22  # your port
user = 'root'
pwd = 'pass'

ssh = sshClient(host, port, user, pwd)
try:
    response = ssh.recvResponse(1)
    ssh.sendCmd("ls\n")
    ssh.sendCmd("cd /home\n")
    response = ssh.recvResponse(1)
    ssh.sendCmd("ls\n")
    response = ssh.recvResponse(1)
finally:
    # Always release the connection, even if a command above fails.
    ssh.close()

# ---- tests/token_test/tiktoken_test.py (new file) ----
import tiktoken
import openai
import json
import os


openai.api_key = os.getenv("OPENAI_API_KEY")


def encode(text: str, model: str) -> list:
    """Return the token ids of ``text`` using the tokeniser for ``model``.

    The tokeniser is the one tiktoken associates with the given OpenAI API
    model name.
    """
    return tiktoken.encoding_for_model(model).encode(text)


# def ask(prompt: str, model: str = "gpt-3.5-turbo"):
#     # To get the tokeniser corresponding to a specific model in the OpenAI API:
#     enc =
#     tiktoken.encoding_for_model(model)

#     resp = openai.ChatCompletion.create(
#         model=model,
#         messages=[
#             {
#                 "role": "user",
#                 "content": prompt
#             }
#         ]
#     )

#     return enc.encode(prompt), enc.encode(resp['choices'][0]['message']['content']), resp

def ask(
    messages: list,
    model: str = "gpt-3.5-turbo"
):
    """Send ``messages`` to the chat completion API and tokenise both sides.

    Returns a 3-tuple: the token ids of a flat text rendering of the prompt
    (each message as role + content + newline, followed by "assistant: "),
    the token ids of the first choice's reply content, and the raw response.
    """
    tokenizer = tiktoken.encoding_for_model(model)

    resp = openai.ChatCompletion.create(
        model=model,
        messages=messages
    )

    # Flatten the conversation into one string for a rough prompt token count.
    rendered = "".join(
        entry['role'] + entry['content'] + "\n" for entry in messages
    )
    rendered += "assistant: "

    reply_text = resp['choices'][0]['message']['content']
    return tokenizer.encode(rendered), tokenizer.encode(reply_text), resp


def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613"):
    """Return the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    # Model names that share the 3-tokens-per-message framing.
    fixed_overhead_models = {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
    }

    if model in fixed_overhead_models:
        tokens_per_message, tokens_per_name = 3, 1
    elif model == "gpt-3.5-turbo-0301":
        # every message follows <|start|>{role/name}\n{content}<|end|>\n
        # and if there's a name, the role is omitted
        tokens_per_message, tokens_per_name = 4, -1
    elif "gpt-3.5-turbo" in model:
        print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4" in model:
        print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return num_tokens_from_messages(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )

    # every reply is primed with <|start|>assistant<|message|>
    total = 3
    for message in messages:
        total += tokens_per_message
        for field, text in message.items():
            total += len(encoding.encode(text))
            if field == "name":
                total += tokens_per_name
    return total


# Sample conversation used to compare the two token counts below.
messages = [
    {"role": "user", "content": "你叫什么名字?"},
    {"role": "assistant", "content": "我是AI助手,没有具体的名字。你可以叫我GPT-3。有什么可以帮到你的吗?"},
    {"role": "user", "content": "你是由谁开发的?"},
    {"role": "assistant", "content": "我是由OpenAI开发的,一家人工智能研究实验室。OpenAI的使命是促进人工智能的发展,使其为全人类带来积极影响。我是由OpenAI团队使用GPT-3模型训练而成的。"},
    {"role": "user", "content": "很高兴见到你。"},
]


pro, rep, resp = ask(messages)

print(len(pro), len(rep))
print(resp)
print(resp['choices'][0]['message']['content'])

print(num_tokens_from_messages(messages, model="gpt-3.5-turbo"))