Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
LAE
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
文靖昊
LAE
Commits
4d063424
Commit
4d063424
authored
Jul 04, 2024
by
文靖昊
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/geo' into geo
parents
824785fb
2c36a162
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
1628 additions
and
15 deletions
+1628
-15
.gitignore
.gitignore
+4
-2
create_table.sql
init_db/create_table.sql
+2
-0
web.py
src/controller/web.py
+48
-4
crud.py
src/pgdb/chat/crud.py
+8
-6
turn_qa_table.py
src/pgdb/chat/turn_qa_table.py
+2
-0
pgsqldocstore.py
src/pgdb/knowledge/pgsqldocstore.py
+3
-1
rerank.py
src/server/rerank.py
+1561
-2
No files found.
.gitignore
View file @
4d063424
...
...
@@ -58,4 +58,6 @@ exam/
aaa/
bbb/
ccc/
.env
\ No newline at end of file
.env
lae_pg_data
tmp
\ No newline at end of file
init_db/create_table.sql
View file @
4d063424
...
...
@@ -30,6 +30,7 @@ CREATE TABLE turn_qa (
chat_id
varchar
(
1000
),
question
text
,
answer
text
,
similar_docs
text
,
create_time
timestamp
(
6
)
DEFAULT
current_timestamp
,
turn_number
int
,
is_last
int2
...
...
@@ -38,6 +39,7 @@ COMMENT ON COLUMN "turn_qa"."turn_id" IS '会话轮次id';
COMMENT
ON
COLUMN
"turn_qa"
.
"chat_id"
IS
'会话id'
;
COMMENT
ON
COLUMN
"turn_qa"
.
"question"
IS
'该轮会话问题'
;
COMMENT
ON
COLUMN
"turn_qa"
.
"answer"
IS
'该轮会话答案'
;
COMMENT
ON
COLUMN
"turn_qa"
.
"similar_docs"
IS
'该轮会话相似文档 hash 索引'
;
COMMENT
ON
COLUMN
"turn_qa"
.
"create_time"
IS
'该轮会话创建时间,默认为当前时间'
;
COMMENT
ON
COLUMN
"turn_qa"
.
"turn_number"
IS
'会话轮数'
;
COMMENT
ON
COLUMN
"turn_qa"
.
"is_last"
IS
'是否为最后一轮对话:0=否,1=是'
;
...
...
src/controller/web.py
View file @
4d063424
...
...
@@ -5,6 +5,8 @@ from fastapi import FastAPI, Header,Query
from
fastapi.middleware.cors
import
CORSMiddleware
from
datetime
import
datetime
,
timedelta
from
src.pgdb.chat.c_db
import
UPostgresDB
from
src.pgdb.knowledge.k_db
import
PostgresDB
from
src.pgdb.knowledge.txt_doc_table
import
TxtDoc
import
uvicorn
import
json
from
src.pgdb.chat.crud
import
CRUD
...
...
@@ -13,6 +15,7 @@ from src.server.get_similarity import QAExt
from
src.server.qa
import
QA
from
langchain_core.prompts
import
PromptTemplate
from
langchain_openai
import
ChatOpenAI
from
langchain_core.documents
import
Document
import
re
from
src.controller.request
import
(
PhoneLoginRequest
,
...
...
@@ -49,6 +52,9 @@ c_db = UPostgresDB(host=CHAT_DB_HOST, database=CHAT_DB_DBNAME, user=CHAT_DB_USER
port
=
CHAT_DB_PORT
,
)
c_db
.
connect
()
k_db
=
PostgresDB
(
host
=
VEC_DB_HOST
,
database
=
VEC_DB_DBNAME
,
user
=
VEC_DB_USER
,
password
=
VEC_DB_PASSWORD
,
port
=
VEC_DB_PORT
)
k_db
.
connect
()
vecstore_faiss
=
VectorStore_FAISS
(
embedding_model_name
=
EMBEEDING_MODEL_PATH
,
store_path
=
FAISS_STORE_PATH
,
...
...
@@ -127,7 +133,7 @@ def get_history_by_session_id(session_id:str,token: str = Header(None)):
j
[
"Question"
]
=
h
[
1
]
j
[
"Answer"
]
=
h
[
2
]
j
[
"IsLast"
]
=
h
[
3
]
j
[
"SimilarDocuments"
]
=
[]
j
[
"SimilarDocuments"
]
=
get_similarity_doc
(
h
[
4
])
history_json
.
append
(
j
)
history_str
=
json
.
dumps
(
history_json
)
return
{
...
...
@@ -172,19 +178,26 @@ def question(chat_request: ChatRequest, token: str = Header(None)):
prompt
+=
"问:{}
\n
答:{}
\n\n
"
.
format
(
h
[
0
],
h
[
1
])
answer
,
docs
=
my_chat
.
chat_with_history_with_ext
(
question
,
ext
=
matches
,
history
=
prompt
,
with_similarity
=
True
)
docs_json
=
[]
doc_hash
=
[]
for
d
in
docs
:
j
=
{}
j
[
"page_content"
]
=
d
.
page_content
j
[
"from_file"
]
=
d
.
metadata
[
"filename"
]
j
[
"page_number"
]
=
0
if
"hash"
in
d
.
metadata
:
doc_hash
.
append
(
d
.
metadata
[
"hash"
])
docs_json
.
append
(
j
)
if
len
(
doc_hash
)
>
0
:
hash_str
=
","
.
join
(
doc_hash
)
else
:
hash_str
=
""
# answer = "test Answer"
if
session_id
==
""
:
session_id
=
crud
.
create_chat
(
token
,
'
\t\t
'
,
'0'
)
crud
.
insert_turn_qa
(
session_id
,
question
,
answer
,
0
,
1
)
crud
.
insert_turn_qa
(
session_id
,
question
,
answer
,
0
,
1
,
hash_str
)
else
:
last_turn_id
=
crud
.
get_last_turn_num
(
str
(
session_id
))
crud
.
insert_turn_qa
(
session_id
,
question
,
answer
,
last_turn_id
+
1
,
1
)
crud
.
insert_turn_qa
(
session_id
,
question
,
answer
,
last_turn_id
+
1
,
1
,
hash_str
)
return
{
'code'
:
200
,
...
...
@@ -217,16 +230,24 @@ def re_generate(chat_request: ReGenerateRequest, token: str = Header(None)):
prompt
+=
"问:{}
\n
答:{}
\n\n
"
.
format
(
h
[
0
],
h
[
1
])
answer
,
docs
=
my_chat
.
chat_with_history_with_ext
(
question
,
ext
=
matches
,
history
=
prompt
,
with_similarity
=
True
)
docs_json
=
[]
doc_hash
=
[]
for
d
in
docs
:
j
=
{}
j
[
"page_content"
]
=
d
.
page_content
j
[
"from_file"
]
=
d
.
metadata
[
"filename"
]
j
[
"page_number"
]
=
0
docs_json
.
append
(
j
)
if
"hash"
in
d
.
metadata
:
doc_hash
.
append
(
d
.
metadata
[
"hash"
])
if
len
(
doc_hash
)
>
0
:
hash_str
=
","
.
join
(
doc_hash
)
else
:
hash_str
=
""
# answer = "reGenerate Answer"
crud
.
update_turn_last
(
str
(
session_id
),
last_turn_id
)
crud
.
insert_turn_qa
(
session_id
,
question
,
answer
,
last_turn_id
,
1
)
crud
.
insert_turn_qa
(
session_id
,
question
,
answer
,
last_turn_id
,
1
,
hash_str
)
return
{
'code'
:
200
,
...
...
@@ -238,5 +259,28 @@ def re_generate(chat_request: ReGenerateRequest, token: str = Header(None)):
}
}
def get_similarity_doc(similarity_doc_hash: str):
    """Turn a stored comma-separated hash string into JSON-ready documents.

    Args:
        similarity_doc_hash: Comma-separated document hashes (the format
            produced by ``",".join(doc_hash)`` when a turn is saved). May be
            empty or ``None``.

    Returns:
        The result of ``docs_to_json`` for every hash that could be resolved,
        or an empty list when there is nothing to look up.
    """
    if not similarity_doc_hash:
        return []

    # Drop blank fragments (e.g. from a stray or trailing comma) so they are
    # never sent to the document table as lookup keys.
    hashes = [h for h in similarity_doc_hash.split(",") if h]
    if not hashes:
        return []

    txt_doc = TxtDoc(k_db)
    docs = []
    for doc_hash in hashes:
        row = txt_doc.search(doc_hash)
        if not row:
            # NOTE(review): presumably search() returns a falsy value when the
            # hash is no longer in the knowledge base; skip it rather than
            # crash on row[0].
            continue
        # row[0] is the text, row[1] the metadata serialized as JSON.
        docs.append(Document(page_content=row[0], metadata=json.loads(row[1])))
    return docs_to_json(docs)
def docs_to_json(docs):
    """Convert document objects into plain dictionaries for the API response.

    Args:
        docs: Iterable of objects exposing ``page_content`` and a ``metadata``
            mapping that contains a ``"filename"`` key.

    Returns:
        A list with one dict per document holding ``page_content``,
        ``from_file`` and ``page_number`` (always 0).
    """
    return [
        {
            "page_content": doc.page_content,
            "from_file": doc.metadata["filename"],
            "page_number": 0,
        }
        for doc in docs
    ]
if __name__ == "__main__":
    # Script entry point: serve the FastAPI application with uvicorn,
    # listening on all interfaces at port 8088.
    uvicorn.run(
        app,
        host='0.0.0.0',
        port=8088,
    )
src/pgdb/chat/crud.py
View file @
4d063424
...
...
@@ -32,6 +32,7 @@ CREATE TABLE turn_qa (
chat_id varchar(1000),
question text,
answer text,
similar_docs text,
create_time timestamp(6) DEFAULT current_timestamp,
turn_number int,
is_last int2
...
...
@@ -40,6 +41,7 @@ COMMENT ON COLUMN "turn_qa"."turn_id" IS '会话轮次id';
COMMENT ON COLUMN "turn_qa"."chat_id" IS '会话id';
COMMENT ON COLUMN "turn_qa"."question" IS '该轮会话问题';
COMMENT ON COLUMN "turn_qa"."answer" IS '该轮会话答案';
COMMENT ON COLUMN "turn_qa"."similar_docs" IS '该轮会话相似文档 hash 索引';
COMMENT ON COLUMN "turn_qa"."create_time" IS '该轮会话创建时间,默认为当前时间';
COMMENT ON COLUMN "turn_qa"."turn_number" IS '会话轮数';
COMMENT ON COLUMN "turn_qa"."is_last" IS '是否为最后一轮对话:0=否,1=是';
...
...
@@ -86,26 +88,26 @@ class CRUD:
self
.
db
.
execute
(
TABLE_USER
)
def get_history(self, _chat_id):
    """Fetch every turn of a chat, ordered by turn number.

    Args:
        _chat_id: Value matched against ``turn_qa.chat_id``.

    Returns:
        Whatever ``self.db.fetchall()`` yields for the query; each row holds
        ``turn_number, question, answer, is_last, similar_docs`` in that order.
    """
    # Plain string (no f-prefix): the %s placeholder is bound by the driver.
    query = (
        'SELECT turn_number,question,answer,is_last,similar_docs '
        'FROM turn_qa WHERE chat_id=(%s) ORDER BY turn_number ASC'
    )
    self.db.execute_args(query, (_chat_id,))
    return self.db.fetchall()
def get_last_history(self, _chat_id):
    """Fetch the rows flagged ``is_last=1`` for a chat, ordered by turn number.

    Args:
        _chat_id: Value matched against ``turn_qa.chat_id``.

    Returns:
        Whatever ``self.db.fetchall()`` yields for the query; each row holds
        ``question, answer, similar_docs`` in that order.
    """
    # Plain string (no f-prefix): the %s placeholder is bound by the driver.
    query = (
        'SELECT question,answer,similar_docs '
        'FROM turn_qa WHERE chat_id=(%s) and is_last=1 ORDER BY turn_number ASC'
    )
    self.db.execute_args(query, (_chat_id,))
    return self.db.fetchall()
def get_last_history_before_turn_id(self, _chat_id, turn_id):
    """Fetch the ``is_last=1`` rows of a chat that precede a given turn.

    Args:
        _chat_id: Value matched against ``turn_qa.chat_id``.
        turn_id: Exclusive upper bound compared against ``turn_number``.

    Returns:
        Whatever ``self.db.fetchall()`` yields for the query; each row holds
        ``question, answer, similar_docs`` in that order, sorted by
        ``turn_number`` ascending.
    """
    # Plain string (no f-prefix): both %s placeholders are bound by the driver.
    query = (
        'SELECT question,answer,similar_docs '
        'FROM turn_qa WHERE chat_id=(%s) and is_last=1 and turn_number<(%s) '
        'ORDER BY turn_number ASC'
    )
    self.db.execute_args(query, (_chat_id, turn_id))
    return self.db.fetchall()
def insert_turn_qa(self, chat_id, question, answer, turn_number, is_last, similar_docs=None):
    """Insert one question/answer turn into ``turn_qa``.

    A single definition replaces the two consecutive ones (the later
    definition shadowed the earlier five-argument one); callers that pass
    only five arguments keep working because ``similar_docs`` is optional.

    Args:
        chat_id: Chat the turn belongs to.
        question: Question text of the turn.
        answer: Answer text of the turn.
        turn_number: Ordinal of the turn within the chat.
        is_last: Flag stored in the ``is_last`` column.
        similar_docs: Value stored in the ``similar_docs`` column;
            ``None`` when not supplied.
    """
    # Plain string (no f-prefix): all %s placeholders are bound by the driver.
    query = (
        'INSERT INTO turn_qa(chat_id, question, answer, turn_number, is_last, similar_docs) '
        'VALUES (%s,%s,%s,%s,%s,%s)'
    )
    self.db.execute_args(
        query,
        (chat_id, question, answer, turn_number, is_last, similar_docs),
    )
...
...
src/pgdb/chat/turn_qa_table.py
View file @
4d063424
...
...
@@ -5,6 +5,7 @@ CREATE TABLE turn_qa (
chat_id varchar(1000),
question text,
answer text,
similar_docs text,
create_time timestamp(6) DEFAULT current_timestamp,
turn_number int,
is_last int2
...
...
@@ -13,6 +14,7 @@ COMMENT ON COLUMN "turn_qa"."turn_id" IS '会话轮次id';
COMMENT ON COLUMN "turn_qa"."chat_id" IS '会话id';
COMMENT ON COLUMN "turn_qa"."question" IS '该轮会话问题';
COMMENT ON COLUMN "turn_qa"."answer" IS '该轮会话答案';
COMMENT ON COLUMN "turn_qa"."similar_docs" IS '该轮会话相似文档 hash 索引';
COMMENT ON COLUMN "turn_qa"."create_time" IS '该轮会话创建时间,默认为当前时间';
COMMENT ON COLUMN "turn_qa"."turn_number" IS '会话轮数';
COMMENT ON COLUMN "turn_qa"."is_last" IS '是否为最后一轮对话:0=否,1=是';
...
...
src/pgdb/knowledge/pgsqldocstore.py
View file @
4d063424
...
...
@@ -64,8 +64,10 @@ class PgSqlDocstore(Docstore, AddableMixin):
self
.
__sub_init__
()
anwser
=
self
.
VEC_TXT
.
search
(
search
)
content
=
self
.
TXT_DOC
.
search
(
anwser
[
0
])
meta
=
json
.
loads
(
content
[
1
])
meta
.
update
({
"hash"
:
anwser
[
0
]})
# paragraph_id = hash 插入到metadata中,便于后续根据段落查找
if
content
:
return
Document
(
page_content
=
content
[
0
],
metadata
=
json
.
loads
(
content
[
1
])
)
return
Document
(
page_content
=
content
[
0
],
metadata
=
meta
)
else
:
return
Document
()
...
...
src/server/rerank.py
View file @
4d063424
...
...
@@ -81,4 +81,1563 @@ class Base(ABC):
pass
def similarity(self, query: str, texts: list):
    """Placeholder for the query-versus-texts similarity computation.

    Args:
        query: The query string.
        texts: The candidate texts to compare against the query.

    Raises:
        NotImplementedError: Always; this implementation only signals that
            the method has to be provided elsewhere.
    """
    # The message now names this method (it previously said "encode").
    raise NotImplementedError("Please implement similarity method!")
# https://github.com/kzhisa/rag-fusion/blob/main/rag_fusion.py
from
langchain.load
import
dumps
,
loads
MAX_DOCS_FOR_CONTEXT
=
8
def reciprocal_rank_fusion(results: list[set]):
    """Merge several ranked document lists with Reciprocal Rank Fusion.

    Args:
        results: Iterable of ``(k, docs)`` pairs. ``docs`` is one retriever's
            result list, assumed to be ordered from most to least relevant;
            ``k`` is the constant added to the zero-based rank when scoring.

    Returns:
        At most ``MAX_DOCS_FOR_CONTEXT`` documents, highest fused score first.
    """
    score_by_doc = {}
    for k, ranked_docs in results:
        for position, document in enumerate(ranked_docs):
            # Documents are keyed by their serialized form so the same
            # document coming from different lists accumulates one score.
            key = dumps(document)
            score_by_doc[key] = score_by_doc.get(key, 0) + 1 / (position + k)

    ordered = sorted(score_by_doc.items(), key=lambda pair: pair[1], reverse=True)
    reranked_results = [(loads(key), total) for key, total in ordered]

    # For testing: print the reranked documents and their scores.
    print("Reranked documents: ", len(reranked_results))
    for document, total in reranked_results:
        print('---')
        print('Docs: ', ' '.join(document.page_content[:100].split()))
        print('RRF score: ', total)

    # Return only the documents, without scores.
    return [document for document, _ in reranked_results[:MAX_DOCS_FOR_CONTEXT]]
# 使用 jieba 分词
from
jieba
import
cut
stopWords
=
[
'--'
,
'?'
,
'“'
,
'”'
,
'》'
,
'--'
,
'able'
,
'about'
,
'above'
,
'according'
,
'accordingly'
,
'across'
,
'actually'
,
'after'
,
'afterwards'
,
'again'
,
'against'
,
"ain't"
,
'all'
,
'allow'
,
'allows'
,
'almost'
,
'alone'
,
'along'
,
'already'
,
'also'
,
'although'
,
'always'
,
'am'
,
'among'
,
'amongst'
,
'an'
,
'and'
,
'another'
,
'any'
,
'anybody'
,
'anyhow'
,
'anyone'
,
'anything'
,
'anyway'
,
'anyways'
,
'anywhere'
,
'apart'
,
'appear'
,
'appreciate'
,
'appropriate'
,
'are'
,
"aren't"
,
'around'
,
'as'
,
"a's"
,
'aside'
,
'ask'
,
'asking'
,
'associated'
,
'at'
,
'available'
,
'away'
,
'awfully'
,
'be'
,
'became'
,
'because'
,
'become'
,
'becomes'
,
'becoming'
,
'been'
,
'before'
,
'beforehand'
,
'behind'
,
'being'
,
'believe'
,
'below'
,
'beside'
,
'besides'
,
'best'
,
'better'
,
'between'
,
'beyond'
,
'both'
,
'brief'
,
'but'
,
'by'
,
'came'
,
'can'
,
'cannot'
,
'cant'
,
"can't"
,
'cause'
,
'causes'
,
'certain'
,
'certainly'
,
'changes'
,
'clearly'
,
"c'mon"
,
'co'
,
'com'
,
'come'
,
'comes'
,
'concerning'
,
'consequently'
,
'consider'
,
'considering'
,
'contain'
,
'containing'
,
'contains'
,
'corresponding'
,
'could'
,
"couldn't"
,
'course'
,
"c's"
,
'currently'
,
'definitely'
,
'described'
,
'despite'
,
'did'
,
"didn't"
,
'different'
,
'do'
,
'does'
,
"doesn't"
,
'doing'
,
'done'
,
"don't"
,
'down'
,
'downwards'
,
'during'
,
'each'
,
'edu'
,
'eg'
,
'eight'
,
'either'
,
'else'
,
'elsewhere'
,
'enough'
,
'entirely'
,
'especially'
,
'et'
,
'etc'
,
'even'
,
'ever'
,
'every'
,
'everybody'
,
'everyone'
,
'everything'
,
'everywhere'
,
'ex'
,
'exactly'
,
'example'
,
'except'
,
'far'
,
'few'
,
'fifth'
,
'first'
,
'five'
,
'followed'
,
'following'
,
'follows'
,
'for'
,
'former'
,
'formerly'
,
'forth'
,
'four'
,
'from'
,
'further'
,
'furthermore'
,
'get'
,
'gets'
,
'getting'
,
'given'
,
'gives'
,
'go'
,
'goes'
,
'going'
,
'gone'
,
'got'
,
'gotten'
,
'greetings'
,
'had'
,
"hadn't"
,
'happens'
,
'hardly'
,
'has'
,
"hasn't"
,
'have'
,
"haven't"
,
'having'
,
'he'
,
'hello'
,
'help'
,
'hence'
,
'her'
,
'here'
,
'hereafter'
,
'hereby'
,
'herein'
,
"here's"
,
'hereupon'
,
'hers'
,
'herself'
,
"he's"
,
'hi'
,
'him'
,
'himself'
,
'his'
,
'hither'
,
'hopefully'
,
'how'
,
'howbeit'
,
'however'
,
"i'd"
,
'ie'
,
'if'
,
'ignored'
,
"i'll"
,
"i'm"
,
'immediate'
,
'in'
,
'inasmuch'
,
'inc'
,
'indeed'
,
'indicate'
,
'indicated'
,
'indicates'
,
'inner'
,
'insofar'
,
'instead'
,
'into'
,
'inward'
,
'is'
,
"isn't"
,
'it'
,
"it'd"
,
"it'll"
,
'its'
,
"it's"
,
'itself'
,
"i've"
,
'just'
,
'keep'
,
'keeps'
,
'kept'
,
'know'
,
'known'
,
'knows'
,
'last'
,
'lately'
,
'later'
,
'latter'
,
'latterly'
,
'least'
,
'less'
,
'lest'
,
'let'
,
"let's"
,
'like'
,
'liked'
,
'likely'
,
'little'
,
'look'
,
'looking'
,
'looks'
,
'ltd'
,
'mainly'
,
'many'
,
'may'
,
'maybe'
,
'me'
,
'mean'
,
'meanwhile'
,
'merely'
,
'might'
,
'more'
,
'moreover'
,
'most'
,
'mostly'
,
'much'
,
'must'
,
'my'
,
'myself'
,
'name'
,
'namely'
,
'nd'
,
'near'
,
'nearly'
,
'necessary'
,
'need'
,
'needs'
,
'neither'
,
'never'
,
'nevertheless'
,
'new'
,
'next'
,
'nine'
,
'no'
,
'nobody'
,
'non'
,
'none'
,
'noone'
,
'nor'
,
'normally'
,
'not'
,
'nothing'
,
'novel'
,
'now'
,
'nowhere'
,
'obviously'
,
'of'
,
'off'
,
'often'
,
'oh'
,
'ok'
,
'okay'
,
'old'
,
'on'
,
'once'
,
'one'
,
'ones'
,
'only'
,
'onto'
,
'or'
,
'other'
,
'others'
,
'otherwise'
,
'ought'
,
'our'
,
'ours'
,
'ourselves'
,
'out'
,
'outside'
,
'over'
,
'overall'
,
'own'
,
'particular'
,
'particularly'
,
'per'
,
'perhaps'
,
'placed'
,
'please'
,
'plus'
,
'possible'
,
'presumably'
,
'probably'
,
'provides'
,
'que'
,
'quite'
,
'qv'
,
'rather'
,
'rd'
,
're'
,
'really'
,
'reasonably'
,
'regarding'
,
'regardless'
,
'regards'
,
'relatively'
,
'respectively'
,
'right'
,
'said'
,
'same'
,
'saw'
,
'say'
,
'saying'
,
'says'
,
'second'
,
'secondly'
,
'see'
,
'seeing'
,
'seem'
,
'seemed'
,
'seeming'
,
'seems'
,
'seen'
,
'self'
,
'selves'
,
'sensible'
,
'sent'
,
'serious'
,
'seriously'
,
'seven'
,
'several'
,
'shall'
,
'she'
,
'should'
,
"shouldn't"
,
'since'
,
'six'
,
'so'
,
'some'
,
'somebody'
,
'somehow'
,
'someone'
,
'something'
,
'sometime'
,
'sometimes'
,
'somewhat'
,
'somewhere'
,
'soon'
,
'sorry'
,
'specified'
,
'specify'
,
'specifying'
,
'still'
,
'sub'
,
'such'
,
'sup'
,
'sure'
,
'take'
,
'taken'
,
'tell'
,
'tends'
,
'th'
,
'than'
,
'thank'
,
'thanks'
,
'thanx'
,
'that'
,
'thats'
,
"that's"
,
'the'
,
'their'
,
'theirs'
,
'them'
,
'themselves'
,
'then'
,
'thence'
,
'there'
,
'thereafter'
,
'thereby'
,
'therefore'
,
'therein'
,
'theres'
,
"there's"
,
'thereupon'
,
'these'
,
'they'
,
"they'd"
,
"they'll"
,
"they're"
,
"they've"
,
'think'
,
'third'
,
'this'
,
'thorough'
,
'thoroughly'
,
'those'
,
'though'
,
'three'
,
'through'
,
'throughout'
,
'thru'
,
'thus'
,
'to'
,
'together'
,
'too'
,
'took'
,
'toward'
,
'towards'
,
'tried'
,
'tries'
,
'truly'
,
'try'
,
'trying'
,
"t's"
,
'twice'
,
'two'
,
'un'
,
'under'
,
'unfortunately'
,
'unless'
,
'unlikely'
,
'until'
,
'unto'
,
'up'
,
'upon'
,
'us'
,
'use'
,
'used'
,
'useful'
,
'uses'
,
'using'
,
'usually'
,
'value'
,
'various'
,
'very'
,
'via'
,
'viz'
,
'vs'
,
'want'
,
'wants'
,
'was'
,
"wasn't"
,
'way'
,
'we'
,
"we'd"
,
'welcome'
,
'well'
,
"we'll"
,
'went'
,
'were'
,
"we're"
,
"weren't"
,
"we've"
,
'what'
,
'whatever'
,
"what's"
,
'when'
,
'whence'
,
'whenever'
,
'where'
,
'whereafter'
,
'whereas'
,
'whereby'
,
'wherein'
,
"where's"
,
'whereupon'
,
'wherever'
,
'whether'
,
'which'
,
'while'
,
'whither'
,
'who'
,
'whoever'
,
'whole'
,
'whom'
,
"who's"
,
'whose'
,
'why'
,
'will'
,
'willing'
,
'wish'
,
'with'
,
'within'
,
'without'
,
'wonder'
,
"won't"
,
'would'
,
"wouldn't"
,
'yes'
,
'yet'
,
'you'
,
"you'd"
,
"you'll"
,
'your'
,
"you're"
,
'yours'
,
'yourself'
,
'yourselves'
,
"you've"
,
'zero'
,
'zt'
,
'ZT'
,
'zz'
,
'ZZ'
,
'一'
,
'一下'
,
'一些'
,
'一切'
,
'一则'
,
'一天'
,
'一定'
,
'一方面'
,
'一旦'
,
'一时'
,
'一来'
,
'一样'
,
'一次'
,
'一片'
,
'一直'
,
'一致'
,
'一般'
,
'一起'
,
'一边'
,
'一面'
,
'万一'
,
'上下'
,
'上升'
,
'上去'
,
'上来'
,
'上述'
,
'上面'
,
'下列'
,
'下去'
,
'下来'
,
'下面'
,
'不一'
,
'不久'
,
'不仅'
,
'不会'
,
'不但'
,
'不光'
,
'不单'
,
'不变'
,
'不只'
,
'不可'
,
'不同'
,
'不够'
,
'不如'
,
'不得'
,
'不怕'
,
'不惟'
,
'不成'
,
'不拘'
,
'不敢'
,
'不断'
,
'不是'
,
'不比'
,
'不然'
,
'不特'
,
'不独'
,
'不管'
,
'不能'
,
'不要'
,
'不论'
,
'不足'
,
'不过'
,
'不问'
,
'与'
,
'与其'
,
'与否'
,
'与此同时'
,
'专门'
,
'且'
,
'两者'
,
'严格'
,
'严重'
,
'个'
,
'个人'
,
'个别'
,
'中小'
,
'中间'
,
'丰富'
,
'临'
,
'为'
,
'为主'
,
'为了'
,
'为什么'
,
'为什麽'
,
'为何'
,
'为着'
,
'主张'
,
'主要'
,
'举行'
,
'乃'
,
'乃至'
,
'么'
,
'之'
,
'之一'
,
'之前'
,
'之后'
,
'之後'
,
'之所以'
,
'之类'
,
'乌乎'
,
'乎'
,
'乘'
,
'也'
,
'也好'
,
'也是'
,
'也罢'
,
'了'
,
'了解'
,
'争取'
,
'于'
,
'于是'
,
'于是乎'
,
'云云'
,
'互相'
,
'产生'
,
'人们'
,
'人家'
,
'什么'
,
'什么样'
,
'什麽'
,
'今后'
,
'今天'
,
'今年'
,
'今後'
,
'仍然'
,
'从'
,
'从事'
,
'从而'
,
'他'
,
'他人'
,
'他们'
,
'他的'
,
'代替'
,
'以'
,
'以上'
,
'以下'
,
'以为'
,
'以便'
,
'以免'
,
'以前'
,
'以及'
,
'以后'
,
'以外'
,
'以後'
,
'以来'
,
'以至'
,
'以至于'
,
'以致'
,
'们'
,
'任'
,
'任何'
,
'任凭'
,
'任务'
,
'企图'
,
'伟大'
,
'似乎'
,
'似的'
,
'但'
,
'但是'
,
'何'
,
'何况'
,
'何处'
,
'何时'
,
'作为'
,
'你'
,
'你们'
,
'你的'
,
'使得'
,
'使用'
,
'例如'
,
'依'
,
'依照'
,
'依靠'
,
'促进'
,
'保持'
,
'俺'
,
'俺们'
,
'倘'
,
'倘使'
,
'倘或'
,
'倘然'
,
'倘若'
,
'假使'
,
'假如'
,
'假若'
,
'做到'
,
'像'
,
'允许'
,
'充分'
,
'先后'
,
'先後'
,
'先生'
,
'全部'
,
'全面'
,
'兮'
,
'共同'
,
'关于'
,
'其'
,
'其一'
,
'其中'
,
'其二'
,
'其他'
,
'其余'
,
'其它'
,
'其实'
,
'其次'
,
'具体'
,
'具体地说'
,
'具体说来'
,
'具有'
,
'再者'
,
'再说'
,
'冒'
,
'冲'
,
'决定'
,
'况且'
,
'准备'
,
'几'
,
'几乎'
,
'几时'
,
'凭'
,
'凭借'
,
'出去'
,
'出来'
,
'出现'
,
'分别'
,
'则'
,
'别'
,
'别的'
,
'别说'
,
'到'
,
'前后'
,
'前者'
,
'前进'
,
'前面'
,
'加之'
,
'加以'
,
'加入'
,
'加强'
,
'十分'
,
'即'
,
'即令'
,
'即使'
,
'即便'
,
'即或'
,
'即若'
,
'却不'
,
'原来'
,
'又'
,
'及'
,
'及其'
,
'及时'
,
'及至'
,
'双方'
,
'反之'
,
'反应'
,
'反映'
,
'反过来'
,
'反过来说'
,
'取得'
,
'受到'
,
'变成'
,
'另'
,
'另一方面'
,
'另外'
,
'只是'
,
'只有'
,
'只要'
,
'只限'
,
'叫'
,
'叫做'
,
'召开'
,
'叮咚'
,
'可'
,
'可以'
,
'可是'
,
'可能'
,
'可见'
,
'各'
,
'各个'
,
'各人'
,
'各位'
,
'各地'
,
'各种'
,
'各级'
,
'各自'
,
'合理'
,
'同'
,
'同一'
,
'同时'
,
'同样'
,
'后来'
,
'后面'
,
'向'
,
'向着'
,
'吓'
,
'吗'
,
'否则'
,
'吧'
,
'吧哒'
,
'吱'
,
'呀'
,
'呃'
,
'呕'
,
'呗'
,
'呜'
,
'呜呼'
,
'呢'
,
'周围'
,
'呵'
,
'呸'
,
'呼哧'
,
'咋'
,
'和'
,
'咚'
,
'咦'
,
'咱'
,
'咱们'
,
'咳'
,
'哇'
,
'哈'
,
'哈哈'
,
'哉'
,
'哎'
,
'哎呀'
,
'哎哟'
,
'哗'
,
'哟'
,
'哦'
,
'哩'
,
'哪'
,
'哪个'
,
'哪些'
,
'哪儿'
,
'哪天'
,
'哪年'
,
'哪怕'
,
'哪样'
,
'哪边'
,
'哪里'
,
'哼'
,
'哼唷'
,
'唉'
,
'啊'
,
'啐'
,
'啥'
,
'啦'
,
'啪达'
,
'喂'
,
'喏'
,
'喔唷'
,
'嗡嗡'
,
'嗬'
,
'嗯'
,
'嗳'
,
'嘎'
,
'嘎登'
,
'嘘'
,
'嘛'
,
'嘻'
,
'嘿'
,
'因'
,
'因为'
,
'因此'
,
'因而'
,
'固然'
,
'在'
,
'在下'
,
'地'
,
'坚决'
,
'坚持'
,
'基本'
,
'处理'
,
'复杂'
,
'多'
,
'多少'
,
'多数'
,
'多次'
,
'大力'
,
'大多数'
,
'大大'
,
'大家'
,
'大批'
,
'大约'
,
'大量'
,
'失去'
,
'她'
,
'她们'
,
'她的'
,
'好的'
,
'好象'
,
'如'
,
'如上所述'
,
'如下'
,
'如何'
,
'如其'
,
'如果'
,
'如此'
,
'如若'
,
'存在'
,
'宁'
,
'宁可'
,
'宁愿'
,
'宁肯'
,
'它'
,
'它们'
,
'它们的'
,
'它的'
,
'安全'
,
'完全'
,
'完成'
,
'实现'
,
'实际'
,
'宣布'
,
'容易'
,
'密切'
,
'对'
,
'对于'
,
'对应'
,
'将'
,
'少数'
,
'尔后'
,
'尚且'
,
'尤其'
,
'就'
,
'就是'
,
'就是说'
,
'尽'
,
'尽管'
,
'属于'
,
'岂但'
,
'左右'
,
'巨大'
,
'巩固'
,
'己'
,
'已经'
,
'帮助'
,
'常常'
,
'并'
,
'并不'
,
'并不是'
,
'并且'
,
'并没有'
,
'广大'
,
'广泛'
,
'应当'
,
'应用'
,
'应该'
,
'开外'
,
'开始'
,
'开展'
,
'引起'
,
'强烈'
,
'强调'
,
'归'
,
'当'
,
'当前'
,
'当时'
,
'当然'
,
'当着'
,
'形成'
,
'彻底'
,
'彼'
,
'彼此'
,
'往'
,
'往往'
,
'待'
,
'後来'
,
'後面'
,
'得'
,
'得出'
,
'得到'
,
'心里'
,
'必然'
,
'必要'
,
'必须'
,
'怎'
,
'怎么'
,
'怎么办'
,
'怎么样'
,
'怎样'
,
'怎麽'
,
'总之'
,
'总是'
,
'总的来看'
,
'总的来说'
,
'总的说来'
,
'总结'
,
'总而言之'
,
'恰恰相反'
,
'您'
,
'意思'
,
'愿意'
,
'慢说'
,
'成为'
,
'我'
,
'我们'
,
'我的'
,
'或'
,
'或是'
,
'或者'
,
'战斗'
,
'所'
,
'所以'
,
'所有'
,
'所谓'
,
'打'
,
'扩大'
,
'把'
,
'抑或'
,
'拿'
,
'按'
,
'按照'
,
'换句话说'
,
'换言之'
,
'据'
,
'掌握'
,
'接着'
,
'接著'
,
'故'
,
'故此'
,
'整个'
,
'方便'
,
'方面'
,
'旁人'
,
'无宁'
,
'无法'
,
'无论'
,
'既'
,
'既是'
,
'既然'
,
'时候'
,
'明显'
,
'明确'
,
'是'
,
'是否'
,
'是的'
,
'显然'
,
'显著'
,
'普通'
,
'普遍'
,
'更加'
,
'曾经'
,
'替'
,
'最后'
,
'最大'
,
'最好'
,
'最後'
,
'最近'
,
'最高'
,
'有'
,
'有些'
,
'有关'
,
'有利'
,
'有力'
,
'有所'
,
'有效'
,
'有时'
,
'有点'
,
'有的'
,
'有着'
,
'有著'
,
'望'
,
'朝'
,
'朝着'
,
'本'
,
'本着'
,
'来'
,
'来着'
,
'极了'
,
'构成'
,
'果然'
,
'果真'
,
'某'
,
'某个'
,
'某些'
,
'根据'
,
'根本'
,
'欢迎'
,
'正在'
,
'正如'
,
'正常'
,
'此'
,
'此外'
,
'此时'
,
'此间'
,
'毋宁'
,
'每'
,
'每个'
,
'每天'
,
'每年'
,
'每当'
,
'比'
,
'比如'
,
'比方'
,
'比较'
,
'毫不'
,
'没有'
,
'沿'
,
'沿着'
,
'注意'
,
'深入'
,
'清楚'
,
'满足'
,
'漫说'
,
'焉'
,
'然则'
,
'然后'
,
'然後'
,
'然而'
,
'照'
,
'照着'
,
'特别是'
,
'特殊'
,
'特点'
,
'现代'
,
'现在'
,
'甚么'
,
'甚而'
,
'甚至'
,
'用'
,
'由'
,
'由于'
,
'由此可见'
,
'的'
,
'的话'
,
'目前'
,
'直到'
,
'直接'
,
'相似'
,
'相信'
,
'相反'
,
'相同'
,
'相对'
,
'相对而言'
,
'相应'
,
'相当'
,
'相等'
,
'省得'
,
'看出'
,
'看到'
,
'看来'
,
'看看'
,
'看见'
,
'真是'
,
'真正'
,
'着'
,
'着呢'
,
'矣'
,
'知道'
,
'确定'
,
'离'
,
'积极'
,
'移动'
,
'突出'
,
'突然'
,
'立即'
,
'第'
,
'等'
,
'等等'
,
'管'
,
'紧接着'
,
'纵'
,
'纵令'
,
'纵使'
,
'纵然'
,
'练习'
,
'组成'
,
'经'
,
'经常'
,
'经过'
,
'结合'
,
'结果'
,
'给'
,
'绝对'
,
'继续'
,
'继而'
,
'维持'
,
'综上所述'
,
'罢了'
,
'考虑'
,
'者'
,
'而'
,
'而且'
,
'而况'
,
'而外'
,
'而已'
,
'而是'
,
'而言'
,
'联系'
,
'能'
,
'能否'
,
'能够'
,
'腾'
,
'自'
,
'自个儿'
,
'自从'
,
'自各儿'
,
'自家'
,
'自己'
,
'自身'
,
'至'
,
'至于'
,
'良好'
,
'若'
,
'若是'
,
'若非'
,
'范围'
,
'莫若'
,
'获得'
,
'虽'
,
'虽则'
,
'虽然'
,
'虽说'
,
'行为'
,
'行动'
,
'表明'
,
'表示'
,
'被'
,
'要'
,
'要不'
,
'要不是'
,
'要不然'
,
'要么'
,
'要是'
,
'要求'
,
'规定'
,
'觉得'
,
'认为'
,
'认真'
,
'认识'
,
'让'
,
'许多'
,
'论'
,
'设使'
,
'设若'
,
'该'
,
'说明'
,
'诸位'
,
'谁'
,
'谁知'
,
'赶'
,
'起'
,
'起来'
,
'起见'
,
'趁'
,
'趁着'
,
'越是'
,
'跟'
,
'转动'
,
'转变'
,
'转贴'
,
'较'
,
'较之'
,
'边'
,
'达到'
,
'迅速'
,
'过'
,
'过去'
,
'过来'
,
'运用'
,
'还是'
,
'还有'
,
'这'
,
'这个'
,
'这么'
,
'这么些'
,
'这么样'
,
'这么点儿'
,
'这些'
,
'这会儿'
,
'这儿'
,
'这就是说'
,
'这时'
,
'这样'
,
'这点'
,
'这种'
,
'这边'
,
'这里'
,
'这麽'
,
'进入'
,
'进步'
,
'进而'
,
'进行'
,
'连'
,
'连同'
,
'适应'
,
'适当'
,
'适用'
,
'逐步'
,
'逐渐'
,
'通常'
,
'通过'
,
'造成'
,
'遇到'
,
'遭到'
,
'避免'
,
'那'
,
'那个'
,
'那么'
,
'那么些'
,
'那么样'
,
'那些'
,
'那会儿'
,
'那儿'
,
'那时'
,
'那样'
,
'那边'
,
'那里'
,
'那麽'
,
'部分'
,
'鄙人'
,
'采取'
,
'里面'
,
'重大'
,
'重新'
,
'重要'
,
'鉴于'
,
'问题'
,
'防止'
,
'阿'
,
'附近'
,
'限制'
,
'除'
,
'除了'
,
'除此之外'
,
'除非'
,
'随'
,
'随着'
,
'随著'
,
'集中'
,
'需要'
,
'非但'
,
'非常'
,
'非徒'
,
'靠'
,
'顺'
,
'顺着'
,
'首先'
,
'高兴'
,
'是不是'
,
'说说'
,
' '
,
'about'
,
'after'
,
'all'
,
'also'
,
'am'
,
'an'
,
'and'
,
'another'
,
'any'
,
'are'
,
'as'
,
'at'
,
'be'
,
'because'
,
'been'
,
'before'
,
'being'
,
'between'
,
'both'
,
'but'
,
'by'
,
'came'
,
'can'
,
'come'
,
'could'
,
'did'
,
'do'
,
'each'
,
'for'
,
'from'
,
'get'
,
'got'
,
'has'
,
'had'
,
'he'
,
'have'
,
'her'
,
'here'
,
'him'
,
'himself'
,
'his'
,
'how'
,
'if'
,
'in'
,
'into'
,
'is'
,
'it'
,
'like'
,
'make'
,
'many'
,
'me'
,
'might'
,
'more'
,
'most'
,
'much'
,
'must'
,
'my'
,
'never'
,
'now'
,
'of'
,
'on'
,
'only'
,
'or'
,
'other'
,
'our'
,
'out'
,
'over'
,
'said'
,
'same'
,
'should'
,
'since'
,
'some'
,
'still'
,
'such'
,
'take'
,
'than'
,
'that'
,
'the'
,
'their'
,
'them'
,
'then'
,
'there'
,
'these'
,
'they'
,
'this'
,
'those'
,
'through'
,
'to'
,
'too'
,
'under'
,
'up'
,
'very'
,
'was'
,
'way'
,
'we'
,
'well'
,
'were'
,
'what'
,
'where'
,
'which'
,
'while'
,
'who'
,
'with'
,
'would'
,
'you'
,
'your'
,
'a'
,
'i'
]
def jieba_split(text: str) -> list[str]:
    """Tokenize ``text`` with jieba and drop stop words and full-width symbols.

    Fixes over the previous version:
      * the punctuation pattern is applied with ``re`` (``str.replace``
        treated it as a literal substring, so nothing was ever removed);
      * the result is a concrete list, so the fallback to the raw text
        actually triggers when every token is filtered out (a generator
        expression is always truthy);
      * tokens that become empty after cleaning are discarded.

    Args:
        text: The text to tokenize.

    Returns:
        The cleaned tokens, or ``[text]`` when no token survives filtering.
    """
    import re  # local import: the rest of the file is not visible from this block

    # CJK symbols/punctuation and half-/full-width forms.
    fullwidth = re.compile(r'[\u3000-\u303f\uff00-\uffef]')
    # Build the lookup set once per call instead of scanning the list per token.
    stop_words = set(stopWords)

    cleaned = []
    for token in cut(text, True):
        if not token or token in stop_words:
            continue
        token = fullwidth.sub('', token).strip()
        if token:
            cleaned.append(token)
    return cleaned or [text]
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment