Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
LAE
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
文靖昊
LAE
Commits
135a0fe7
Commit
135a0fe7
authored
Jul 24, 2024
by
文靖昊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改agent工具描述,分批获取相似文档,返回文档数量进行修改
parent
909d09a7
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
50 additions
and
49 deletions
+50
-49
rag_agent.py
src/agent/rag_agent.py
+15
-13
tool_divisions.py
src/agent/tool_divisions.py
+1
-1
txt_doc_table.py
src/pgdb/knowledge/txt_doc_table.py
+3
-10
get_similarity.py
src/server/get_similarity.py
+26
-22
rag_agent_test.py
test/rag_agent_test.py
+5
-3
No files found.
src/agent/rag_agent.py
View file @
135a0fe7
...
...
@@ -22,7 +22,7 @@ class IssuanceArgs(BaseModel):
class
RAGQuery
(
BaseTool
):
name
=
"rag_query"
description
=
"""这是一个区(县)级的水文气象地质知识库,当咨询区(县)的水文气象地质等相关信息的时候,你可以提供数据和资料。注意,这个查询只能获取
单个区(县)的水文气象地质等相关信息,
当需要查询省市的详细信息时,需要获取改省市下的具体行政规划,并一一获取具体的区(县)的水文气象地质等相关信息。这个知识库中信息并不全面,有可能缺失。"""
description
=
"""这是一个区(县)级的水文气象地质知识库,当咨询区(县)的水文气象地质等相关信息的时候,你可以提供数据和资料。注意,这个查询只能获取
一个区(县)的水文气象地质等相关信息。如果问题中有多个区县,请拆解出来,并一个区县一个区县的查询。
当需要查询省市的详细信息时,需要获取改省市下的具体行政规划,并一一获取具体的区(县)的水文气象地质等相关信息。这个知识库中信息并不全面,有可能缺失。"""
args_schema
:
Type
[
BaseModel
]
=
IssuanceArgs
rerank
:
Any
# 替换 Any 为适当的类型
rerank_model
:
Any
# 替换 Any 为适当的类型
...
...
@@ -53,17 +53,19 @@ class RAGQuery(BaseTool):
# split_list = []
# for l in split_str:
# split_list.append(l)
start
=
time
.
time
()
answer
=
self
.
db
.
find_like_doc
(
location
)
end
=
time
.
time
()
print
(
'find_like_doc time:
%
s Seconds'
%
(
end
-
start
))
print
(
len
(
answer
)
if
answer
else
0
)
split_docs
=
[]
for
a
in
answer
if
answer
else
[]:
d
=
Document
(
page_content
=
a
[
0
],
metadata
=
json
.
loads
(
a
[
1
]))
split_docs
.
append
(
d
)
print
(
len
(
split_docs
))
split_docs_list
=
[]
for
l
in
location
:
start
=
time
.
time
()
answer
=
self
.
db
.
find_like_doc
(
l
)
end
=
time
.
time
()
print
(
'find_like_doc time:
%
s Seconds'
%
(
end
-
start
))
print
(
len
(
answer
)
if
answer
else
0
)
split_docs
=
[]
for
a
in
answer
if
answer
else
[]:
d
=
Document
(
page_content
=
a
[
0
],
metadata
=
json
.
loads
(
a
[
1
]))
split_docs
.
append
(
d
)
print
(
len
(
split_docs
))
split_docs_list
.
append
(
split_docs
)
# if len(split_docs)>10:
# split_docs= split_docs[:10]
...
...
@@ -77,7 +79,7 @@ class RAGQuery(BaseTool):
print
(
matches
)
similarity
=
self
.
get_similarity_with_ext_origin
(
matches
,
_location
=
location
)
# cur_similarity = similarity.get_rerank(self.rerank_model)
cur_similarity
=
similarity
.
get_rerank_with_doc
(
self
.
rerank_model
,
split_docs
)
cur_similarity
=
similarity
.
get_rerank_with_doc
(
self
.
rerank_model
,
split_docs
_list
)
# docs = similarity.get_rerank_docs()
# print(cur_similarity)
# # geo_result = "以下是详细的水文气象地质资料:"+cur_similarity+"\n 以下是原问题"+question
...
...
src/agent/tool_divisions.py
View file @
135a0fe7
...
...
@@ -132,7 +132,7 @@ class AdministrativeDivisionArgs(BaseModel):
class
AdministrativeDivision
(
BaseTool
):
name
=
"administrative_division"
description
=
"根据用户提问中涉及到的地区信息补全其行政区划信息,明确具体的省、市、县信息。比如输入县,补全所属省市,输入市则补全省级以及下辖所有县区"
description
=
"根据用户提问中涉及到的地区信息补全其行政区划信息,明确具体的省、市、县信息。比如输入县,补全所属省市,输入市则补全省级以及下辖所有县区
,当问题中涉及区县的时候,一定要优先调用此工具
"
args_schema
:
Type
[
BaseModel
]
=
AdministrativeDivisionArgs
def
_run
(
self
,
input_text
:
str
)
->
str
:
...
...
src/pgdb/knowledge/txt_doc_table.py
View file @
135a0fe7
...
...
@@ -66,16 +66,9 @@ class TxtDoc:
print
(
"drop table txt_doc ok"
)
def
find_like_doc
(
self
,
item
:
list
):
print
(
item
)
i0
=
item
[
0
]
if
len
(
item
)
>=
1
:
item
=
item
[
1
:]
print
(
item
)
query
=
"select text,matadate FROM txt_doc WHERE matadate like '
%
"
+
i0
+
"
%
' or text like '
%
"
+
i0
+
"
%
' "
for
i
in
item
:
query
+=
"or matadate like '
%
"
+
i
+
"
%
' or text like '
%
"
+
i0
+
"
%
' "
print
(
query
)
def
find_like_doc
(
self
,
item
:
str
):
query
=
"select text,matadate FROM txt_doc WHERE matadate like '
%
"
+
item
+
"
%
' or text like '
%
"
+
item
+
"
%
' "
self
.
db
.
execute
(
query
)
answer
=
self
.
db
.
fetchall
()
...
...
src/server/get_similarity.py
View file @
135a0fe7
...
...
@@ -80,33 +80,31 @@ class GetSimilarityWithExt:
result
+=
"]"
return
result
def
get_rerank_with_doc
(
self
,
reranker
:
BgeRerank
,
split_doc
:
list
,
top_k
=
5
):
def
get_rerank_with_doc
(
self
,
reranker
:
BgeRerank
,
split_docs_list
:
list
):
top_k
=
self
.
get_doc_nums
(
len
(
split_docs_list
))
question
=
'
\n
'
.
join
(
self
.
question
)
print
(
question
)
start
=
time
.
time
()
rerank_docs1
=
reranker
.
compress_documents
(
split_doc
,
question
)
end
=
time
.
time
()
print
(
'重排1 time:
%
s Seconds'
%
(
end
-
start
))
start
=
time
.
time
()
rerank_docs2
=
reranker
.
compress_documents
(
self
.
similarity_docs
,
question
)
end
=
time
.
time
()
print
(
'重排2 time:
%
s Seconds'
%
(
end
-
start
))
rerank_docs1_hash
=
[]
rerank_docs2_hash
=
[]
m
=
{}
result
=
[]
for
split_doc
in
split_docs_list
:
start
=
time
.
time
()
rerank_docs1
=
reranker
.
compress_documents
(
split_doc
,
question
)
end
=
time
.
time
()
print
(
'重排1 time:
%
s Seconds'
%
(
end
-
start
))
for
doc
in
rerank_docs1
:
m
[
hash
(
doc
.
page_content
)]
=
doc
rerank_docs1_hash
.
append
(
hash
(
doc
.
page_content
))
result
.
append
((
60
,
rerank_docs1_hash
))
start
=
time
.
time
()
for
doc
in
rerank_docs1
:
m
[
hash
(
doc
.
page_content
)]
=
doc
rerank_docs1_hash
.
append
(
hash
(
doc
.
page_content
))
rerank_docs2
=
reranker
.
compress_documents
(
self
.
similarity_docs
,
question
)
end
=
time
.
time
()
print
(
'重排2 time:
%
s Seconds'
%
(
end
-
start
))
for
doc
in
rerank_docs2
:
m
[
hash
(
doc
.
page_content
)]
=
doc
rerank_docs2_hash
.
append
(
hash
(
doc
.
page_content
))
end
=
time
.
time
()
step_time1
=
end
-
start
result
=
[]
result
.
append
((
60
,
rerank_docs1_hash
))
result
.
append
((
55
,
rerank_docs2_hash
))
print
(
len
(
rerank_docs1_hash
))
print
(
len
(
rerank_docs2_hash
))
...
...
@@ -114,14 +112,11 @@ class GetSimilarityWithExt:
rrf_doc
=
reciprocal_rank_fusion
(
result
)
end
=
time
.
time
()
print
(
'混排 time:
%
s Seconds'
%
(
end
-
start
))
print
(
rrf_doc
)
print
(
"混排文档数量:"
,
len
(
rrf_doc
)
)
d_list
=
[]
start
=
time
.
time
()
for
key
in
rrf_doc
:
d_list
.
append
(
m
[
key
])
end
=
time
.
time
()
step_time2
=
end
-
start
print
(
'文档去重 time:
%
s Seconds'
%
(
step_time1
+
step_time2
))
print
(
"返回文档数量:"
,
top_k
)
self
.
rerank_docs
=
d_list
[:
top_k
]
return
self
.
join_document
(
d_list
[:
top_k
])
...
...
@@ -151,6 +146,15 @@ class GetSimilarityWithExt:
print
(
len
(
unique_documents
))
return
unique_documents
def get_doc_nums(self, num: int, *, per_batch: int = 3, minimum: int = 5, maximum: int = 30) -> int:
    """Return how many documents to keep for ``num`` candidate batches.

    The count is ``num * per_batch`` clamped to the inclusive range
    ``[minimum, maximum]``. With the defaults this reproduces the original
    rule: three documents per batch, never fewer than 5 and never more
    than 30.

    Args:
        num: Number of document batches (the caller passes
            ``len(split_docs_list)``).
        per_batch: Documents budgeted per batch.
        minimum: Lower bound of the returned count.
        maximum: Upper bound of the returned count.

    Returns:
        The clamped document count, used by the caller as ``top_k``.
    """
    # The lower bound is applied last, so it wins if minimum > maximum.
    return max(minimum, min(maximum, num * per_batch))
class
QAExt
:
llm
=
None
...
...
test/rag_agent_test.py
View file @
135a0fe7
...
...
@@ -10,7 +10,8 @@ from langchain_core.prompts.chat import ChatPromptTemplate,HumanMessagePromptTem
from
langchain_core.prompts
import
PromptTemplate
from
langchain.chains
import
LLMChain
import
langchain_core
from
src.llm.ernie_with_sdk
import
ChatERNIESerLLM
from
qianfan
import
ChatCompletion
from
src.pgdb.knowledge.similarity
import
VectorStore_FAISS
from
src.server.get_similarity
import
QAExt
from
src.server.agent
import
create_chart_agent
...
...
@@ -41,7 +42,8 @@ base_llm = ChatOpenAI(
verbose
=
True
,
temperature
=
0
)
# base_llm = ChatERNIESerLLM(
# chat_completion=ChatCompletion(ak=os.environ["QIANFAN_AK"], sk=os.environ["QIANFAN_SK"]))  # credentials redacted; load them from the environment
vecstore_faiss
=
VectorStore_FAISS
(
embedding_model_name
=
EMBEEDING_MODEL_PATH
,
...
...
@@ -98,7 +100,7 @@ for h in history:
prompt
+=
"问:{}
\n
答:{}
\n
"
.
format
(
h
[
0
],
h
[
1
])
print
(
prompt
)
# res = agent_executor.invoke({"input":"以下历史对话记录: "+prompt+"以下是问题:"+"攸县、长沙县、化隆县和大通县谁的年平均降雨量大"})
res
=
agent_executor
.
invoke
({
"input"
:
"
攸县、长沙县、化隆县和大通县谁的年平均降雨量大
"
,
"histories"
:
history
})
res
=
agent_executor
.
invoke
({
"input"
:
"
西宁市各区县年平均降雨量
"
,
"histories"
:
history
})
print
(
"====== result: ======"
)
print
(
res
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment