Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
LAE
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
文靖昊
LAE
Commits
135a0fe7
Commit
135a0fe7
authored
7 months ago
by
文靖昊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改agent工具描述,分批获取相似文档,返回文档数量进行修改
parent
909d09a7
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
37 additions
and
36 deletions
+37
-36
rag_agent.py
src/agent/rag_agent.py
+6
-4
tool_divisions.py
src/agent/tool_divisions.py
+1
-1
txt_doc_table.py
src/pgdb/knowledge/txt_doc_table.py
+3
-10
get_similarity.py
src/server/get_similarity.py
+22
-18
rag_agent_test.py
test/rag_agent_test.py
+5
-3
No files found.
src/agent/rag_agent.py
View file @
135a0fe7
...
...
@@ -22,7 +22,7 @@ class IssuanceArgs(BaseModel):
class
RAGQuery
(
BaseTool
):
name
=
"rag_query"
description
=
"""这是一个区(县)级的水文气象地质知识库,当咨询区(县)的水文气象地质等相关信息的时候,你可以提供数据和资料。注意,这个查询只能获取
单个区(县)的水文气象地质等相关信息,
当需要查询省市的详细信息时,需要获取改省市下的具体行政规划,并一一获取具体的区(县)的水文气象地质等相关信息。这个知识库中信息并不全面,有可能缺失。"""
description
=
"""这是一个区(县)级的水文气象地质知识库,当咨询区(县)的水文气象地质等相关信息的时候,你可以提供数据和资料。注意,这个查询只能获取
一个区(县)的水文气象地质等相关信息。如果问题中有多个区县,请拆解出来,并一个区县一个区县的查询。
当需要查询省市的详细信息时,需要获取改省市下的具体行政规划,并一一获取具体的区(县)的水文气象地质等相关信息。这个知识库中信息并不全面,有可能缺失。"""
args_schema
:
Type
[
BaseModel
]
=
IssuanceArgs
rerank
:
Any
# 替换 Any 为适当的类型
rerank_model
:
Any
# 替换 Any 为适当的类型
...
...
@@ -53,9 +53,10 @@ class RAGQuery(BaseTool):
# split_list = []
# for l in split_str:
# split_list.append(l)
split_docs_list
=
[]
for
l
in
location
:
start
=
time
.
time
()
answer
=
self
.
db
.
find_like_doc
(
location
)
answer
=
self
.
db
.
find_like_doc
(
l
)
end
=
time
.
time
()
print
(
'find_like_doc time:
%
s Seconds'
%
(
end
-
start
))
print
(
len
(
answer
)
if
answer
else
0
)
...
...
@@ -64,6 +65,7 @@ class RAGQuery(BaseTool):
d
=
Document
(
page_content
=
a
[
0
],
metadata
=
json
.
loads
(
a
[
1
]))
split_docs
.
append
(
d
)
print
(
len
(
split_docs
))
split_docs_list
.
append
(
split_docs
)
# if len(split_docs)>10:
# split_docs= split_docs[:10]
...
...
@@ -77,7 +79,7 @@ class RAGQuery(BaseTool):
print
(
matches
)
similarity
=
self
.
get_similarity_with_ext_origin
(
matches
,
_location
=
location
)
# cur_similarity = similarity.get_rerank(self.rerank_model)
cur_similarity
=
similarity
.
get_rerank_with_doc
(
self
.
rerank_model
,
split_docs
)
cur_similarity
=
similarity
.
get_rerank_with_doc
(
self
.
rerank_model
,
split_docs
_list
)
# docs = similarity.get_rerank_docs()
# print(cur_similarity)
# # geo_result = "以下是详细的水文气象地质资料:"+cur_similarity+"\n 以下是原问题"+question
...
...
This diff is collapsed.
Click to expand it.
src/agent/tool_divisions.py
View file @
135a0fe7
...
...
@@ -132,7 +132,7 @@ class AdministrativeDivisionArgs(BaseModel):
class
AdministrativeDivision
(
BaseTool
):
name
=
"administrative_division"
description
=
"根据用户提问中涉及到的地区信息补全其行政区划信息,明确具体的省、市、县信息。比如输入县,补全所属省市,输入市则补全省级以及下辖所有县区"
description
=
"根据用户提问中涉及到的地区信息补全其行政区划信息,明确具体的省、市、县信息。比如输入县,补全所属省市,输入市则补全省级以及下辖所有县区
,当问题中涉及区县的时候,一定要优先调用此工具
"
args_schema
:
Type
[
BaseModel
]
=
AdministrativeDivisionArgs
def
_run
(
self
,
input_text
:
str
)
->
str
:
...
...
This diff is collapsed.
Click to expand it.
src/pgdb/knowledge/txt_doc_table.py
View file @
135a0fe7
...
...
@@ -66,16 +66,9 @@ class TxtDoc:
print
(
"drop table txt_doc ok"
)
def
find_like_doc
(
self
,
item
:
list
):
print
(
item
)
i0
=
item
[
0
]
if
len
(
item
)
>=
1
:
item
=
item
[
1
:]
print
(
item
)
query
=
"select text,matadate FROM txt_doc WHERE matadate like '
%
"
+
i0
+
"
%
' or text like '
%
"
+
i0
+
"
%
' "
for
i
in
item
:
query
+=
"or matadate like '
%
"
+
i
+
"
%
' or text like '
%
"
+
i0
+
"
%
' "
print
(
query
)
def
find_like_doc
(
self
,
item
:
str
):
query
=
"select text,matadate FROM txt_doc WHERE matadate like '
%
"
+
item
+
"
%
' or text like '
%
"
+
item
+
"
%
' "
self
.
db
.
execute
(
query
)
answer
=
self
.
db
.
fetchall
()
...
...
This diff is collapsed.
Click to expand it.
src/server/get_similarity.py
View file @
135a0fe7
...
...
@@ -80,33 +80,31 @@ class GetSimilarityWithExt:
result
+=
"]"
return
result
def
get_rerank_with_doc
(
self
,
reranker
:
BgeRerank
,
split_doc
:
list
,
top_k
=
5
):
def
get_rerank_with_doc
(
self
,
reranker
:
BgeRerank
,
split_docs_list
:
list
):
top_k
=
self
.
get_doc_nums
(
len
(
split_docs_list
))
question
=
'
\n
'
.
join
(
self
.
question
)
print
(
question
)
rerank_docs1_hash
=
[]
rerank_docs2_hash
=
[]
m
=
{}
result
=
[]
for
split_doc
in
split_docs_list
:
start
=
time
.
time
()
rerank_docs1
=
reranker
.
compress_documents
(
split_doc
,
question
)
end
=
time
.
time
()
print
(
'重排1 time:
%
s Seconds'
%
(
end
-
start
))
for
doc
in
rerank_docs1
:
m
[
hash
(
doc
.
page_content
)]
=
doc
rerank_docs1_hash
.
append
(
hash
(
doc
.
page_content
))
result
.
append
((
60
,
rerank_docs1_hash
))
start
=
time
.
time
()
rerank_docs2
=
reranker
.
compress_documents
(
self
.
similarity_docs
,
question
)
end
=
time
.
time
()
print
(
'重排2 time:
%
s Seconds'
%
(
end
-
start
))
rerank_docs1_hash
=
[]
rerank_docs2_hash
=
[]
m
=
{}
start
=
time
.
time
()
for
doc
in
rerank_docs1
:
m
[
hash
(
doc
.
page_content
)]
=
doc
rerank_docs1_hash
.
append
(
hash
(
doc
.
page_content
))
for
doc
in
rerank_docs2
:
m
[
hash
(
doc
.
page_content
)]
=
doc
rerank_docs2_hash
.
append
(
hash
(
doc
.
page_content
))
end
=
time
.
time
()
step_time1
=
end
-
start
result
=
[]
result
.
append
((
60
,
rerank_docs1_hash
))
result
.
append
((
55
,
rerank_docs2_hash
))
print
(
len
(
rerank_docs1_hash
))
print
(
len
(
rerank_docs2_hash
))
...
...
@@ -114,14 +112,11 @@ class GetSimilarityWithExt:
rrf_doc
=
reciprocal_rank_fusion
(
result
)
end
=
time
.
time
()
print
(
'混排 time:
%
s Seconds'
%
(
end
-
start
))
print
(
rrf_doc
)
print
(
"混排文档数量:"
,
len
(
rrf_doc
)
)
d_list
=
[]
start
=
time
.
time
()
for
key
in
rrf_doc
:
d_list
.
append
(
m
[
key
])
end
=
time
.
time
()
step_time2
=
end
-
start
print
(
'文档去重 time:
%
s Seconds'
%
(
step_time1
+
step_time2
))
print
(
"返回文档数量:"
,
top_k
)
self
.
rerank_docs
=
d_list
[:
top_k
]
return
self
.
join_document
(
d_list
[:
top_k
])
...
...
@@ -151,6 +146,15 @@ class GetSimilarityWithExt:
print
(
len
(
unique_documents
))
return
unique_documents
def get_doc_nums(self, num: int, *, per_item: int = 3, lower: int = 5, upper: int = 30) -> int:
    """Return how many documents to keep, scaled from ``num`` and clamped.

    The count is ``num * per_item``, limited to the inclusive range
    ``[lower, upper]``. With the defaults this reproduces the original
    behavior: three documents per unit of ``num``, never fewer than 5 and
    never more than 30.

    Args:
        num: Number of items to scale from (elsewhere in this file it is
            called with the length of the list of document batches).
        per_item: Documents to allow per unit of ``num``.
        lower: Minimum count returned, even when ``num`` is zero or negative.
        upper: Maximum count returned.

    Returns:
        The clamped document count. If ``lower`` exceeds ``upper``,
        ``lower`` wins.
    """
    # Clamp with min/max instead of an if/elif/else chain; `self` is unused
    # but kept so existing `self.get_doc_nums(...)` call sites still work.
    return max(lower, min(upper, num * per_item))
class
QAExt
:
llm
=
None
...
...
This diff is collapsed.
Click to expand it.
test/rag_agent_test.py
View file @
135a0fe7
...
...
@@ -10,7 +10,8 @@ from langchain_core.prompts.chat import ChatPromptTemplate,HumanMessagePromptTem
from
langchain_core.prompts
import
PromptTemplate
from
langchain.chains
import
LLMChain
import
langchain_core
from
src.llm.ernie_with_sdk
import
ChatERNIESerLLM
from
qianfan
import
ChatCompletion
from
src.pgdb.knowledge.similarity
import
VectorStore_FAISS
from
src.server.get_similarity
import
QAExt
from
src.server.agent
import
create_chart_agent
...
...
@@ -41,7 +42,8 @@ base_llm = ChatOpenAI(
verbose
=
True
,
temperature
=
0
)
# base_llm = ChatERNIESerLLM(
# chat_completion=ChatCompletion(ak="pT7sV1smp4AeDl0LjyZuHBV9", sk="b3N0ibo1IKTLZlSs7weZc8jdR0oHjyMu"))
vecstore_faiss
=
VectorStore_FAISS
(
embedding_model_name
=
EMBEEDING_MODEL_PATH
,
...
...
@@ -98,7 +100,7 @@ for h in history:
prompt
+=
"问:{}
\n
答:{}
\n
"
.
format
(
h
[
0
],
h
[
1
])
print
(
prompt
)
# res = agent_executor.invoke({"input":"以下历史对话记录: "+prompt+"以下是问题:"+"攸县、长沙县、化隆县和大通县谁的年平均降雨量大"})
res
=
agent_executor
.
invoke
({
"input"
:
"
攸县、长沙县、化隆县和大通县谁的年平均降雨量大
"
,
"histories"
:
history
})
res
=
agent_executor
.
invoke
({
"input"
:
"
西宁市各区县年平均降雨量
"
,
"histories"
:
history
})
print
(
"====== result: ======"
)
print
(
res
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment