Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
LAE
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
文靖昊
LAE
Commits
ec3277e8
Commit
ec3277e8
authored
7 months ago
by
文靖昊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
向量相似性搜索之后过滤不含地名的结果
parent
130b6514
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
22 additions
and
11 deletions
+22
-11
rag_agent.py
src/agent/rag_agent.py
+8
-8
web.py
src/controller/web.py
+2
-0
get_similarity.py
src/server/get_similarity.py
+7
-1
rag_agent_test.py
test/rag_agent_test.py
+5
-2
No files found.
src/agent/rag_agent.py
View file @
ec3277e8
...
...
@@ -42,8 +42,8 @@ class RAGQuery(BaseTool):
self
.
llm_chain
=
_llm_chain
def
get_similarity_with_ext_origin
(
self
,
_ext
):
return
GetSimilarityWithExt
(
_question
=
_ext
,
_faiss_db
=
self
.
faiss_db
)
def
get_similarity_with_ext_origin
(
self
,
_ext
,
_location
):
return
GetSimilarityWithExt
(
_question
=
_ext
,
_faiss_db
=
self
.
faiss_db
,
_location
=
_location
)
...
...
@@ -75,20 +75,20 @@ class RAGQuery(BaseTool):
matches
=
re
.
findall
(
r'"([^"]+)"'
,
result
.
content
)
print
(
matches
)
similarity
=
self
.
get_similarity_with_ext_origin
(
matches
)
similarity
=
self
.
get_similarity_with_ext_origin
(
matches
,
_location
=
location
)
# cur_similarity = similarity.get_rerank(self.rerank_model)
cur_similarity
=
similarity
.
get_rerank_with_doc
(
self
.
rerank_model
,
split_docs
)
docs
=
similarity
.
get_rerank_docs
()
#
docs = similarity.get_rerank_docs()
# print(cur_similarity)
# # geo_result = "以下是详细的水文气象地质资料:"+cur_similarity+"\n 以下是原问题"+question
# # cur_question = self.prompt.format(history=history, context=cur_similarity, question=question)
cur_answer
=
self
.
llm_chain
.
run
(
context
=
cur_similarity
,
question
=
question
)
# print(cur_answer)
# return cur_answer
loc
=
location
[
0
]
location
=
location
[
1
:]
for
i
in
location
:
loc
+=
(
","
+
i
)
#
loc = location[0]
#
location = location[1:]
#
for i in location:
#
loc += (","+i)
return
{
"详细信息"
:
cur_answer
,
"参考文档"
:
cur_similarity
}
...
...
This diff is collapsed.
Click to expand it.
src/controller/web.py
View file @
ec3277e8
...
...
@@ -226,6 +226,7 @@ def question(chat_request: ChatRequest, token: str = Header(None)):
# answer, docs = my_chat.chat_with_history_with_ext(question,ext=matches,history=prompt, with_similarity=True)
docs_json
=
[]
for
step
in
res
[
"intermediate_steps"
]:
if
"rag_query"
==
step
[
0
]
.
tool
:
j
=
json
.
loads
(
step
[
1
][
"参考文档"
],
strict
=
False
)
docs_json
.
extend
(
j
)
print
(
len
(
docs_json
))
...
...
@@ -283,6 +284,7 @@ def re_generate(chat_request: ReGenerateRequest, token: str = Header(None)):
answer
=
res
[
"output"
]
docs_json
=
[]
for
step
in
res
[
"intermediate_steps"
]:
if
"rag_query"
==
step
[
0
]
.
tool
:
j
=
json
.
loads
(
step
[
1
][
"参考文档"
],
strict
=
False
)
docs_json
.
extend
(
j
)
...
...
This diff is collapsed.
Click to expand it.
src/server/get_similarity.py
View file @
ec3277e8
...
...
@@ -36,13 +36,16 @@ class GetSimilarity:
class
GetSimilarityWithExt
:
def
__init__
(
self
,
_question
,
_faiss_db
:
VectorStore_FAISS
):
def
__init__
(
self
,
_question
,
_faiss_db
:
VectorStore_FAISS
,
_location
=
None
):
self
.
question
=
_question
self
.
faiss_db
=
_faiss_db
self
.
location
=
_location
self
.
similarity_docs
=
self
.
get_text_similarity_with_ext
()
self
.
similarity_doc_txt
=
self
.
faiss_db
.
join_document
(
self
.
similarity_docs
)
self
.
rerank_docs
=
[]
def
get_rerank
(
self
,
reranker
:
BgeRerank
,
top_k
=
5
):
question
=
'
\n
'
.
join
(
self
.
question
)
print
(
question
)
...
...
@@ -139,10 +142,13 @@ class GetSimilarityWithExt:
content_set
=
set
()
unique_documents
=
[]
for
doc
in
similarity_docs
:
if
self
.
location
is
not
None
:
if
any
(
substring
in
doc
.
page_content
for
substring
in
self
.
location
)
or
any
(
substring
in
doc
.
metadata
[
"filename"
]
for
substring
in
self
.
location
)
:
content
=
hash
(
doc
.
page_content
)
if
content
not
in
content_set
:
unique_documents
.
append
(
doc
)
content_set
.
add
(
content
)
print
(
len
(
unique_documents
))
return
unique_documents
class
QAExt
:
...
...
This diff is collapsed.
Click to expand it.
test/rag_agent_test.py
View file @
ec3277e8
...
...
@@ -106,9 +106,12 @@ print(type(res))
print
(
res
[
"output"
])
docs_json
=
[]
for
step
in
res
[
"intermediate_steps"
]:
print
(
type
(
step
[
1
][
"参考文档"
]))
print
(
type
(
step
[
0
]
.
tool
))
print
(
step
[
0
]
.
tool
)
if
"rag_query"
==
step
[
0
]
.
tool
:
print
(
True
)
print
(
step
[
1
][
"参考文档"
])
j
=
json
.
loads
(
step
[
1
][
"参考文档"
],
strict
=
False
)
j
=
json
.
loads
(
step
[
1
][
"参考文档"
],
strict
=
False
)
docs_json
.
extend
(
j
)
print
(
docs_json
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment