Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
LAE
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
文靖昊
LAE
Commits
27074f64
Commit
27074f64
authored
Jul 24, 2024
by
文靖昊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
去除混排,使用重排之后的前三个文档之和作为返回文档
parent
135a0fe7
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
26 deletions
+28
-26
get_similarity.py
src/server/get_similarity.py
+28
-26
No files found.
src/server/get_similarity.py
View file @
27074f64
...
...
@@ -81,44 +81,46 @@ class GetSimilarityWithExt:
return
result
def
get_rerank_with_doc
(
self
,
reranker
:
BgeRerank
,
split_docs_list
:
list
):
top_k
=
self
.
get_doc_nums
(
len
(
split_docs_list
))
#
top_k = self.get_doc_nums(len(split_docs_list))
question
=
'
\n
'
.
join
(
self
.
question
)
print
(
question
)
rerank_docs1_hash
=
[]
rerank_docs2_hash
=
[]
m
=
{}
#
rerank_docs1_hash = []
#
rerank_docs2_hash = []
#
m = {}
result
=
[]
for
split_doc
in
split_docs_list
:
start
=
time
.
time
()
rerank_docs1
=
reranker
.
compress_documents
(
split_doc
,
question
)
result
.
extend
(
rerank_docs1
[:
3
])
end
=
time
.
time
()
print
(
'重排1 time:
%
s Seconds'
%
(
end
-
start
))
for
doc
in
rerank_docs1
:
m
[
hash
(
doc
.
page_content
)]
=
doc
rerank_docs1_hash
.
append
(
hash
(
doc
.
page_content
))
result
.
append
((
60
,
rerank_docs1_hash
))
#
for doc in rerank_docs1:
#
m[hash(doc.page_content)] = doc
#
rerank_docs1_hash.append(hash(doc.page_content))
#
result.append((60, rerank_docs1_hash))
start
=
time
.
time
()
rerank_docs2
=
reranker
.
compress_documents
(
self
.
similarity_docs
,
question
)
result
.
extend
(
rerank_docs2
[:
3
])
end
=
time
.
time
()
print
(
'重排2 time:
%
s Seconds'
%
(
end
-
start
))
for
doc
in
rerank_docs2
:
m
[
hash
(
doc
.
page_content
)]
=
doc
rerank_docs2_hash
.
append
(
hash
(
doc
.
page_content
))
result
.
append
((
55
,
rerank_docs2_hash
))
print
(
len
(
rerank_docs1_hash
))
print
(
len
(
rerank_docs2_hash
))
start
=
time
.
time
()
rrf_doc
=
reciprocal_rank_fusion
(
result
)
end
=
time
.
time
()
print
(
'混排 time:
%
s Seconds'
%
(
end
-
start
))
print
(
"混排文档数量:"
,
len
(
rrf_doc
))
d_list
=
[]
for
key
in
rrf_doc
:
d_list
.
append
(
m
[
key
])
print
(
"返回文档数量:"
,
top_k
)
self
.
rerank_docs
=
d_list
[:
top_k
]
return
self
.
join_document
(
d_list
[:
top_k
]
)
#
for doc in rerank_docs2:
#
m[hash(doc.page_content)] = doc
#
rerank_docs2_hash.append(hash(doc.page_content))
#
#
result.append((55,rerank_docs2_hash))
#
print(len(rerank_docs1_hash))
#
print(len(rerank_docs2_hash))
#
start = time.time()
#
rrf_doc = reciprocal_rank_fusion(result)
#
end = time.time()
#
print('混排 time: %s Seconds' % (end - start))
#
print("混排文档数量:", len(rrf_doc))
#
d_list = []
#
for key in rrf_doc:
#
d_list.append(m[key])
#
print("返回文档数量:",top_k)
self
.
rerank_docs
=
result
return
self
.
join_document
(
result
)
def
get_similarity_doc
(
self
):
return
self
.
similarity_doc_txt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment