Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
LAE
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
文靖昊
LAE
Commits
27074f64
Commit
27074f64
authored
a year ago
by
文靖昊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
去除混排,使用重排之后的前三个文档之和作为返回文档
parent
135a0fe7
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
26 deletions
+28
-26
get_similarity.py
src/server/get_similarity.py
+28
-26
No files found.
src/server/get_similarity.py
View file @
27074f64
...
@@ -81,44 +81,46 @@ class GetSimilarityWithExt:
...
@@ -81,44 +81,46 @@ class GetSimilarityWithExt:
return
result
return
result
def
get_rerank_with_doc
(
self
,
reranker
:
BgeRerank
,
split_docs_list
:
list
):
def
get_rerank_with_doc
(
self
,
reranker
:
BgeRerank
,
split_docs_list
:
list
):
top_k
=
self
.
get_doc_nums
(
len
(
split_docs_list
))
#
top_k = self.get_doc_nums(len(split_docs_list))
question
=
'
\n
'
.
join
(
self
.
question
)
question
=
'
\n
'
.
join
(
self
.
question
)
print
(
question
)
print
(
question
)
rerank_docs1_hash
=
[]
#
rerank_docs1_hash = []
rerank_docs2_hash
=
[]
#
rerank_docs2_hash = []
m
=
{}
#
m = {}
result
=
[]
result
=
[]
for
split_doc
in
split_docs_list
:
for
split_doc
in
split_docs_list
:
start
=
time
.
time
()
start
=
time
.
time
()
rerank_docs1
=
reranker
.
compress_documents
(
split_doc
,
question
)
rerank_docs1
=
reranker
.
compress_documents
(
split_doc
,
question
)
result
.
extend
(
rerank_docs1
[:
3
])
end
=
time
.
time
()
end
=
time
.
time
()
print
(
'重排1 time:
%
s Seconds'
%
(
end
-
start
))
print
(
'重排1 time:
%
s Seconds'
%
(
end
-
start
))
for
doc
in
rerank_docs1
:
#
for doc in rerank_docs1:
m
[
hash
(
doc
.
page_content
)]
=
doc
#
m[hash(doc.page_content)] = doc
rerank_docs1_hash
.
append
(
hash
(
doc
.
page_content
))
#
rerank_docs1_hash.append(hash(doc.page_content))
result
.
append
((
60
,
rerank_docs1_hash
))
#
result.append((60, rerank_docs1_hash))
start
=
time
.
time
()
start
=
time
.
time
()
rerank_docs2
=
reranker
.
compress_documents
(
self
.
similarity_docs
,
question
)
rerank_docs2
=
reranker
.
compress_documents
(
self
.
similarity_docs
,
question
)
result
.
extend
(
rerank_docs2
[:
3
])
end
=
time
.
time
()
end
=
time
.
time
()
print
(
'重排2 time:
%
s Seconds'
%
(
end
-
start
))
print
(
'重排2 time:
%
s Seconds'
%
(
end
-
start
))
for
doc
in
rerank_docs2
:
#
for doc in rerank_docs2:
m
[
hash
(
doc
.
page_content
)]
=
doc
#
m[hash(doc.page_content)] = doc
rerank_docs2_hash
.
append
(
hash
(
doc
.
page_content
))
#
rerank_docs2_hash.append(hash(doc.page_content))
#
result
.
append
((
55
,
rerank_docs2_hash
))
#
result.append((55,rerank_docs2_hash))
print
(
len
(
rerank_docs1_hash
))
#
print(len(rerank_docs1_hash))
print
(
len
(
rerank_docs2_hash
))
#
print(len(rerank_docs2_hash))
start
=
time
.
time
()
#
start = time.time()
rrf_doc
=
reciprocal_rank_fusion
(
result
)
#
rrf_doc = reciprocal_rank_fusion(result)
end
=
time
.
time
()
#
end = time.time()
print
(
'混排 time:
%
s Seconds'
%
(
end
-
start
))
#
print('混排 time: %s Seconds' % (end - start))
print
(
"混排文档数量:"
,
len
(
rrf_doc
))
#
print("混排文档数量:", len(rrf_doc))
d_list
=
[]
#
d_list = []
for
key
in
rrf_doc
:
#
for key in rrf_doc:
d_list
.
append
(
m
[
key
])
#
d_list.append(m[key])
print
(
"返回文档数量:"
,
top_k
)
#
print("返回文档数量:",top_k)
self
.
rerank_docs
=
d_list
[:
top_k
]
self
.
rerank_docs
=
result
return
self
.
join_document
(
d_list
[:
top_k
]
)
return
self
.
join_document
(
result
)
def
get_similarity_doc
(
self
):
def
get_similarity_doc
(
self
):
return
self
.
similarity_doc_txt
return
self
.
similarity_doc_txt
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment